Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 61
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 48
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h | 36
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c | 179
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h | 96
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c | 385
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h | 38
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c | 125
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h | 156
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 70
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 436
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 18
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 85
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 75
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 165
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 7
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c | 31
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en.h | 244
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/channels.c | 29
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/channels.h | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 191
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h | 29
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 188
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h | 13
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/health.h | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/htb.c | 722
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/htb.h | 46
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 10
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 13
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c | 58
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h | 26
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 678
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 71
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 89
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 811
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/qos.h | 38
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 32
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 43
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 122
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 71
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 12
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/rss.c | 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c | 192
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h | 9
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/selq.c | 266
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/selq.h | 53
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c | 31
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 154
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h | 116
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c | 62
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c | 104
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c | 30
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c | 124
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c | 36
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c | 337
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c | 51
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c | 99
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c | 119
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h | 30
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c | 157
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c | 36
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c | 78
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c | 71
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h | 14
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c | 38
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c | 63
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c | 228
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h | 29
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c | 86
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h | 49
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c | 79
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c | 380
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c | 585
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h | 74
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 77
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h | 8
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c | 209
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h | 29
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c | 91
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 409
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 24
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 43
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 35
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 84
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c | 33
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tir.c | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 24
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 88
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 218
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c | 41
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 243
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h | 35
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 48
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 17
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h | 12
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 43
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 113
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h | 14
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 208
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 110
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 383
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h | 26
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 205
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 262
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 21
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c | 65
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c | 99
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h | 102
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 59
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c) | 53
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 587
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c | 27
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h | 28
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h | 9
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 1861
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h | 71
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 1384
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h | 47
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c | 72
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c | 247
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h | 132
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c | 390
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h | 91
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 143
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 31
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 127
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 826
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 94
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1167
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 230
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 26
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 699
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 250
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 36
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2459
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 65
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 302
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 33
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 151
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c | 87
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h | 15
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 412
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c | 182
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 20
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c | 13
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 11
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 227
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h | 14
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 242
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 73
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 855
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 20
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/events.c | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 1582
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h | 62
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 622
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h | 74
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 99
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 5
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 377
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 74
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 21
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 164
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h | 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/health.c | 48
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 31
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 57
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c | 226
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c | 182
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 899
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 68
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 57
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h | 7
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 100
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h | 26
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 131
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h | 15
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c | 432
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h | 90
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 139
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c | 53
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 7
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c | 68
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h | 36
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 12
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 402
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 39
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 30
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 44
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 326
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 39
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/pd.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/port.c | 89
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c | 5
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 91
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 199
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 51
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 657
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h | 15
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c | 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c | 173
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c | 307
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c | 119
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 34
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 220
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h | 9
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c | 39
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c | 355
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h | 94
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c | 231
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c | 113
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 341
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 103
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h | 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h | 42
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 24
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 9
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 66
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2
252 files changed, 24108 insertions(+), 12734 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 92056452a9e3..26685fd0fdaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -16,13 +16,9 @@ config MLX5_CORE
Core driver for low level functionality of the ConnectX-4 and
Connect-IB cards by Mellanox Technologies.
-config MLX5_ACCEL
- bool
-
config MLX5_FPGA
bool "Mellanox Technologies Innova support"
depends on MLX5_CORE
- select MLX5_ACCEL
help
Build support for the Innova family of network cards by Mellanox
Technologies. Innova network cards are comprised of a ConnectX chip
@@ -115,6 +111,7 @@ config MLX5_TC_CT
config MLX5_TC_SAMPLE
bool "MLX5 TC sample offload support"
depends on MLX5_CLS_ACT
+ depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m
default y
help
Say Y here if you want to support offloading sample rules via tc
@@ -142,71 +139,29 @@ config MLX5_CORE_IPOIB
help
MLX5 IPoIB offloads & acceleration support.
-config MLX5_FPGA_IPSEC
- bool "Mellanox Technologies IPsec Innova support"
- depends on MLX5_CORE
- depends on MLX5_FPGA
- help
- Build IPsec support for the Innova family of network cards by Mellanox
- Technologies. Innova network cards are comprised of a ConnectX chip
- and an FPGA chip on one board. If you select this option, the
- mlx5_core driver will include the Innova FPGA core and allow building
- sandbox-specific client drivers.
-
-config MLX5_IPSEC
- bool "Mellanox Technologies IPsec Connect-X support"
+config MLX5_EN_MACSEC
+ bool "Connect-X support for MACSec offload"
depends on MLX5_CORE_EN
- depends on XFRM_OFFLOAD
- depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
- select MLX5_ACCEL
+ depends on MACSEC
+ default n
help
- Build IPsec support for the Connect-X family of network cards by Mellanox
- Technologies.
- Note: If you select this option, the mlx5_core driver will include
- IPsec support for the Connect-X family.
+ Build support for MACsec cryptography-offload acceleration in the NIC.
config MLX5_EN_IPSEC
- bool "IPSec XFRM cryptography-offload acceleration"
+ bool "Mellanox Technologies IPsec Connect-X support"
depends on MLX5_CORE_EN
depends on XFRM_OFFLOAD
depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
- depends on MLX5_FPGA_IPSEC || MLX5_IPSEC
help
Build support for IPsec cryptography-offload acceleration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
-config MLX5_FPGA_TLS
- bool "Mellanox Technologies TLS Innova support"
- depends on TLS_DEVICE
- depends on TLS=y || MLX5_CORE=m
- depends on MLX5_CORE_EN
- depends on MLX5_FPGA
- select MLX5_EN_TLS
- help
- Build TLS support for the Innova family of network cards by Mellanox
- Technologies. Innova network cards are comprised of a ConnectX chip
- and an FPGA chip on one board. If you select this option, the
- mlx5_core driver will include the Innova FPGA core and allow building
- sandbox-specific client drivers.
-
-config MLX5_TLS
+config MLX5_EN_TLS
bool "Mellanox Technologies TLS Connect-X support"
depends on TLS_DEVICE
depends on TLS=y || MLX5_CORE=m
depends on MLX5_CORE_EN
- select MLX5_ACCEL
- select MLX5_EN_TLS
- help
- Build TLS support for the Connect-X family of network cards by Mellanox
- Technologies.
-
-config MLX5_EN_TLS
- bool
help
Build support for TLS cryptography-offload acceleration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
config MLX5_SW_STEERING
bool "Mellanox Technologies software-managed steering"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index e63bb9ceb9c0..a22c32aabf11 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -14,10 +14,10 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
- fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
+ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
- fw_reset.o qos.o lib/tout.o
+ fw_reset.o qos.o lib/tout.o lib/aso.o
#
# Netdev basic
@@ -28,7 +28,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
- en/qos.o en/trap.o en/fs_tt_redirect.o
+ en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \
+ lib/crypto.o
#
# Netdev extra
@@ -39,14 +40,28 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
en_rep.o en/rep/bond.o en/mod_hdr.o \
- en/mapping.o
+ en/mapping.o lag/mpesw.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
lib/fs_chains.o en/tc_tun.o \
esw/indir_table.o en/tc_tun_encap.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
- en/tc/post_act.o en/tc/int_port.o
-mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
+ en/tc/post_act.o en/tc/int_port.o en/tc/meter.o \
+ en/tc/post_meter.o
+
+mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \
+ en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \
+ en/tc/act/tun.o en/tc/act/csum.o en/tc/act/pedit.o \
+ en/tc/act/vlan.o en/tc/act/vlan_mangle.o en/tc/act/mpls.o \
+ en/tc/act/mirred.o en/tc/act/mirred_nic.o \
+ en/tc/act/ct.o en/tc/act/sample.o en/tc/act/ptype.o \
+ en/tc/act/redirect_ingress.o en/tc/act/police.o
+
+ifneq ($(CONFIG_MLX5_TC_CT),)
+ mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o
+ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o
+endif
+
mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
#
@@ -54,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
ecpf.o rdma.o esw/legacy.o \
- esw/devlink_port.o esw/vporttbl.o esw/qos.o
+ esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
@@ -75,17 +90,16 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
#
# Accelerations & FPGA
#
-mlx5_core-$(CONFIG_MLX5_IPSEC) += accel/ipsec_offload.o
-mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o
-mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o
-mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o
-
mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
+mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \
+ en_accel/macsec_stats.o
+
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
- en_accel/ipsec_stats.o en_accel/ipsec_fs.o
+ en_accel/ipsec_stats.o en_accel/ipsec_fs.o \
+ en_accel/ipsec_offload.o
-mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \
en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \
en_accel/ktls_tx.o en_accel/ktls_rx.o
@@ -94,12 +108,14 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
steering/dr_icm_pool.o steering/dr_buddy.o \
steering/dr_ste.o steering/dr_send.o \
steering/dr_ste_v0.o steering/dr_ste_v1.o \
+ steering/dr_ste_v2.o \
steering/dr_cmd.o steering/dr_fw.o \
- steering/dr_action.o steering/fs_dr.o
+ steering/dr_action.o steering/fs_dr.o \
+ steering/dr_dbg.o lib/smfs.o
#
# SF device
#
-mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o
+mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o
#
# SF manager
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
deleted file mode 100644
index 82b185121edb..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __MLX5E_ACCEL_H__
-#define __MLX5E_ACCEL_H__
-
-#ifdef CONFIG_MLX5_ACCEL
-
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-
-static inline bool is_metadata_hdr_valid(struct sk_buff *skb)
-{
- __be16 *ethtype;
-
- if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN))
- return false;
- ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
- if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
- return false;
- return true;
-}
-
-static inline void remove_metadata_hdr(struct sk_buff *skb)
-{
- struct ethhdr *old_eth;
- struct ethhdr *new_eth;
-
- /* Remove the metadata from the buffer */
- old_eth = (struct ethhdr *)skb->data;
- new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
- memmove(new_eth, old_eth, 2 * ETH_ALEN);
- /* Ethertype is already in its new place */
- skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
-}
-
-#endif /* CONFIG_MLX5_ACCEL */
-
-#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
deleted file mode 100644
index 09f5ce97af46..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-
-#include "accel/ipsec.h"
-#include "mlx5_core.h"
-#include "fpga/ipsec.h"
-#include "accel/ipsec_offload.h"
-
-void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops;
- int err = 0;
-
- ipsec_ops = (mlx5_ipsec_offload_ops(mdev)) ?
- mlx5_ipsec_offload_ops(mdev) :
- mlx5_fpga_ipsec_ops(mdev);
-
- if (!ipsec_ops || !ipsec_ops->init) {
- mlx5_core_dbg(mdev, "IPsec ops is not supported\n");
- return;
- }
-
- err = ipsec_ops->init(mdev);
- if (err) {
- mlx5_core_warn_once(mdev, "Failed to start IPsec device, err = %d\n", err);
- return;
- }
-
- mdev->ipsec_ops = ipsec_ops;
-}
-
-void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->cleanup)
- return;
-
- ipsec_ops->cleanup(mdev);
-}
-
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->device_caps)
- return 0;
-
- return ipsec_ops->device_caps(mdev);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
-
-unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->counters_count)
- return -EOPNOTSUPP;
-
- return ipsec_ops->counters_count(mdev);
-}
-
-int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int count)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->counters_read)
- return -EOPNOTSUPP;
-
- return ipsec_ops->counters_read(mdev, counters, count);
-}
-
-void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- u32 *sa_handle)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
- __be32 saddr[4] = {}, daddr[4] = {};
-
- if (!ipsec_ops || !ipsec_ops->create_hw_context)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (!xfrm->attrs.is_ipv6) {
- saddr[3] = xfrm->attrs.saddr.a4;
- daddr[3] = xfrm->attrs.daddr.a4;
- } else {
- memcpy(saddr, xfrm->attrs.saddr.a6, sizeof(saddr));
- memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr));
- }
-
- return ipsec_ops->create_hw_context(mdev, xfrm, saddr, daddr, xfrm->attrs.spi,
- xfrm->attrs.is_ipv6, sa_handle);
-}
-
-void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->free_hw_context)
- return;
-
- ipsec_ops->free_hw_context(context);
-}
-
-struct mlx5_accel_esp_xfrm *
-mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
- struct mlx5_accel_esp_xfrm *xfrm;
-
- if (!ipsec_ops || !ipsec_ops->esp_create_xfrm)
- return ERR_PTR(-EOPNOTSUPP);
-
- xfrm = ipsec_ops->esp_create_xfrm(mdev, attrs, flags);
- if (IS_ERR(xfrm))
- return xfrm;
-
- xfrm->mdev = mdev;
- return xfrm;
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
-
-void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->esp_destroy_xfrm)
- return;
-
- ipsec_ops->esp_destroy_xfrm(xfrm);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
-
-int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->esp_modify_xfrm)
- return -EOPNOTSUPP;
-
- return ipsec_ops->esp_modify_xfrm(xfrm, attrs);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
deleted file mode 100644
index fbb9c5415d53..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_ACCEL_IPSEC_H__
-#define __MLX5_ACCEL_IPSEC_H__
-
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/accel.h>
-
-#ifdef CONFIG_MLX5_ACCEL
-
-#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
-
-unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
-int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int count);
-
-void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- u32 *sa_handle);
-void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context);
-
-void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
-void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
-
-struct mlx5_accel_ipsec_ops {
- u32 (*device_caps)(struct mlx5_core_dev *mdev);
- unsigned int (*counters_count)(struct mlx5_core_dev *mdev);
- int (*counters_read)(struct mlx5_core_dev *mdev, u64 *counters, unsigned int count);
- void* (*create_hw_context)(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- const __be32 saddr[4], const __be32 daddr[4],
- const __be32 spi, bool is_ipv6, u32 *sa_handle);
- void (*free_hw_context)(void *context);
- int (*init)(struct mlx5_core_dev *mdev);
- void (*cleanup)(struct mlx5_core_dev *mdev);
- struct mlx5_accel_esp_xfrm* (*esp_create_xfrm)(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags);
- int (*esp_modify_xfrm)(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs);
- void (*esp_destroy_xfrm)(struct mlx5_accel_esp_xfrm *xfrm);
-};
-
-#else
-
-#define MLX5_IPSEC_DEV(mdev) false
-
-static inline void *
-mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- u32 *sa_handle)
-{
- return NULL;
-}
-
-static inline void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) {}
-
-static inline void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) {}
-
-static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) {}
-
-#endif /* CONFIG_MLX5_ACCEL */
-
-#endif /* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c
deleted file mode 100644
index d6667d38e1de..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c
+++ /dev/null
@@ -1,385 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
-
-#include "mlx5_core.h"
-#include "ipsec_offload.h"
-#include "lib/mlx5.h"
-#include "en_accel/ipsec_fs.h"
-
-#define MLX5_IPSEC_DEV_BASIC_CAPS (MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 | \
- MLX5_ACCEL_IPSEC_CAP_LSO)
-
-struct mlx5_ipsec_sa_ctx {
- struct rhash_head hash;
- u32 enc_key_id;
- u32 ipsec_obj_id;
- /* hw ctx */
- struct mlx5_core_dev *dev;
- struct mlx5_ipsec_esp_xfrm *mxfrm;
-};
-
-struct mlx5_ipsec_esp_xfrm {
- /* reference counter of SA ctx */
- struct mlx5_ipsec_sa_ctx *sa_ctx;
- struct mutex lock; /* protects mlx5_ipsec_esp_xfrm */
- struct mlx5_accel_esp_xfrm accel_xfrm;
-};
-
-static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev)
-{
- u32 caps = MLX5_IPSEC_DEV_BASIC_CAPS;
-
- if (!mlx5_is_ipsec_device(mdev))
- return 0;
-
- if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) ||
- !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt))
- return 0;
-
- if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) &&
- MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt))
- caps |= MLX5_ACCEL_IPSEC_CAP_ESP;
-
- if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) {
- caps |= MLX5_ACCEL_IPSEC_CAP_ESN;
- caps |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
- }
-
- /* We can accommodate up to 2^24 different IPsec objects
- * because we use up to 24 bit in flow table metadata
- * to hold the IPsec Object unique handle.
- */
- WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24);
- return caps;
-}
-
-static int
-mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay (replay_type = %d)\n",
- attrs->replay_type);
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
- mlx5_core_err(mdev, "Only aes gcm keymat is supported (keymat_type = %d)\n",
- attrs->keymat_type);
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.iv_algo !=
- MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
- mlx5_core_err(mdev, "Only iv sequence algo is supported (iv_algo = %d)\n",
- attrs->keymat.aes_gcm.iv_algo);
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.key_len != 128 &&
- attrs->keymat.aes_gcm.key_len != 256) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with key length other than 128/256 bit (key length = %d)\n",
- attrs->keymat.aes_gcm.key_len);
- return -EOPNOTSUPP;
- }
-
- if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
- !MLX5_CAP_IPSEC(mdev, ipsec_esn)) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with ESN triggered\n");
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static struct mlx5_accel_esp_xfrm *
-mlx5_ipsec_offload_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- struct mlx5_ipsec_esp_xfrm *mxfrm;
- int err = 0;
-
- err = mlx5_ipsec_offload_esp_validate_xfrm_attrs(mdev, attrs);
- if (err)
- return ERR_PTR(err);
-
- mxfrm = kzalloc(sizeof(*mxfrm), GFP_KERNEL);
- if (!mxfrm)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&mxfrm->lock);
- memcpy(&mxfrm->accel_xfrm.attrs, attrs,
- sizeof(mxfrm->accel_xfrm.attrs));
-
- return &mxfrm->accel_xfrm;
-}
-
-static void mlx5_ipsec_offload_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
- struct mlx5_ipsec_esp_xfrm *mxfrm = container_of(xfrm, struct mlx5_ipsec_esp_xfrm,
- accel_xfrm);
-
- /* assuming no sa_ctx are connected to this xfrm_ctx */
- WARN_ON(mxfrm->sa_ctx);
- kfree(mxfrm);
-}
-
-struct mlx5_ipsec_obj_attrs {
- const struct aes_gcm_keymat *aes_gcm;
- u32 accel_flags;
- u32 esn_msb;
- u32 enc_key_id;
-};
-
-static int mlx5_create_ipsec_obj(struct mlx5_core_dev *mdev,
- struct mlx5_ipsec_obj_attrs *attrs,
- u32 *ipsec_id)
-{
- const struct aes_gcm_keymat *aes_gcm = attrs->aes_gcm;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
- u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {};
- void *obj, *salt_p, *salt_iv_p;
- int err;
-
- obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object);
-
- /* salt and seq_iv */
- salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt);
- memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt));
-
- switch (aes_gcm->icv_len) {
- case 64:
- MLX5_SET(ipsec_obj, obj, icv_length,
- MLX5_IPSEC_OBJECT_ICV_LEN_8B);
- break;
- case 96:
- MLX5_SET(ipsec_obj, obj, icv_length,
- MLX5_IPSEC_OBJECT_ICV_LEN_12B);
- break;
- case 128:
- MLX5_SET(ipsec_obj, obj, icv_length,
- MLX5_IPSEC_OBJECT_ICV_LEN_16B);
- break;
- default:
- return -EINVAL;
- }
- salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv);
- memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv));
- /* esn */
- if (attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
- MLX5_SET(ipsec_obj, obj, esn_en, 1);
- MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn_msb);
- if (attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
- MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
- }
-
- MLX5_SET(ipsec_obj, obj, dekn, attrs->enc_key_id);
-
- /* general object fields set */
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
- MLX5_GENERAL_OBJECT_TYPES_IPSEC);
-
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
- if (!err)
- *ipsec_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-
- return err;
-}
-
-static void mlx5_destroy_ipsec_obj(struct mlx5_core_dev *mdev, u32 ipsec_id)
-{
- u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
-
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
- MLX5_GENERAL_OBJECT_TYPES_IPSEC);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, ipsec_id);
-
- mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static void *mlx5_ipsec_offload_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *accel_xfrm,
- const __be32 saddr[4], const __be32 daddr[4],
- const __be32 spi, bool is_ipv6, u32 *hw_handle)
-{
- struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs = &accel_xfrm->attrs;
- struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
- struct mlx5_ipsec_obj_attrs ipsec_attrs = {};
- struct mlx5_ipsec_esp_xfrm *mxfrm;
- struct mlx5_ipsec_sa_ctx *sa_ctx;
- int err;
-
- /* alloc SA context */
- sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
- if (!sa_ctx)
- return ERR_PTR(-ENOMEM);
-
- sa_ctx->dev = mdev;
-
- mxfrm = container_of(accel_xfrm, struct mlx5_ipsec_esp_xfrm, accel_xfrm);
- mutex_lock(&mxfrm->lock);
- sa_ctx->mxfrm = mxfrm;
-
- /* key */
- err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key,
- aes_gcm->key_len / BITS_PER_BYTE,
- MLX5_ACCEL_OBJ_IPSEC_KEY,
- &sa_ctx->enc_key_id);
- if (err) {
- mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err);
- goto err_sa_ctx;
- }
-
- ipsec_attrs.aes_gcm = aes_gcm;
- ipsec_attrs.accel_flags = accel_xfrm->attrs.flags;
- ipsec_attrs.esn_msb = accel_xfrm->attrs.esn;
- ipsec_attrs.enc_key_id = sa_ctx->enc_key_id;
- err = mlx5_create_ipsec_obj(mdev, &ipsec_attrs,
- &sa_ctx->ipsec_obj_id);
- if (err) {
- mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err);
- goto err_enc_key;
- }
-
- *hw_handle = sa_ctx->ipsec_obj_id;
- mxfrm->sa_ctx = sa_ctx;
- mutex_unlock(&mxfrm->lock);
-
- return sa_ctx;
-
-err_enc_key:
- mlx5_destroy_encryption_key(mdev, sa_ctx->enc_key_id);
-err_sa_ctx:
- mutex_unlock(&mxfrm->lock);
- kfree(sa_ctx);
- return ERR_PTR(err);
-}
-
-static void mlx5_ipsec_offload_delete_sa_ctx(void *context)
-{
- struct mlx5_ipsec_sa_ctx *sa_ctx = (struct mlx5_ipsec_sa_ctx *)context;
- struct mlx5_ipsec_esp_xfrm *mxfrm = sa_ctx->mxfrm;
-
- mutex_lock(&mxfrm->lock);
- mlx5_destroy_ipsec_obj(sa_ctx->dev, sa_ctx->ipsec_obj_id);
- mlx5_destroy_encryption_key(sa_ctx->dev, sa_ctx->enc_key_id);
- kfree(sa_ctx);
- mxfrm->sa_ctx = NULL;
- mutex_unlock(&mxfrm->lock);
-}
-
-static int mlx5_ipsec_offload_init(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static int mlx5_modify_ipsec_obj(struct mlx5_core_dev *mdev,
- struct mlx5_ipsec_obj_attrs *attrs,
- u32 ipsec_id)
-{
- u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {};
- u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)];
- u64 modify_field_select = 0;
- u64 general_obj_types;
- void *obj;
- int err;
-
- if (!(attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED))
- return 0;
-
- general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
- if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
- return -EINVAL;
-
- /* general object fields set */
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, ipsec_id);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
- if (err) {
- mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n",
- ipsec_id, err);
- return err;
- }
-
- obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object);
- modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select);
-
- /* esn */
- if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) ||
- !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB))
- return -EOPNOTSUPP;
-
- obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object);
- MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn_msb);
- if (attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
- MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
-
- /* general object fields set */
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
-
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_ipsec_offload_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- struct mlx5_ipsec_obj_attrs ipsec_attrs = {};
- struct mlx5_core_dev *mdev = xfrm->mdev;
- struct mlx5_ipsec_esp_xfrm *mxfrm;
-
- int err = 0;
-
- if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
- return 0;
-
- if (mlx5_ipsec_offload_esp_validate_xfrm_attrs(mdev, attrs))
- return -EOPNOTSUPP;
-
- mxfrm = container_of(xfrm, struct mlx5_ipsec_esp_xfrm, accel_xfrm);
-
- mutex_lock(&mxfrm->lock);
-
- if (!mxfrm->sa_ctx)
- /* Not bound xfrm, change only sw attrs */
- goto change_sw_xfrm_attrs;
-
- /* need to add find and replace in ipsec_rhash_sa the sa_ctx */
- /* modify device with new hw_sa */
- ipsec_attrs.accel_flags = attrs->flags;
- ipsec_attrs.esn_msb = attrs->esn;
- err = mlx5_modify_ipsec_obj(mdev,
- &ipsec_attrs,
- mxfrm->sa_ctx->ipsec_obj_id);
-
-change_sw_xfrm_attrs:
- if (!err)
- memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
-
- mutex_unlock(&mxfrm->lock);
- return err;
-}
-
-static const struct mlx5_accel_ipsec_ops ipsec_offload_ops = {
- .device_caps = mlx5_ipsec_offload_device_caps,
- .create_hw_context = mlx5_ipsec_offload_create_sa_ctx,
- .free_hw_context = mlx5_ipsec_offload_delete_sa_ctx,
- .init = mlx5_ipsec_offload_init,
- .esp_create_xfrm = mlx5_ipsec_offload_esp_create_xfrm,
- .esp_destroy_xfrm = mlx5_ipsec_offload_esp_destroy_xfrm,
- .esp_modify_xfrm = mlx5_ipsec_offload_esp_modify_xfrm,
-};
-
-const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev)
-{
- if (!mlx5_ipsec_offload_device_caps(mdev))
- return NULL;
-
- return &ipsec_offload_ops;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h
deleted file mode 100644
index 970c66d19c1d..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
-
-#ifndef __MLX5_IPSEC_OFFLOAD_H__
-#define __MLX5_IPSEC_OFFLOAD_H__
-
-#include <linux/mlx5/driver.h>
-#include "accel/ipsec.h"
-
-#ifdef CONFIG_MLX5_IPSEC
-
-const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev);
-static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev)
-{
- if (!MLX5_CAP_GEN(mdev, ipsec_offload))
- return false;
-
- if (!MLX5_CAP_GEN(mdev, log_max_dek))
- return false;
-
- if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
- MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
- return false;
-
- return MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) &&
- MLX5_CAP_ETH(mdev, insert_trailer);
-}
-
-#else
-static inline const struct mlx5_accel_ipsec_ops *
-mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) { return NULL; }
-static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev)
-{
- return false;
-}
-
-#endif /* CONFIG_MLX5_IPSEC */
-#endif /* __MLX5_IPSEC_OFFLOAD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
deleted file mode 100644
index 6c2b86a26863..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-
-#include "accel/tls.h"
-#include "mlx5_core.h"
-#include "lib/mlx5.h"
-
-#ifdef CONFIG_MLX5_FPGA_TLS
-#include "fpga/tls.h"
-
-int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx)
-{
- return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
- start_offload_tcp_sn, p_swid,
- direction_sx);
-}
-
-void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx)
-{
- mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
-}
-
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn)
-{
- return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
-}
-
-bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_is_tls_device(mdev) ||
- mlx5_accel_is_ktls_device(mdev);
-}
-
-u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_tls_device_caps(mdev);
-}
-
-int mlx5_accel_tls_init(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_tls_init(mdev);
-}
-
-void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
-{
- mlx5_fpga_tls_cleanup(mdev);
-}
-#endif
-
-#ifdef CONFIG_MLX5_TLS
-int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id)
-{
- u32 sz_bytes;
- void *key;
-
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128: {
- struct tls12_crypto_info_aes_gcm_128 *info =
- (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
-
- key = info->key;
- sz_bytes = sizeof(info->key);
- break;
- }
- case TLS_CIPHER_AES_GCM_256: {
- struct tls12_crypto_info_aes_gcm_256 *info =
- (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
-
- key = info->key;
- sz_bytes = sizeof(info->key);
- break;
- }
- default:
- return -EINVAL;
- }
-
- return mlx5_create_encryption_key(mdev, key, sz_bytes,
- MLX5_ACCEL_OBJ_TLS_KEY,
- p_key_id);
-}
-
-void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
-{
- mlx5_destroy_encryption_key(mdev, key_id);
-}
-#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
deleted file mode 100644
index fd874f0c380a..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_ACCEL_TLS_H__
-#define __MLX5_ACCEL_TLS_H__
-
-#include <linux/mlx5/driver.h>
-#include <linux/tls.h>
-
-#ifdef CONFIG_MLX5_TLS
-int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id);
-void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
-
-static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev)
-{
- return MLX5_CAP_GEN(mdev, tls_tx);
-}
-
-static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev)
-{
- return MLX5_CAP_GEN(mdev, tls_rx);
-}
-
-static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev)
-{
- if (!mlx5_accel_is_ktls_tx(mdev) &&
- !mlx5_accel_is_ktls_rx(mdev))
- return false;
-
- if (!MLX5_CAP_GEN(mdev, log_max_dek))
- return false;
-
- return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
-}
-
-static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info)
-{
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- if (crypto_info->version == TLS_1_2_VERSION)
- return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
- break;
- }
-
- return false;
-}
-#else
-static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev)
-{ return false; }
-
-static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev)
-{ return false; }
-
-static inline int
-mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id) { return -ENOTSUPP; }
-static inline void
-mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {}
-
-static inline bool
-mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
-static inline bool
-mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info) { return false; }
-#endif
-
-enum {
- MLX5_ACCEL_TLS_TX = BIT(0),
- MLX5_ACCEL_TLS_RX = BIT(1),
- MLX5_ACCEL_TLS_V12 = BIT(2),
- MLX5_ACCEL_TLS_V13 = BIT(3),
- MLX5_ACCEL_TLS_LRO = BIT(4),
- MLX5_ACCEL_TLS_IPV6 = BIT(5),
- MLX5_ACCEL_TLS_AES_GCM128 = BIT(30),
- MLX5_ACCEL_TLS_AES_GCM256 = BIT(31),
-};
-
-struct mlx5_ifc_tls_flow_bits {
- u8 src_port[0x10];
- u8 dst_port[0x10];
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
- u8 ipv6[0x1];
- u8 direction_sx[0x1];
- u8 reserved_at_2[0x1e];
-};
-
-#ifdef CONFIG_MLX5_FPGA_TLS
-int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx);
-void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx);
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn);
-bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
-u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
-int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
-void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
-
-#else
-
-static inline int
-mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx) { return -ENOTSUPP; }
-static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx) { }
-static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn) { return 0; }
-static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return mlx5_accel_is_ktls_device(mdev);
-}
-static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
-static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
-static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
-#endif
-
-#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 291e427e9e4f..6aca004e88cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -71,53 +71,6 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
return cpu_handle;
}
-static int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_frag_buf *buf, int node)
-{
- dma_addr_t t;
-
- buf->size = size;
- buf->npages = 1;
- buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
-
- buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
- if (!buf->frags)
- return -ENOMEM;
-
- buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
- &t, node);
- if (!buf->frags->buf)
- goto err_out;
-
- buf->frags->map = t;
-
- while (t & ((1 << buf->page_shift) - 1)) {
- --buf->page_shift;
- buf->npages *= 2;
- }
-
- return 0;
-err_out:
- kfree(buf->frags);
- return -ENOMEM;
-}
-
-int mlx5_buf_alloc(struct mlx5_core_dev *dev,
- int size, struct mlx5_frag_buf *buf)
-{
- return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
-}
-EXPORT_SYMBOL(mlx5_buf_alloc);
-
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
-{
- dma_free_coherent(mlx5_core_dma_dev(dev), buf->size, buf->frags->buf,
- buf->frags->map);
-
- kfree(buf->frags);
-}
-EXPORT_SYMBOL_GPL(mlx5_buf_free);
-
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node)
{
@@ -183,11 +136,11 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
u32 db_per_page = PAGE_SIZE / cache_line_size();
struct mlx5_db_pgdir *pgdir;
- pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ pgdir = kzalloc_node(sizeof(*pgdir), GFP_KERNEL, node);
if (!pgdir)
return NULL;
- pgdir->bitmap = bitmap_zalloc(db_per_page, GFP_KERNEL);
+ pgdir->bitmap = bitmap_zalloc_node(db_per_page, GFP_KERNEL, node);
if (!pgdir->bitmap) {
kfree(pgdir);
return NULL;
@@ -260,12 +213,6 @@ out:
}
EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
-int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
-{
- return mlx5_db_alloc_node(dev, db, dev->priv.numa_node);
-}
-EXPORT_SYMBOL_GPL(mlx5_db_alloc);
-
void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
{
u32 db_per_page = PAGE_SIZE / cache_line_size();
@@ -286,19 +233,6 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
}
EXPORT_SYMBOL_GPL(mlx5_db_free);
-void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
-{
- u64 addr;
- int i;
-
- for (i = 0; i < buf->npages; i++) {
- addr = buf->frags->map + (i << buf->page_shift);
-
- pas[i] = cpu_to_be64(addr);
- }
-}
-EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
-
void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm)
{
int i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a46284ca5172..2e0d59ca62b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -31,7 +31,6 @@
*/
#include <linux/highmem.h>
-#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
@@ -131,11 +130,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd)
static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cmd->alloc_lock, flags);
+ lockdep_assert_held(&cmd->alloc_lock);
set_bit(idx, &cmd->bitmask);
- spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}
static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
@@ -145,13 +141,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
{
+ struct mlx5_cmd *cmd = ent->cmd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
if (!refcount_dec_and_test(&ent->refcnt))
- return;
+ goto out;
- if (ent->idx >= 0)
- cmd_free_index(ent->cmd, ent->idx);
+ if (ent->idx >= 0) {
+ cmd_free_index(cmd, ent->idx);
+ up(ent->page_queue ? &cmd->pages_sem : &cmd->sem);
+ }
cmd_free_ent(ent);
+out:
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}
static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
@@ -186,10 +190,10 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block)
int xor_len = sizeof(*block) - sizeof(block->data) - 1;
if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
- return -EINVAL;
+ return -EHWPOISON;
if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
- return -EINVAL;
+ return -EHWPOISON;
return 0;
}
@@ -255,12 +259,12 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
if (sig != 0xff)
- return -EINVAL;
+ return -EHWPOISON;
for (i = 0; i < n && next; i++) {
err = verify_block_sig(next->buf);
if (err)
- return err;
+ return -EHWPOISON;
next = next->next;
}
@@ -473,9 +477,14 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VHCA_STATE:
case MLX5_CMD_OP_MODIFY_VHCA_STATE:
case MLX5_CMD_OP_ALLOC_SF:
+ case MLX5_CMD_OP_SUSPEND_VHCA:
+ case MLX5_CMD_OP_RESUME_VHCA:
+ case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+ case MLX5_CMD_OP_SAVE_VHCA_STATE:
+ case MLX5_CMD_OP_LOAD_VHCA_STATE:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
- return -EIO;
+ return -ENOLINK;
default:
mlx5_core_err(dev, "Unknown FW command (%d)\n", op);
return -EINVAL;
@@ -670,6 +679,11 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE);
MLX5_COMMAND_STR_CASE(ALLOC_SF);
MLX5_COMMAND_STR_CASE(DEALLOC_SF);
+ MLX5_COMMAND_STR_CASE(SUSPEND_VHCA);
+ MLX5_COMMAND_STR_CASE(RESUME_VHCA);
+ MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE);
+ MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE);
default: return "unknown command opcode";
}
}
@@ -756,44 +770,72 @@ struct mlx5_ifc_mbox_in_bits {
u8 reserved_at_40[0x40];
};
-void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome)
+void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out)
{
- *status = MLX5_GET(mbox_out, out, status);
- *syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
+
+ mlx5_core_err_rl(dev,
+ "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
+ mlx5_command_str(opcode), opcode, op_mod,
+ cmd_status_str(status), status, syndrome, cmd_status_to_err(status));
}
+EXPORT_SYMBOL(mlx5_cmd_out_err);
-static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
+static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out)
{
+ u16 opcode, op_mod;
u32 syndrome;
u8 status;
- u16 opcode;
- u16 op_mod;
u16 uid;
+ int err;
- mlx5_cmd_mbox_status(out, &status, &syndrome);
- if (!status)
- return 0;
+ syndrome = MLX5_GET(mbox_out, out, syndrome);
+ status = MLX5_GET(mbox_out, out, status);
opcode = MLX5_GET(mbox_in, in, opcode);
op_mod = MLX5_GET(mbox_in, in, op_mod);
uid = MLX5_GET(mbox_in, in, uid);
+ err = cmd_status_to_err(status);
+
if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
- mlx5_core_err_rl(dev,
- "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
- mlx5_command_str(opcode), opcode, op_mod,
- cmd_status_str(status), status, syndrome);
+ mlx5_cmd_out_err(dev, opcode, op_mod, out);
else
mlx5_core_dbg(dev,
- "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
- mlx5_command_str(opcode),
- opcode, op_mod,
- cmd_status_str(status),
- status,
- syndrome);
+ "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
+ mlx5_command_str(opcode), opcode, op_mod, uid,
+ cmd_status_str(status), status, syndrome, err);
+}
+
+int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out)
+{
+	/* aborted due to a PCI error or via the reset flow, see mlx5_cmd_trigger_completions() */
+ if (err == -ENXIO) {
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+ u32 syndrome;
+ u8 status;
+
+		/* PCI error: emulate the command return status for a smooth reset */
+ err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status);
+ MLX5_SET(mbox_out, out, status, status);
+ MLX5_SET(mbox_out, out, syndrome, syndrome);
+ if (!err)
+ return 0;
+ }
+
+ /* driver or FW delivery error */
+ if (err != -EREMOTEIO && err)
+ return err;
- return cmd_status_to_err(status);
+ /* check outbox status */
+ err = cmd_status_to_err(MLX5_GET(mbox_out, out, status));
+ if (err)
+ cmd_status_print(dev, in, out);
+
+ return err;
}
+EXPORT_SYMBOL(mlx5_cmd_check);
static void dump_command(struct mlx5_core_dev *dev,
struct mlx5_cmd_work_ent *ent, int input)
@@ -900,25 +942,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
return cmd->allowed_opcode == opcode;
}
-static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
-{
- unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
- int idx;
-
-retry:
- idx = cmd_alloc_index(cmd);
- if (idx < 0 && time_before(jiffies, alloc_end)) {
- /* Index allocation can fail on heavy load of commands. This is a temporary
- * situation as the current command already holds the semaphore, meaning that
- * another command completion is being handled and it is expected to release
- * the entry index soon.
- */
- cpu_relax();
- goto retry;
- }
- return idx;
-}
-
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
{
return pci_channel_offline(dev->pdev) ||
@@ -946,7 +969,7 @@ static void cmd_work_handler(struct work_struct *work)
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
- alloc_ret = cmd_alloc_index_retry(cmd);
+ alloc_ret = cmd_alloc_index(cmd);
if (alloc_ret < 0) {
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
if (ent->callback) {
@@ -995,13 +1018,7 @@ static void cmd_work_handler(struct work_struct *work)
/* Skip sending command to fw if internal error */
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
- u8 status = 0;
- u32 drv_synd;
-
- ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
- MLX5_SET(mbox_out, ent->out, status, status);
- MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
-
+ ent->ret = -ENXIO;
mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
return;
}
@@ -1020,6 +1037,31 @@ static void cmd_work_handler(struct work_struct *work)
}
}
+static int deliv_status_to_err(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_DELIVERY_STAT_OK:
+ case MLX5_DRIVER_STATUS_ABORTED:
+ return 0;
+ case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+ case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+ return -EBADR;
+ case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+ case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+ case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+ return -EFAULT; /* Bad address */
+ case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+ case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+ case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+ case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+ return -ENOMSG;
+ case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+ return -EIO;
+ default:
+ return -EINVAL;
+ }
+}
+
static const char *deliv_status_to_str(u8 status)
{
switch (status) {
@@ -1116,16 +1158,27 @@ out_err:
/* Notes:
* 1. Callback functions may not sleep
 * 2. page queue commands do not support asynchronous completion
+ *
+ * Return value in the synchronous case (!callback):
+ *	ret < 0 : Command execution couldn't be submitted by the driver
+ *	ret > 0 : Command execution couldn't be performed by firmware
+ *	ret == 0: Command was executed by FW; caller must check the FW outbox status.
+ *
+ * Return value in the asynchronous case (callback):
+ *	ret < 0 : Command execution couldn't be submitted by the driver
+ *	ret == 0: Command will be submitted to FW for execution
+ *		   and the callback will be called for further status updates
*/
static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
struct mlx5_cmd_msg *out, void *uout, int uout_size,
mlx5_cmd_cbk_t callback,
- void *context, int page_queue, u8 *status,
+ void *context, int page_queue,
u8 token, bool force_polling)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
struct mlx5_cmd_stats *stats;
+ u8 status = 0;
int err = 0;
s64 ds;
u16 op;
@@ -1156,12 +1209,12 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
cmd_work_handler(&ent->work);
} else if (!queue_work(cmd->wq, &ent->work)) {
mlx5_core_warn(dev, "failed to queue work\n");
- err = -ENOMEM;
+ err = -EALREADY;
goto out_free;
}
if (callback)
- goto out; /* mlx5_cmd_comp_handler() will put(ent) */
+ return 0; /* mlx5_cmd_comp_handler() will put(ent) */
err = wait_func(dev, ent);
if (err == -ETIMEDOUT || err == -ECANCELED)
@@ -1179,12 +1232,11 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
"fw exec time for %s is %lld nsec\n",
mlx5_command_str(op), ds);
- *status = ent->status;
out_free:
+ status = ent->status;
cmd_ent_put(ent);
-out:
- return err;
+ return err ? : status;
}
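/* Illustrative sketch (not part of this patch): how the synchronous path of
 * cmd_exec() consumes the tri-state return documented above. The wrapper
 * name is hypothetical; deliv_status_to_err() is the real helper added by
 * this patch.
 */
static int example_consume_invoke_ret(int ret)
{
	if (ret < 0)		/* driver failure, command never ran */
		return ret;
	if (ret > 0)		/* FW delivery status, map to an errno */
		return deliv_status_to_err(ret);
	return 0;		/* executed by FW; caller parses the outbox status */
}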
static ssize_t dbg_write(struct file *filp, const char __user *buf,
@@ -1501,7 +1553,7 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
{
struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
- dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
+ dbg->dbg_root = debugfs_create_dir("cmd", mlx5_debugfs_get_dev_root(dev));
debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops);
debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops);
@@ -1602,8 +1654,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
vector = vec & 0xffffffff;
for (i = 0; i < (1 << cmd->log_sz); i++) {
if (test_bit(i, &vector)) {
- struct semaphore *sem;
-
ent = cmd->ent_arr[i];
/* if we already completed the command, ignore it */
@@ -1626,22 +1676,18 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
cmd_ent_put(ent);
- if (ent->page_queue)
- sem = &cmd->pages_sem;
- else
- sem = &cmd->sem;
ent->ts2 = ktime_get_ns();
memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
dump_command(dev, ent, 0);
- if (!ent->ret) {
+
+ if (vec & MLX5_TRIGGERED_CMD_COMP)
+ ent->ret = -ENXIO;
+
+ if (!ent->ret) { /* Command completed by FW */
if (!cmd->checksum_disabled)
ent->ret = verify_signature(ent);
- else
- ent->ret = 0;
- if (vec & MLX5_TRIGGERED_CMD_COMP)
- ent->status = MLX5_DRIVER_STATUS_ABORTED;
- else
- ent->status = ent->lay->status_own >> 1;
+
+ ent->status = ent->lay->status_own >> 1;
mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
ent->ret, deliv_status_to_str(ent->status), ent->status);
@@ -1659,21 +1705,18 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
callback = ent->callback;
context = ent->context;
- err = ent->ret;
- if (!err) {
+ err = ent->ret ? : ent->status;
+ if (err > 0) /* Failed in FW, command didn't execute */
+ err = deliv_status_to_err(err);
+
+ if (!err)
err = mlx5_copy_from_msg(ent->uout,
ent->out,
ent->uout_size);
- err = err ? err : mlx5_cmd_check(dev,
- ent->in->first.data,
- ent->uout);
- }
-
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
- err = err ? err : ent->status;
/* final consumer is done, release ent */
cmd_ent_put(ent);
callback(err, context);
@@ -1683,12 +1726,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
*/
complete(&ent->done);
}
- up(sem);
}
}
}
-void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
{
struct mlx5_cmd *cmd = &dev->cmd;
unsigned long bitmask;
@@ -1728,12 +1770,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev)
struct mlx5_cmd *cmd = &dev->cmd;
int i;
- for (i = 0; i < cmd->max_reg_cmds; i++)
- while (down_trylock(&cmd->sem))
+ for (i = 0; i < cmd->max_reg_cmds; i++) {
+ while (down_trylock(&cmd->sem)) {
mlx5_cmd_trigger_completions(dev);
+ cond_resched();
+ }
+ }
- while (down_trylock(&cmd->pages_sem))
+ while (down_trylock(&cmd->pages_sem)) {
mlx5_cmd_trigger_completions(dev);
+ cond_resched();
+ }
/* Unlock cmdif */
up(&cmd->pages_sem);
@@ -1741,31 +1788,6 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev)
up(&cmd->sem);
}
-static int status_to_err(u8 status)
-{
- switch (status) {
- case MLX5_CMD_DELIVERY_STAT_OK:
- case MLX5_DRIVER_STATUS_ABORTED:
- return 0;
- case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
- case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
- return -EBADR;
- case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
- case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
- case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
- return -EFAULT; /* Bad address */
- case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
- case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
- case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
- case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
- return -ENOMSG;
- case MLX5_CMD_DELIVERY_STAT_FW_ERR:
- return -EIO;
- default:
- return -EINVAL;
- }
-}
-
static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
gfp_t gfp)
{
@@ -1809,27 +1831,23 @@ static int is_manage_pages(void *in)
return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES;
}
+/* Notes:
+ * 1. Callback functions may not sleep
+ * 2. Page queue commands do not support asynchronous completion
+ */
static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int out_size, mlx5_cmd_cbk_t callback, void *context,
bool force_polling)
{
- struct mlx5_cmd_msg *inb;
- struct mlx5_cmd_msg *outb;
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+ struct mlx5_cmd_msg *inb, *outb;
int pages_queue;
gfp_t gfp;
- int err;
- u8 status = 0;
- u32 drv_synd;
- u16 opcode;
u8 token;
+ int err;
- opcode = MLX5_GET(mbox_in, in, opcode);
- if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) {
- err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
- MLX5_SET(mbox_out, out, status, status);
- MLX5_SET(mbox_out, out, syndrome, drv_synd);
- return err;
- }
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode))
+ return -ENXIO;
pages_queue = is_manage_pages(in);
gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
@@ -1855,46 +1873,143 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
}
err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
- pages_queue, &status, token, force_polling);
+ pages_queue, token, force_polling);
+ if (callback)
+ return err;
+
+ if (err > 0) /* Failed in FW, command didn't execute */
+ err = deliv_status_to_err(err);
+
if (err)
goto out_out;
- mlx5_core_dbg(dev, "err %d, status %d\n", err, status);
- if (status) {
- err = status_to_err(status);
- goto out_out;
+ /* command completed by FW */
+ err = mlx5_copy_from_msg(out, outb, out_size);
+out_out:
+ mlx5_free_cmd_msg(dev, outb);
+out_in:
+ free_msg(dev, inb);
+ return err;
+}
+
+static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
+ u32 syndrome, int err)
+{
+ struct mlx5_cmd_stats *stats;
+
+ if (!err)
+ return;
+
+ stats = &dev->cmd.stats[opcode];
+ spin_lock_irq(&stats->lock);
+ stats->failed++;
+ if (err < 0)
+ stats->last_failed_errno = -err;
+ if (err == -EREMOTEIO) {
+ stats->failed_mbox_status++;
+ stats->last_failed_mbox_status = status;
+ stats->last_failed_syndrome = syndrome;
}
+ spin_unlock_irq(&stats->lock);
+}
- if (!callback)
- err = mlx5_copy_from_msg(out, outb, out_size);
+/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
+static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
+{
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
-out_out:
- if (!callback)
- mlx5_free_cmd_msg(dev, outb);
+	if (err == -EREMOTEIO) /* keep -EREMOTEIO reserved for bad outbox status */
+ err = -EIO;
-out_in:
- if (!callback)
- free_msg(dev, inb);
+ if (!err && status != MLX5_CMD_STAT_OK)
+ err = -EREMOTEIO;
+
+ cmd_status_log(dev, opcode, status, syndrome, err);
+ return err;
+}
+
+/**
+ * mlx5_cmd_do - Executes a fw command, waits for completion.
+ * Unlike mlx5_cmd_exec, this function will not translate or intercept
+ * outbox.status and will return -EREMOTEIO when
+ * outbox.status != MLX5_CMD_STAT_OK
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return:
+ * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK.
+ * Caller must check FW outbox status.
+ * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK.
+ * < 0 : Command execution couldn't be performed by firmware or driver
+ */
+int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size)
+{
+ int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+ err = cmd_status_err(dev, err, opcode, out);
return err;
}
+EXPORT_SYMBOL(mlx5_cmd_do);
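/* Usage sketch (illustrative, not part of this patch): a caller that wants
 * the raw FW outbox on failure uses mlx5_cmd_do() and handles -EREMOTEIO
 * itself; mlx5_cmd_exec() instead folds that case into a normalized errno.
 * The surrounding function is hypothetical; the opcode and mailbox layouts
 * are standard mlx5_ifc definitions.
 */
static int example_query_special_contexts(struct mlx5_core_dev *dev, u32 *resd_lkey)
{
	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
	int err;

	MLX5_SET(query_special_contexts_in, in, opcode,
		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
	err = mlx5_cmd_do(dev, in, sizeof(in), out, sizeof(out));
	if (err == -EREMOTEIO) {
		/* FW executed the command but flagged an error; the outbox
		 * status and syndrome are valid and may be inspected.
		 */
		u8 status = MLX5_GET(mbox_out, out, status);

		mlx5_core_dbg(dev, "bad status 0x%x\n", status);
		return err;
	}
	if (err)
		return err; /* driver or delivery failure, outbox is invalid */

	*resd_lkey = MLX5_GET(query_special_contexts_out, out, resd_lkey);
	return 0;
}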
+/**
+ * mlx5_cmd_exec - Executes a fw command, waits for completion
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return: 0 if no error, FW command execution was successful
+ * and outbox status is ok.
+ */
int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int out_size)
{
- int err;
+ int err = mlx5_cmd_do(dev, in, in_size, out, out_size);
- err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
- return err ? : mlx5_cmd_check(dev, in, out);
+ return mlx5_cmd_check(dev, err, in, out);
}
EXPORT_SYMBOL(mlx5_cmd_exec);
+/**
+ * mlx5_cmd_exec_polling - Executes a fw command, polling for completion
+ * Needed for driver force teardown, when the command completion EQ
+ * is not available to complete the command
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return: 0 if no error, FW command execution was successful
+ * and outbox status is ok.
+ */
+int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size)
+{
+ int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+ err = cmd_status_err(dev, err, opcode, out);
+ return mlx5_cmd_check(dev, err, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_polling);
+
void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
struct mlx5_async_ctx *ctx)
{
ctx->dev = dev;
/* Starts at 1 to avoid doing wake_up if we are not cleaning up */
atomic_set(&ctx->num_inflight, 1);
- init_waitqueue_head(&ctx->wait);
+ init_completion(&ctx->inflight_done);
}
EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
@@ -1908,19 +2023,21 @@ EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
*/
void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
{
- atomic_dec(&ctx->num_inflight);
- wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
+ if (!atomic_dec_and_test(&ctx->num_inflight))
+ wait_for_completion(&ctx->inflight_done);
}
EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
static void mlx5_cmd_exec_cb_handler(int status, void *_work)
{
struct mlx5_async_work *work = _work;
- struct mlx5_async_ctx *ctx = work->ctx;
+ struct mlx5_async_ctx *ctx;
+ ctx = work->ctx;
+ status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
work->user_callback(status, work);
if (atomic_dec_and_test(&ctx->num_inflight))
- wake_up(&ctx->wait);
+ complete(&ctx->inflight_done);
}
int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
@@ -1931,28 +2048,19 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
work->ctx = ctx;
work->user_callback = callback;
+ work->opcode = MLX5_GET(mbox_in, in, opcode);
+ work->out = out;
if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
return -EIO;
ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
mlx5_cmd_exec_cb_handler, work, false);
if (ret && atomic_dec_and_test(&ctx->num_inflight))
- wake_up(&ctx->wait);
+ complete(&ctx->inflight_done);
return ret;
}
EXPORT_SYMBOL(mlx5_cmd_exec_cb);
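/* Lifecycle sketch (illustrative, not part of this patch): num_inflight
 * starts at 1, so mlx5_cmd_cleanup_async_ctx() only blocks on inflight_done
 * when a real command is still pending. The callback and wrapper below are
 * hypothetical.
 */
static void example_cb(int status, struct mlx5_async_work *work)
{
	/* May not sleep; status is already normalized by cmd_status_err() */
}

static int example_async_exec(struct mlx5_core_dev *dev, void *in, int in_size,
			      void *out, int out_size)
{
	struct mlx5_async_ctx ctx;
	struct mlx5_async_work work;
	int err;

	mlx5_cmd_init_async_ctx(dev, &ctx);
	err = mlx5_cmd_exec_cb(&ctx, in, in_size, out, out_size,
			       example_cb, &work);
	/* Waits for example_cb (if the command was queued) to finish */
	mlx5_cmd_cleanup_async_ctx(&ctx);
	return err;
}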
-int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
- void *out, int out_size)
-{
- int err;
-
- err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
-
- return err ? : mlx5_cmd_check(dev, in, out);
-}
-EXPORT_SYMBOL(mlx5_cmd_exec_polling);
-
static void destroy_msg_cache(struct mlx5_core_dev *dev)
{
struct cmd_msg_cache *ch;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 5371ad0a12eb..4caa1b6f40ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/mlx5/driver.h>
#include <rdma/ib_verbs.h>
@@ -86,8 +85,9 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
}
-int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
- u32 *in, int inlen, u32 *out, int outlen)
+/* Callers must verify outbox status in case of err */
+int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen, u32 *out, int outlen)
{
int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
c_eqn_or_apu_element);
@@ -101,7 +101,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
memset(out, 0, outlen);
MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ err = mlx5_cmd_do(dev, in, inlen, out, outlen);
if (err)
return err;
@@ -148,6 +148,16 @@ err_cmd:
mlx5_cmd_exec_in(dev, destroy_cq, din);
return err;
}
+EXPORT_SYMBOL(mlx5_create_cq);
+
+/* outbox is checked and the err value is normalized */
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen, u32 *out, int outlen)
+{
+ int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen);
+
+ return mlx5_cmd_check(dev, err, in, out);
+}
EXPORT_SYMBOL(mlx5_core_create_cq);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 10d195042ab5..3e232a65a0c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/cq.h>
@@ -99,26 +98,32 @@ void mlx5_unregister_debugfs(void)
debugfs_remove(mlx5_debugfs_root);
}
+struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev)
+{
+ return dev->priv.dbg.dbg_root;
+}
+EXPORT_SYMBOL(mlx5_debugfs_get_dev_root);
+
void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
{
- dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root);
+ dev->priv.dbg.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg.dbg_root);
}
EXPORT_SYMBOL(mlx5_qp_debugfs_init);
void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.qp_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.qp_debugfs);
}
EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup);
void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
{
- dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root);
+ dev->priv.dbg.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg.dbg_root);
}
void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.eq_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.eq_debugfs);
}
static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
@@ -161,6 +166,28 @@ static const struct file_operations stats_fops = {
.write = average_write,
};
+static ssize_t slots_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cmd *cmd;
+ char tbuf[6];
+ int weight;
+ int field;
+ int ret;
+
+ cmd = filp->private_data;
+ weight = bitmap_weight(&cmd->bitmask, cmd->max_reg_cmds);
+ field = cmd->max_reg_cmds - weight;
+ ret = snprintf(tbuf, sizeof(tbuf), "%d\n", field);
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static const struct file_operations slots_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = slots_read,
+};
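/* Usage note (assuming the standard "mlx5" debugfs root): the new counter is
 * readable at /sys/kernel/debug/mlx5/<BDF>/commands/slots_inuse and reports
 * how many of the max_reg_cmds command slots are currently taken.
 */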
+
void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
{
struct mlx5_cmd_stats *stats;
@@ -168,8 +195,10 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
const char *namep;
int i;
- cmd = &dev->priv.cmdif_debugfs;
- *cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
+ cmd = &dev->priv.dbg.cmdif_debugfs;
+ *cmd = debugfs_create_dir("commands", dev->priv.dbg.dbg_root);
+
+ debugfs_create_file("slots_inuse", 0400, *cmd, &dev->cmd, &slots_fops);
for (i = 0; i < MLX5_CMD_OP_MAX; i++) {
stats = &dev->cmd.stats[i];
@@ -180,23 +209,53 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
debugfs_create_file("average", 0400, stats->root, stats,
&stats_fops);
debugfs_create_u64("n", 0400, stats->root, &stats->n);
+ debugfs_create_u64("failed", 0400, stats->root, &stats->failed);
+ debugfs_create_u64("failed_mbox_status", 0400, stats->root,
+ &stats->failed_mbox_status);
+ debugfs_create_u32("last_failed_errno", 0400, stats->root,
+ &stats->last_failed_errno);
+ debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
+ &stats->last_failed_mbox_status);
+ debugfs_create_x32("last_failed_syndrome", 0400, stats->root,
+ &stats->last_failed_syndrome);
}
}
}
void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs);
}
void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
{
- dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root);
+ dev->priv.dbg.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg.dbg_root);
}
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.cq_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.cq_debugfs);
+}
+
+void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev)
+{
+ struct dentry *pages;
+
+ dev->priv.dbg.pages_debugfs = debugfs_create_dir("pages", dev->priv.dbg.dbg_root);
+ pages = dev->priv.dbg.pages_debugfs;
+
+ debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
+ debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages);
+ debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages);
+ debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed);
+ debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped);
+ debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages,
+ &dev->priv.reclaim_pages_discard);
+}
+
+void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ debugfs_remove_recursive(dev->priv.dbg.pages_debugfs);
}
static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
@@ -441,7 +500,7 @@ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
if (!mlx5_debugfs_root)
return 0;
- err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs,
+ err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.dbg.qp_debugfs,
&qp->dbg, qp->qpn, qp_fields,
ARRAY_SIZE(qp_fields), qp);
if (err)
@@ -468,7 +527,7 @@ int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (!mlx5_debugfs_root)
return 0;
- err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs,
+ err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.dbg.eq_debugfs,
&eq->dbg, eq->eqn, eq_fields,
ARRAY_SIZE(eq_fields), eq);
if (err)
@@ -493,7 +552,7 @@ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
if (!mlx5_debugfs_root)
return 0;
- err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs,
+ err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.dbg.cq_debugfs,
&cq->dbg, cq->cqn, cq_fields,
ARRAY_SIZE(cq_fields), cq);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index a8b84d53dfb0..0571e40c6ee5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -340,8 +340,10 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
struct auxiliary_driver *adrv;
int ret = 0, i;
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&mlx5_intf_mutex);
priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
if (!priv->adev[i]) {
bool is_supported = false;
@@ -389,6 +391,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
break;
}
}
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
mutex_unlock(&mlx5_intf_mutex);
return ret;
}
@@ -401,7 +404,9 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
pm_message_t pm = {};
int i;
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&mlx5_intf_mutex);
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
if (!priv->adev[i])
continue;
@@ -430,6 +435,7 @@ skip_suspend:
del_adev(&priv->adev[i]->adev);
priv->adev[i] = NULL;
}
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
priv->flags |= MLX5_PRIV_FLAGS_DETACH;
mutex_unlock(&mlx5_intf_mutex);
}
@@ -438,6 +444,7 @@ int mlx5_register_device(struct mlx5_core_dev *dev)
{
int ret;
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&mlx5_intf_mutex);
dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
ret = mlx5_rescan_drivers_locked(dev);
@@ -450,6 +457,7 @@ int mlx5_register_device(struct mlx5_core_dev *dev)
void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&mlx5_intf_mutex);
dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
mlx5_rescan_drivers_locked(dev);
@@ -526,19 +534,25 @@ del_adev:
int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
+ int err = 0;
lockdep_assert_held(&mlx5_intf_mutex);
if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
return 0;
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
delete_drivers(dev);
if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
- return 0;
+ goto out;
+
+ err = add_drivers(dev);
- return add_drivers(dev);
+out:
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ return err;
}
-static bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
{
u64 fsystem_guid, psystem_guid;
@@ -555,12 +569,9 @@ static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev)
PCI_SLOT(dev->pdev->devfn));
}
-static int next_phys_dev(struct device *dev, const void *data)
+static int _next_phys_dev(struct mlx5_core_dev *mdev,
+ const struct mlx5_core_dev *curr)
{
- struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
- struct mlx5_core_dev *mdev = madev->mdev;
- const struct mlx5_core_dev *curr = data;
-
if (!mlx5_core_is_pf(mdev))
return 0;
@@ -574,22 +585,52 @@ static int next_phys_dev(struct device *dev, const void *data)
return 1;
}
-/* Must be called with intf_mutex held */
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+static void *pci_get_other_drvdata(struct device *this, struct device *other)
{
- struct auxiliary_device *adev;
- struct mlx5_adev *madev;
+ if (this->driver != other->driver)
+ return NULL;
+
+ return pci_get_drvdata(to_pci_dev(other));
+}
+
+static int next_phys_dev_lag(struct device *dev, const void *data)
+{
+ struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
+
+ mdev = pci_get_other_drvdata(this->device, dev);
+ if (!mdev)
+ return 0;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager) ||
+ !MLX5_CAP_GEN(mdev, lag_master) ||
+ (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(mdev, num_lag_ports) <= 1))
+ return 0;
+
+ return _next_phys_dev(mdev, data);
+}
+
+static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev,
+ int (*match)(struct device *dev, const void *data))
+{
+ struct device *next;
if (!mlx5_core_is_pf(dev))
return NULL;
- adev = auxiliary_find_device(NULL, dev, &next_phys_dev);
- if (!adev)
+ next = bus_find_device(&pci_bus_type, NULL, dev, match);
+ if (!next)
return NULL;
- madev = container_of(adev, struct mlx5_adev, adev);
- put_device(&adev->dev);
- return madev->mdev;
+ put_device(next);
+ return pci_get_drvdata(to_pci_dev(next));
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
+{
+ lockdep_assert_held(&mlx5_intf_mutex);
+ return mlx5_get_next_dev(dev, &next_phys_dev_lag);
}
void mlx5_dev_list_lock(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 1c98652b244a..66c6a7017695 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -100,14 +100,19 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli
}
net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE);
- err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive);
+ err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack);
if (err)
- goto out;
+ return err;
err = mlx5_fw_reset_wait_reset_done(dev);
-out:
if (err)
- NL_SET_ERR_MSG_MOD(extack, "FW activate command failed");
+ return err;
+
+ mlx5_unload_one_devl_locked(dev);
+ err = mlx5_health_wait_pci_up(dev);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset");
+
return err;
}
@@ -138,6 +143,7 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct pci_dev *pdev = dev->pdev;
bool sf_dev_allocated;
+ int ret = 0;
sf_dev_allocated = mlx5_sf_dev_allocated(dev);
if (sf_dev_allocated) {
@@ -160,17 +166,21 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
switch (action) {
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
- mlx5_unload_one(dev);
- return 0;
+ mlx5_unload_one_devl_locked(dev);
+ break;
case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
- return mlx5_devlink_trigger_fw_live_patch(devlink, extack);
- return mlx5_devlink_reload_fw_activate(devlink, extack);
+ ret = mlx5_devlink_trigger_fw_live_patch(devlink, extack);
+ else
+ ret = mlx5_devlink_reload_fw_activate(devlink, extack);
+ break;
default:
/* Unsupported action should not get to this function */
WARN_ON(1);
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
}
+
+ return ret;
}
static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
@@ -178,24 +188,27 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int ret = 0;
*actions_performed = BIT(action);
switch (action) {
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
- return mlx5_load_one(dev);
+ ret = mlx5_load_one_devl_locked(dev, false);
+ break;
case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
break;
/* On fw_activate action, also driver is reloaded and reinit performed */
*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
- return mlx5_load_one(dev);
+ ret = mlx5_load_one_devl_locked(dev, false);
+ break;
default:
/* Unsupported action should not get to this function */
WARN_ON(1);
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
}
- return 0;
+ return ret;
}
static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id)
@@ -546,6 +559,13 @@ static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32
return 0;
}
+static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+	return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL;
+}
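/* Usage note (illustrative): both EQ depth parameters are driverinit-scoped
 * and take effect on the next devlink reload, e.g.
 *   devlink dev param set pci/<BDF> name event_eq_size value 1024 cmode driverinit
 *   devlink dev reload pci/<BDF>
 * Values outside [64, 4096] are rejected by the validator above.
 */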
+
static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
"flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
@@ -570,6 +590,10 @@ static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
mlx5_devlink_enable_remote_dev_reset_get,
mlx5_devlink_enable_remote_dev_reset_set, NULL),
+ DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
+ DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
};
static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
@@ -577,14 +601,6 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
struct mlx5_core_dev *dev = devlink_priv(devlink);
union devlink_param_value value;
- if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS)
- strcpy(value.vstr, "dmfs");
- else
- strcpy(value.vstr, "smfs");
- devlink_param_driverinit_value_set(devlink,
- MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
- value);
-
value.vbool = MLX5_CAP_GEN(dev, roce);
devlink_param_driverinit_value_set(devlink,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
@@ -595,19 +611,17 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
devlink_param_driverinit_value_set(devlink,
MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
value);
-
- if (MLX5_ESWITCH_MANAGER(dev)) {
- if (mlx5_esw_vport_match_metadata_supported(dev->priv.eswitch)) {
- dev->priv.eswitch->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
- value.vbool = true;
- } else {
- value.vbool = false;
- }
- devlink_param_driverinit_value_set(devlink,
- MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
- value);
- }
#endif
+
+ value.vu32 = MLX5_COMP_EQ_SIZE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ value);
+
+ value.vu32 = MLX5_NUM_ASYNC_EQE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ value);
}
static const struct devlink_param enable_eth_param =
@@ -752,6 +766,66 @@ static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink)
mlx5_devlink_eth_param_unregister(devlink);
}
+static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (val.vu32 == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0");
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(val.vu32)) {
+ NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs");
+ return -EINVAL;
+ }
+
+ if (ilog2(val.vu32) >
+ MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) {
+ NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range");
+ return -EINVAL;
+ }
+
+ return 0;
+}
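/* Usage note (illustrative): max_macs is likewise driverinit-scoped and must
 * be a power of two within the range advertised by log_max_current_uc_list,
 * e.g.
 *   devlink dev param set pci/<BDF> name max_macs value 128 cmode driverinit
 */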
+
+static const struct devlink_param max_uc_list_param =
+ DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_max_uc_list_validate);
+
+static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ int err;
+
+ if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported))
+ return 0;
+
+ err = devlink_param_register(devlink, &max_uc_list_param);
+ if (err)
+ return err;
+
+ value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list);
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ value);
+ return 0;
+}
+
+static void
+mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported))
+ return;
+
+ devlink_param_unregister(devlink, &max_uc_list_param);
+}
+
#define MLX5_TRAP_DROP(_id, _group_id) \
DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \
DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \
@@ -771,28 +845,28 @@ static int mlx5_devlink_traps_register(struct devlink *devlink)
struct mlx5_core_dev *core_dev = devlink_priv(devlink);
int err;
- err = devlink_trap_groups_register(devlink, mlx5_trap_groups_arr,
- ARRAY_SIZE(mlx5_trap_groups_arr));
+ err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
if (err)
return err;
- err = devlink_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr),
- &core_dev->priv);
+ err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr),
+ &core_dev->priv);
if (err)
goto err_trap_group;
return 0;
err_trap_group:
- devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
- ARRAY_SIZE(mlx5_trap_groups_arr));
+ devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
return err;
}
static void mlx5_devlink_traps_unregister(struct devlink *devlink)
{
- devlink_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr));
- devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
- ARRAY_SIZE(mlx5_trap_groups_arr));
+ devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr));
+ devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
}
int mlx5_devlink_register(struct devlink *devlink)
@@ -811,6 +885,10 @@ int mlx5_devlink_register(struct devlink *devlink)
if (err)
goto auxdev_reg_err;
+ err = mlx5_devlink_max_uc_list_param_register(devlink);
+ if (err)
+ goto max_uc_list_err;
+
err = mlx5_devlink_traps_register(devlink);
if (err)
goto traps_reg_err;
@@ -821,6 +899,8 @@ int mlx5_devlink_register(struct devlink *devlink)
return 0;
traps_reg_err:
+ mlx5_devlink_max_uc_list_param_unregister(devlink);
+max_uc_list_err:
mlx5_devlink_auxdev_params_unregister(devlink);
auxdev_reg_err:
devlink_params_unregister(devlink, mlx5_devlink_params,
@@ -831,6 +911,7 @@ auxdev_reg_err:
void mlx5_devlink_unregister(struct devlink *devlink)
{
mlx5_devlink_traps_unregister(devlink);
+ mlx5_devlink_max_uc_list_param_unregister(devlink);
mlx5_devlink_auxdev_params_unregister(devlink);
devlink_params_unregister(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 7841ef6c193c..c5bb79a4fa57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -259,6 +259,9 @@ const char *parse_fs_dst(struct trace_seq *p,
case MLX5_FLOW_DESTINATION_TYPE_PORT:
trace_seq_printf(p, "port\n");
break;
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ trace_seq_printf(p, "none\n");
+ break;
}
trace_seq_putc(p, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index eae9aa9c0811..978a2bb8e122 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -675,6 +675,9 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
if (!tracer->owner)
return;
+ if (unlikely(!tracer->str_db.loaded))
+ goto arm;
+
block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
@@ -732,6 +735,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
&tmp_trace_block[TRACES_PER_BLOCK - 1]);
}
+arm:
mlx5_fw_tracer_arm(dev);
}
@@ -1136,8 +1140,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void
queue_work(tracer->work_queue, &tracer->ownership_change_work);
break;
case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
- if (likely(tracer->str_db.loaded))
- queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
break;
default:
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
index 538adab6878b..c5b560a8b026 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
@@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = {
struct mlx5_rsc_dump {
u32 pdn;
u32 mkey;
+ u32 number_of_menu_items;
u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
};
@@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name)
return -EINVAL;
}
-static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page)
+#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_menu_segment))
+
+static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page,
+ int read_size, int start_idx)
{
void *data = page_address(page);
enum mlx5_sgmt_type sgmt_idx;
int num_of_items;
char *sgmt_name;
void *member;
+ int size = 0;
void *menu;
int i;
- menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
- num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records);
+ if (!start_idx) {
+ menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
+ rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu,
+ num_of_records);
+ size = MLX5_RSC_DUMP_MENU_HEADER_SIZE;
+ data += size;
+ }
+ num_of_items = rsc_dump->number_of_menu_items;
+
+ for (i = 0; start_idx + i < num_of_items; i++) {
+ size += MLX5_ST_SZ_BYTES(resource_dump_menu_record);
+ if (size >= read_size)
+ return start_idx + i;
- for (i = 0; i < num_of_items; i++) {
- member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]);
+ member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i;
sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name);
sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name);
if (sgmt_idx == -EINVAL)
@@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct
rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record,
member, segment_type);
}
+ return 0;
}
static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
@@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
struct mlx5_rsc_dump_cmd *cmd = NULL;
struct mlx5_rsc_key key = {};
struct page *page;
+ int start_idx = 0;
int size;
int err;
@@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
if (err < 0)
goto destroy_cmd;
- mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page);
+ start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx);
} while (err > 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f0ac6b0d9653..26a23047f1f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -59,6 +59,7 @@
#include "lib/hv_vhca.h"
#include "lib/clock.h"
#include "en/rx_res.h"
+#include "en/selq.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -92,29 +93,26 @@ struct page_pool;
#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
-#define MLX5_MPWRQ_LOG_WQE_SZ 18
-#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
- MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
-#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
-
-#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8))
-#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2)
-#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts)))
-/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
- * WQEs, This page will absorb write overflow by the hardware, when
- * receiving packets larger than MTU. These oversize packets are
- * dropped by the driver at a later stage.
+#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18
+
+/* Keep in sync with mlx5e_mpwrq_log_wqe_sz.
+ * These are theoretical maximums, which can be further restricted by
+ * capabilities. These values are used for static resource allocations and
+ * sanity checks.
+ * MLX5_SEND_WQE_MAX_SIZE is a bit bigger than the maximum cacheline-aligned WQE
+ * size actually used at runtime, but it's not a problem when calculating static
+ * array sizes.
*/
-#define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1))
-#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS)
+#define MLX5_UMR_MAX_MTT_SPACE \
+ (ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \
+ MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5_MPWRQ_MAX_PAGES_PER_WQE \
+ rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt))
+
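/* Worked example (assumed typical sizes, not mandated by this patch: 64 B
 * WQEBB, 16-WQEBB maximum WQE, 128 B mlx5e_umr_wqe header, 8 B MTT entries,
 * 64 B MTT alignment):
 *
 *   MLX5_UMR_MAX_MTT_SPACE       = ALIGN_DOWN(16 * 64 - 128, 64) = 896 B
 *   MLX5_MPWRQ_MAX_PAGES_PER_WQE = rounddown_pow_of_two(896 / 8) = 64
 */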
#define MLX5E_MAX_RQ_NUM_MTTS \
- ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+	(ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and is WQEBB-aligned. */
+#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */
#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \
- (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
-#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
- (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
- (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))
#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
#define MLX5E_LOG_MAX_RX_WQE_BULK \
@@ -126,8 +124,7 @@ struct page_pool;
#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK)
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \
- MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd
#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
@@ -145,18 +142,10 @@ struct page_pool;
#define MLX5E_MIN_NUM_CHANNELS 0x1
#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
-#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
#define MLX5E_TX_CQ_POLL_BUDGET 128
#define MLX5E_TX_XSK_POLL_BUDGET 64
#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
-#define MLX5E_UMR_WQE_INLINE_SZ \
- (sizeof(struct mlx5e_umr_wqe) + \
- ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
- MLX5_UMR_MTT_ALIGNMENT))
-#define MLX5E_UMR_WQEBBS \
- (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
-
#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
(sizeof(struct mlx5e_umr_wqe) +\
(sizeof(struct mlx5_klm) * (sgl_len)))
@@ -174,7 +163,7 @@ struct page_pool;
ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
#define MLX5E_MAX_KLM_PER_WQE(mdev) \
- MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
+ MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
@@ -188,12 +177,6 @@ do { \
#define mlx5e_state_dereference(priv, p) \
rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
-enum mlx5e_rq_group {
- MLX5E_RQ_GROUP_REGULAR,
- MLX5E_RQ_GROUP_XSK,
-#define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g)
-};
-
static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
{
if (mlx5_lag_is_lacp_owner(mdev))
@@ -222,10 +205,40 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
+/* The maximum WQE size can be retrieved from the max_wqe_sz_sq
+ * capability, in bytes. The driver hardens the limit to 1 KB (16
+ * WQEBBs), unless the firmware capability is stricter.
+ */
+static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev)
+{
+ BUILD_BUG_ON(MLX5_SEND_WQE_MAX_WQEBBS > U8_MAX);
+
+ return (u8)min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS,
+ MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
+}
+
+static inline u8 mlx5e_get_max_sq_aligned_wqebbs(struct mlx5_core_dev *mdev)
+{
+/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS.
+ * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16
+ * (see mlx5e_get_max_sq_wqebbs()), the multiplication (16 * 4 == 64)
+ * would overflow the 6-bit DS field of the Ctrl Segment. Use a bound
+ * lower than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE stay
+ * cache-aligned.
+ */
+ u8 wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+
+ wqebbs = min_t(u8, wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
+#if L1_CACHE_BYTES >= 128
+ wqebbs = ALIGN_DOWN(wqebbs, 2);
+#endif
+ return wqebbs;
+}
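A minimal sketch of the bounds applied above, using only facts from the comment (4 DS per WQEBB, a 6-bit DS count in the Ctrl Segment):

#define WQEBB_NUM_DS  4                 /* DS per WQEBB, per the comment */
#define DS_FIELD_MAX  ((1 << 6) - 1)    /* 6-bit DS count => at most 63 */

static unsigned char sketch_max_aligned_wqebbs(unsigned char max_wqebbs,
					       unsigned int cacheline)
{
	unsigned char wqebbs = max_wqebbs;

	/* 16 WQEBBs would describe 16 * 4 == 64 DS > 63, so cap at 15. */
	if (wqebbs * WQEBB_NUM_DS > DS_FIELD_MAX)
		wqebbs = DS_FIELD_MAX / WQEBB_NUM_DS;	/* 15 */

	/* On 128-byte cachelines, 2 WQEBBs == 1 cacheline, so round the
	 * count down to an even number: 15 -> 14.
	 */
	if (cacheline >= 128)
		wqebbs &= ~1u;

	return wqebbs;
}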
+
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth;
- struct mlx5_wqe_data_seg data[0];
+ struct mlx5_wqe_data_seg data[];
};
struct mlx5e_rx_wqe_ll {
@@ -242,8 +255,9 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
union {
- struct mlx5_mtt inline_mtts[0];
- struct mlx5_klm inline_klms[0];
+ DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
+ DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
+ DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms);
};
};
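DECLARE_FLEX_ARRAY() (from <linux/stddef.h>) exists because C does not allow a bare flexible array member as the sole member of a union; roughly, it wraps each array in an anonymous struct with an empty leading member, along these lines (a simplified sketch, not the exact kernel macro):

union sketch_trailer {
	struct {
		struct { } __empty_inline_mtts;	/* zero-sized placeholder */
		struct mlx5_mtt inline_mtts[];
	};
	struct {
		struct { } __empty_inline_klms;
		struct mlx5_klm inline_klms[];
	};
};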
@@ -294,7 +308,8 @@ struct mlx5e_params {
u8 num_tc;
struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
struct {
- struct mlx5e_mqprio_rl *rl;
+ u64 max_rate[TC_MAX_QUEUE];
+ u32 hw_id[TC_MAX_QUEUE];
} channel;
} mqprio;
bool rx_cqe_compress_def;
@@ -327,7 +342,6 @@ enum {
MLX5E_RQ_STATE_AM,
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
- MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */
MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
};
@@ -378,6 +392,7 @@ enum {
MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
MLX5E_SQ_STATE_PENDING_XSK_TX,
MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
+ MLX5E_SQ_STATE_XDP_MULTIBUF,
};
struct mlx5e_tx_mpwqe {
@@ -428,12 +443,12 @@ struct mlx5e_txqsq {
struct netdev_queue *txq;
u32 sqn;
u16 stop_room;
+ u8 max_sq_mpw_wqebbs;
u8 min_inline_mode;
struct device *pdev;
__be32 mkey_be;
unsigned long state;
unsigned int hw_mtu;
- struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
struct net_device *netdev;
struct mlx5_core_dev *mdev;
@@ -449,12 +464,9 @@ struct mlx5e_txqsq {
cqe_ts_to_ns ptp_cyc2time;
} ____cacheline_aligned_in_smp;
-struct mlx5e_dma_info {
- dma_addr_t addr;
- union {
- struct page *page;
- struct xdp_buff *xsk;
- };
+union mlx5e_alloc_unit {
+ struct page *page;
+ struct xdp_buff *xsk;
};
/* XDP packets can be transmitted in different ways. On completion, we need to
@@ -487,7 +499,7 @@ struct mlx5e_xdp_info {
} frame;
struct {
struct mlx5e_rq *rq;
- struct mlx5e_dma_info di;
+ struct page *page;
} page;
};
};
@@ -509,7 +521,7 @@ struct mlx5e_xdpsq;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
struct mlx5e_xmit_data *,
- struct mlx5e_xdp_info *,
+ struct skb_shared_info *,
int);
struct mlx5e_xdpsq {
@@ -541,6 +553,8 @@ struct mlx5e_xdpsq {
u32 sqn;
struct device *pdev;
__be32 mkey_be;
+ u16 stop_room;
+ u8 max_sq_mpw_wqebbs;
u8 min_inline_mode;
unsigned long state;
unsigned int hw_mtu;
@@ -581,19 +595,15 @@ struct mlx5e_icosq {
} ____cacheline_aligned_in_smp;
struct mlx5e_wqe_frag_info {
- struct mlx5e_dma_info *di;
+ union mlx5e_alloc_unit *au;
u32 offset;
bool last_in_page;
};
-struct mlx5e_umr_dma_info {
- struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
-};
-
struct mlx5e_mpw_info {
- struct mlx5e_umr_dma_info umr;
u16 consumed_strides;
- DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
+ union mlx5e_alloc_unit alloc_units[];
};
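With alloc_units[] as a trailing flexible array, each mlx5e_mpw_info entry must now be sized at runtime from the rq's pages_per_wqe. A hedged allocation sketch (wq_sz and node are illustrative locals, not taken from this patch):

	struct mlx5e_mpw_info *wi;
	size_t entry_sz;

	/* struct_size() from <linux/overflow.h>: header plus
	 * pages_per_wqe trailing alloc units, with overflow checking.
	 */
	entry_sz = struct_size(wi, alloc_units, rq->mpwqe.pages_per_wqe);
	rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, entry_sz),
				       GFP_KERNEL, node);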
#define MLX5E_MAX_RX_FRAGS 4
@@ -601,13 +611,13 @@ struct mlx5e_mpw_info {
/* a single cache unit is capable of serving one napi call (for a non-striding rq)
 * or one MPWQE (for a striding rq).
*/
-#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
- MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
+#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
+ MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
struct mlx5e_page_cache {
u32 head;
u32 tail;
- struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
+ struct page *page_cache[MLX5E_CACHE_SIZE];
};
struct mlx5e_rq;
@@ -616,8 +626,8 @@ typedef struct sk_buff *
(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
u16 cqe_bcnt, u32 head_offset, u32 page_idx);
typedef struct sk_buff *
-(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool);
@@ -640,6 +650,12 @@ struct mlx5e_rq_frags_info {
u8 num_frags;
u8 log_num_frags;
u8 wqe_bulk;
+ u8 wqe_index_mask;
+};
+
+struct mlx5e_dma_info {
+ dma_addr_t addr;
+ struct page *page;
};
struct mlx5e_shampo_hd {
@@ -661,13 +677,20 @@ struct mlx5e_hw_gro_data {
int second_ip_id;
};
+enum mlx5e_mpwrq_umr_mode {
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED,
+ MLX5E_MPWRQ_UMR_MODE_UNALIGNED,
+ MLX5E_MPWRQ_UMR_MODE_OVERSIZED,
+ MLX5E_MPWRQ_UMR_MODE_TRIPLE,
+};
+
struct mlx5e_rq {
/* data path */
union {
struct {
struct mlx5_wq_cyc wq;
struct mlx5e_wqe_frag_info *frags;
- struct mlx5e_dma_info *di;
+ union mlx5e_alloc_unit *alloc_units;
struct mlx5e_rq_frags_info info;
mlx5e_fp_skb_from_cqe skb_from_cqe;
} wqe;
@@ -676,12 +699,19 @@ struct mlx5e_rq {
struct mlx5e_umr_wqe umr_wqe;
struct mlx5e_mpw_info *info;
mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
+ __be32 umr_mkey_be;
u16 num_strides;
u16 actual_wq_head;
u8 log_stride_sz;
u8 umr_in_progress;
u8 umr_last_bulk;
u8 umr_completed;
+ u8 min_wqe_bulk;
+ u8 page_shift;
+ u8 pages_per_wqe;
+ u8 umr_wqebbs;
+ u8 mtts_per_wqe;
+ u8 umr_mode;
struct mlx5e_shampo_hd *shampo;
} mpwqe;
};
@@ -731,7 +761,7 @@ struct mlx5e_rq {
u8 wq_type;
u32 rqn;
struct mlx5_core_dev *mdev;
- u32 umr_mkey;
+ struct mlx5e_channel *channel;
struct mlx5e_dma_info wqe_overflow;
/* XDP read-mostly */
@@ -783,6 +813,8 @@ struct mlx5e_channel {
DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
int cpu;
+ /* Sync between icosq recovery and XSK enable/disable. */
+ struct mutex icosq_recovery_lock;
};
struct mlx5e_ptp;
@@ -818,11 +850,6 @@ enum {
MLX5E_STATE_XDP_ACTIVE,
};
-enum {
- MLX5E_TC_PRIO = 0,
- MLX5E_NIC_PRIO
-};
-
struct mlx5e_modify_sq_param {
int curr_state;
int next_state;
@@ -862,24 +889,13 @@ struct mlx5e_scratchpad {
cpumask_var_t cpumask;
};
-struct mlx5e_htb {
- DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
- DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
- struct mlx5e_sq_stats **qos_sq_stats;
- u16 max_qos_sqs;
- u16 maj_id;
- u16 defcls;
-};
-
struct mlx5e_trap;
+struct mlx5e_htb;
struct mlx5e_priv {
/* priv data path fields - start */
- /* +1 for port ptp ts */
- struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC +
- MLX5E_QOS_MAX_LEAF_NODES];
- int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
- int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_selq selq;
+ struct mlx5e_txqsq **txq2sq;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx_dp dcbx_dp;
#endif
@@ -893,9 +909,9 @@ struct mlx5e_priv {
struct mlx5e_channels channels;
u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
struct mlx5e_rx_res *rx_res;
- u32 tx_rates[MLX5E_MAX_NUM_SQS];
+ u32 *tx_rates;
- struct mlx5e_flow_steering fs;
+ struct mlx5e_flow_steering *fs;
struct workqueue_struct *wq;
struct work_struct update_carrier_work;
@@ -909,9 +925,11 @@ struct mlx5e_priv {
struct net_device *netdev;
struct mlx5e_trap *en_trap;
struct mlx5e_stats stats;
- struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_channel_stats **channel_stats;
struct mlx5e_channel_stats trap_stats;
struct mlx5e_ptp_stats ptp_stats;
+ struct mlx5e_sq_stats **htb_qos_sq_stats;
+ u16 htb_max_qos_sqs;
u16 stats_nch;
u16 max_nch;
u8 max_opened_tc;
@@ -922,7 +940,6 @@ struct mlx5e_priv {
u16 drop_rq_q_counter;
struct notifier_block events_nb;
struct notifier_block blocking_events_nb;
- int num_tc_x_num_ch;
struct udp_tunnel_nic_info nic_info;
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -931,6 +948,9 @@ struct mlx5e_priv {
const struct mlx5e_profile *profile;
void *ppriv;
+#ifdef CONFIG_MLX5_EN_MACSEC
+ struct mlx5e_macsec *macsec;
+#endif
#ifdef CONFIG_MLX5_EN_IPSEC
struct mlx5e_ipsec *ipsec;
#endif
@@ -944,7 +964,7 @@ struct mlx5e_priv {
struct mlx5e_hv_vhca_stats_agent stats_agent;
#endif
struct mlx5e_scratchpad scratchpad;
- struct mlx5e_htb htb;
+ struct mlx5e_htb *htb;
struct mlx5e_mqprio_rl *mqprio_rl;
};
@@ -956,6 +976,14 @@ struct mlx5e_rx_handlers {
extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic;
+enum mlx5e_profile_feature {
+ MLX5E_PROFILE_FEATURE_PTP_RX,
+ MLX5E_PROFILE_FEATURE_PTP_TX,
+ MLX5E_PROFILE_FEATURE_QOS_HTB,
+ MLX5E_PROFILE_FEATURE_FS_VLAN,
+ MLX5E_PROFILE_FEATURE_FS_TC,
+};
+
struct mlx5e_profile {
int (*init)(struct mlx5_core_dev *mdev,
struct net_device *netdev);
@@ -969,23 +997,26 @@ struct mlx5e_profile {
int (*update_rx)(struct mlx5e_priv *priv);
void (*update_stats)(struct mlx5e_priv *priv);
void (*update_carrier)(struct mlx5e_priv *priv);
+ int (*max_nch_limit)(struct mlx5_core_dev *mdev);
unsigned int (*stats_grps_num)(struct mlx5e_priv *priv);
mlx5e_stats_grp_t *stats_grps;
const struct mlx5e_rx_handlers *rx_handlers;
int max_tc;
- u8 rq_groups;
- bool rx_ptp_support;
+ u32 features;
};
+#define mlx5e_profile_feature_cap(profile, feature) \
+ ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))
+
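The macro token-pastes the short name onto the MLX5E_PROFILE_FEATURE_ prefix, so callers pass only the suffix; a usage sketch (the branch body is illustrative):

	/* Tests the MLX5E_PROFILE_FEATURE_PTP_RX bit in profile->features. */
	if (mlx5e_profile_feature_cap(priv->profile, PTP_RX)) {
		/* ... enable PTP RX handling for this profile ... */
	}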
void mlx5e_build_ptys2ethtool_map(void);
-bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
-void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
int mlx5e_self_test_num(struct mlx5e_priv *priv);
int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data);
void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
@@ -1008,15 +1039,13 @@ struct mlx5e_rq_param;
int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
struct mlx5e_xsk_param *xsk, int node,
struct mlx5e_rq *rq);
+#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
void mlx5e_close_rq(struct mlx5e_rq *rq);
int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
void mlx5e_destroy_rq(struct mlx5e_rq *rq);
struct mlx5e_sq_param;
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
-void mlx5e_close_icosq(struct mlx5e_icosq *sq);
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect);
@@ -1038,6 +1067,9 @@ void mlx5e_close_cq(struct mlx5e_cq *cq);
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c);
+void mlx5e_trigger_napi_sched(struct napi_struct *napi);
+
int mlx5e_open_channels(struct mlx5e_priv *priv,
struct mlx5e_channels *chs);
void mlx5e_close_channels(struct mlx5e_channels *chs);
@@ -1057,13 +1089,12 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
mlx5e_fp_preactivate preactivate,
void *context, bool reset);
int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
-int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
@@ -1098,6 +1129,7 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
extern const struct ethtool_ops mlx5e_ethtool_ops;
+int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey);
int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
@@ -1110,8 +1142,6 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv);
int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
struct mlx5e_rq *drop_rq);
void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
-int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
-void mlx5e_free_di_list(struct mlx5e_rq *rq);
int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
@@ -1140,7 +1170,8 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset);
void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
struct ethtool_stats *stats, u64 *data);
void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
- struct ethtool_ringparam *param);
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param);
int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
struct ethtool_ringparam *param);
void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
@@ -1148,9 +1179,12 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch);
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal);
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal);
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal);
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack);
int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
struct ethtool_link_ksettings *link_ksettings);
int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
@@ -1180,14 +1214,14 @@ mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe);
}
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev);
int mlx5e_priv_init(struct mlx5e_priv *priv,
const struct mlx5e_profile *profile,
struct net_device *netdev,
struct mlx5_core_dev *mdev);
void mlx5e_priv_cleanup(struct mlx5e_priv *priv);
struct net_device *
-mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
- unsigned int txqs, unsigned int rxqs);
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile);
int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
index e7c14c0de0a7..48581ea3adcb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
@@ -10,28 +10,33 @@ unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
return chs->num;
}
-void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix)
{
- struct mlx5e_channel *c;
+ WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs));
+ return chs->c[ix];
+}
- WARN_ON(ix >= mlx5e_channels_get_num(chs));
- c = chs->c[ix];
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
- *rqn = c->rq.rqn;
+ return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
}
-bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
{
- struct mlx5e_channel *c;
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
- WARN_ON(ix >= mlx5e_channels_get_num(chs));
- c = chs->c[ix];
+ *rqn = c->rq.rqn;
+}
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
- return false;
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));
*rqn = c->xskrq.rqn;
- return true;
}
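Under the new contract, callers check mlx5e_channels_is_xsk() first; mlx5e_channels_get_xsk_rqn() now only warns instead of returning failure when the channel is not in XSK mode. A caller sketch (the surrounding logic is illustrative):

	u32 rqn;

	if (mlx5e_channels_is_xsk(chs, ix))
		mlx5e_channels_get_xsk_rqn(chs, ix, &rqn);
	else
		mlx5e_channels_get_regular_rqn(chs, ix, &rqn);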
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
index ca00cbc827cb..637ca90daaa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
@@ -9,8 +9,9 @@
struct mlx5e_channels;
unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
-bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
#endif /* __MLX5_EN_CHANNELS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
index 9976de8b9047..b59aee75de94 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -40,13 +40,11 @@ struct mlx5e_dcbx_dp {
};
void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
-void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev);
void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
#else
static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
-static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {}
static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index ae52e7f38306..b69f9d10ccbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -21,6 +21,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
struct netdev_phys_item_id ppid = {};
struct devlink_port *dl_port;
unsigned int dl_port_index;
+ int ret;
if (mlx5_core_is_pf(priv->mdev)) {
attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
@@ -41,7 +42,13 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
memset(dl_port, 0, sizeof(*dl_port));
devlink_port_attrs_set(dl_port, &attrs);
- return devlink_port_register(devlink, dl_port, dl_port_index);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ ret = devl_port_register(devlink, dl_port, dl_port_index);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+
+ return ret;
}
void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
@@ -54,8 +61,13 @@ void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
{
struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
- devlink_port_unregister(dl_port);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ devl_port_unregister(dl_port);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
}
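Both paths open-code the same conditional locking; it can be read as the following hypothetical helper (not part of this patch): take the devlink instance lock only when the caller does not already hold it, which the driver signals via MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW.

/* Hypothetical helper illustrating the pattern above. */
static void mlx5e_devl_lock_unless_held(struct mlx5_core_dev *mdev)
{
	if (!(mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
		devl_lock(priv_to_devlink(mdev));
}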
struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 678ffbb48a25..bf2741eb7f9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -8,32 +8,17 @@
#include "lib/fs_ttc.h"
struct mlx5e_post_act;
+struct mlx5e_tc_table;
enum {
MLX5E_TC_FT_LEVEL = 0,
MLX5E_TC_TTC_FT_LEVEL,
+ MLX5E_TC_MISS_LEVEL,
};
-struct mlx5e_tc_table {
- /* Protects the dynamic assignment of the t parameter
- * which is the nic tc root table.
- */
- struct mutex t_lock;
- struct mlx5_flow_table *t;
- struct mlx5_fs_chains *chains;
- struct mlx5e_post_act *post_act;
-
- struct rhashtable ht;
-
- struct mod_hdr_tbl mod_hdr;
- struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
- DECLARE_HASHTABLE(hairpin_tbl, 8);
-
- struct notifier_block netdevice_nb;
- struct netdev_net_notifier netdevice_nn;
-
- struct mlx5_tc_ct_priv *ct;
- struct mapping_ctx *mapping;
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
};
struct mlx5e_flow_table {
@@ -104,108 +89,116 @@ enum {
#endif
};
-struct mlx5e_priv;
-
-#ifdef CONFIG_MLX5_EN_RXNFC
-
-struct mlx5e_ethtool_table {
- struct mlx5_flow_table *ft;
- int num_rules;
-};
-
-#define ETHTOOL_NUM_L3_L4_FTS 7
-#define ETHTOOL_NUM_L2_FTS 4
-
-struct mlx5e_ethtool_steering {
- struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
- struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
- struct list_head rules;
- int tot_num_rules;
-};
-
-void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
-void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
-int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
-int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
- struct ethtool_rxnfc *info, u32 *rule_locs);
-#else
-static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { }
-static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
-static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
-{ return -EOPNOTSUPP; }
-static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
- struct ethtool_rxnfc *info, u32 *rule_locs)
-{ return -EOPNOTSUPP; }
-#endif /* CONFIG_MLX5_EN_RXNFC */
+struct mlx5e_flow_steering;
+struct mlx5e_rx_res;
#ifdef CONFIG_MLX5_EN_ARFS
struct mlx5e_arfs_tables;
-int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
-int mlx5e_arfs_enable(struct mlx5e_priv *priv);
-int mlx5e_arfs_disable(struct mlx5e_priv *priv);
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple);
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple);
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs);
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs);
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#else
-static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
-static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
-static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
+{ return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
#endif
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_accel_fs_tcp;
#endif
+struct mlx5e_profile;
struct mlx5e_fs_udp;
struct mlx5e_fs_any;
struct mlx5e_ptp_fs;
-struct mlx5e_flow_steering {
- struct mlx5_flow_namespace *ns;
- struct mlx5_flow_namespace *egress_ns;
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params, bool tunnel);
+
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs);
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res);
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev);
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile);
+
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy);
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs);
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc);
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs);
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress);
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress);
#ifdef CONFIG_MLX5_EN_RXNFC
- struct mlx5e_ethtool_steering ethtool;
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs);
#endif
- struct mlx5e_tc_table tc;
- struct mlx5e_promisc_table promisc;
- struct mlx5e_vlan_table *vlan;
- struct mlx5e_l2_table l2;
- struct mlx5_ttc_table *ttc;
- struct mlx5_ttc_table *inner_ttc;
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner);
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner);
#ifdef CONFIG_MLX5_EN_ARFS
- struct mlx5e_arfs_tables *arfs;
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs);
#endif
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs);
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any);
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp);
#ifdef CONFIG_MLX5_EN_TLS
- struct mlx5e_accel_fs_tcp *accel_tcp;
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp);
#endif
- struct mlx5e_fs_udp *udp;
- struct mlx5e_fs_any *any;
- struct mlx5e_ptp_fs *ptp_fs;
-};
-
-void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
- struct ttc_params *ttc_params, bool tunnel);
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
-
-void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-
-void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
-void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
-
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
-void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
-
-int mlx5e_fs_init(struct mlx5e_priv *priv);
-void mlx5e_fs_cleanup(struct mlx5e_priv *priv);
-
-int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
-void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
-int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
-void mlx5e_remove_mac_trap(struct mlx5e_priv *priv);
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy);
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable);
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs);
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs);
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+
+#define fs_err(fs, fmt, ...) \
+ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_dbg(fs, fmt, ...) \
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn(fs, fmt, ...) \
+ mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn_once(fs, fmt, ...) \
+ mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
#endif /* __MLX5E_FLOW_STEER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
new file mode 100644
index 000000000000..9e276fd3c0cf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5E_FS_ETHTOOL_H__
+#define __MLX5E_FS_ETHTOOL_H__
+
+struct mlx5e_priv;
+struct mlx5e_ethtool_steering;
+#ifdef CONFIG_MLX5_EN_RXNFC
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool);
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool);
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs);
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{ return 0; }
+static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { }
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
index 7aa25a5e29d7..03cb79adf912 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
-#include <linux/netdevice.h>
#include "en/fs_tt_redirect.h"
#include "fs_core.h"
+#include "mlx5_core.h"
enum fs_udp_type {
FS_IPV4_UDP,
@@ -74,17 +74,17 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type
}
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port)
{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
enum fs_udp_type type = tt2fs_udp(ttc_type);
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft = NULL;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct mlx5e_fs_udp *fs_udp;
int err;
if (type == FS_UDP_NUM_TYPES)
@@ -94,7 +94,6 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_udp = priv->fs.udp;
ft = fs_udp->tables[type].t;
fs_udp_set_dport_flow(spec, type, d_port);
@@ -106,31 +105,30 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add %s rule failed, err %d\n",
- __func__, fs_udp_type2str(type), err);
+ fs_err(fs, "%s: add %s rule failed, err %d\n",
+ __func__, fs_udp_type2str(type), err);
}
return rule;
}
-static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type type)
+static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
struct mlx5e_flow_table *fs_udp_t;
struct mlx5_flow_destination dest;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
- struct mlx5e_fs_udp *fs_udp;
int err;
- fs_udp = priv->fs.udp;
fs_udp_t = &fs_udp->tables[type];
- dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type));
+ dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type));
rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, fs type=%d, err %d\n",
- __func__, type, err);
+ fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n",
+ __func__, type, err);
return err;
}
@@ -206,33 +204,36 @@ out:
return err;
}
-static int fs_udp_create_table(struct mlx5e_priv *priv, enum fs_udp_type type)
+static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
{
- struct mlx5e_flow_table *ft = &priv->fs.udp->tables[type];
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
int err;
+ ft = &fs_udp->tables[type];
ft->num_groups = 0;
ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs %s table id %u level %u\n",
- fs_udp_type2str(type), ft->t->id, ft->t->level);
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n",
+ fs_udp_type2str(type), ft->t->id, ft->t->level);
err = fs_udp_create_groups(ft, type);
if (err)
goto err;
- err = fs_udp_add_default_rule(priv, type);
+ err = fs_udp_add_default_rule(fs, type);
if (err)
goto err;
@@ -253,17 +254,17 @@ static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i)
fs_udp->tables[i].t = NULL;
}
-static int fs_udp_disable(struct mlx5e_priv *priv)
+static int fs_udp_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err, i;
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i));
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, fs_udp2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
return err;
}
}
@@ -271,30 +272,31 @@ static int fs_udp_disable(struct mlx5e_priv *priv)
return 0;
}
-static int fs_udp_enable(struct mlx5e_priv *priv)
+static int fs_udp_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
struct mlx5_flow_destination dest = {};
int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
- dest.ft = priv->fs.udp->tables[i].t;
+ dest.ft = udp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, fs_udp2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
return err;
}
}
return 0;
}
-void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv)
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_fs_udp *fs_udp = priv->fs.udp;
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
int i;
if (!fs_udp)
@@ -303,48 +305,50 @@ void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv)
if (--fs_udp->ref_cnt)
return;
- fs_udp_disable(priv);
+ fs_udp_disable(fs);
for (i = 0; i < FS_UDP_NUM_TYPES; i++)
fs_udp_destroy_table(fs_udp, i);
kfree(fs_udp);
- priv->fs.udp = NULL;
+ mlx5e_fs_set_udp(fs, NULL);
}
-int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv)
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
int i, err;
- if (priv->fs.udp) {
- priv->fs.udp->ref_cnt++;
+ if (udp) {
+ udp->ref_cnt++;
return 0;
}
- priv->fs.udp = kzalloc(sizeof(*priv->fs.udp), GFP_KERNEL);
- if (!priv->fs.udp)
+ udp = kzalloc(sizeof(*udp), GFP_KERNEL);
+ if (!udp)
return -ENOMEM;
+ mlx5e_fs_set_udp(fs, udp);
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
- err = fs_udp_create_table(priv, i);
+ err = fs_udp_create_table(fs, i);
if (err)
goto err_destroy_tables;
}
- err = fs_udp_enable(priv);
+ err = fs_udp_enable(fs);
if (err)
goto err_destroy_tables;
- priv->fs.udp->ref_cnt = 1;
+ udp->ref_cnt = 1;
return 0;
err_destroy_tables:
while (--i >= 0)
- fs_udp_destroy_table(priv->fs.udp, i);
+ fs_udp_destroy_table(udp, i);
- kfree(priv->fs.udp);
- priv->fs.udp = NULL;
+ kfree(udp);
+ mlx5e_fs_set_udp(fs, NULL);
return err;
}
@@ -356,22 +360,21 @@ static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_typ
}
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
u32 tir_num, u16 ether_type)
{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft = NULL;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct mlx5e_fs_any *fs_any;
int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_any = priv->fs.any;
ft = fs_any->table.t;
fs_any_set_ethertype_flow(spec, ether_type);
@@ -383,31 +386,29 @@ mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add ANY rule failed, err %d\n",
- __func__, err);
+ fs_err(fs, "%s: add ANY rule failed, err %d\n",
+ __func__, err);
}
return rule;
}
-static int fs_any_add_default_rule(struct mlx5e_priv *priv)
+static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
struct mlx5e_flow_table *fs_any_t;
struct mlx5_flow_destination dest;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
- struct mlx5e_fs_any *fs_any;
int err;
- fs_any = priv->fs.any;
fs_any_t = &fs_any->table;
-
- dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY);
+ dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY);
rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, fs type=ANY, err %d\n",
- __func__, err);
+ fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n",
+ __func__, err);
return err;
}
@@ -472,9 +473,11 @@ err:
return err;
}
-static int fs_any_create_table(struct mlx5e_priv *priv)
+static int fs_any_create_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_flow_table *ft = &priv->fs.any->table;
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *ft = &fs_any->table;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -484,21 +487,21 @@ static int fs_any_create_table(struct mlx5e_priv *priv)
ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs ANY table id %u level %u\n",
- ft->t->id, ft->t->level);
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n",
+ ft->t->id, ft->t->level);
err = fs_any_create_groups(ft);
if (err)
goto err;
- err = fs_any_add_default_rule(priv);
+ err = fs_any_add_default_rule(fs);
if (err)
goto err;
@@ -509,35 +512,38 @@ err:
return err;
}
-static int fs_any_disable(struct mlx5e_priv *priv)
+static int fs_any_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err;
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY);
+ err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, MLX5_TT_ANY, err);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
}
-static int fs_any_enable(struct mlx5e_priv *priv)
+static int fs_any_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs);
struct mlx5_flow_destination dest = {};
int err;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.any->table.t;
+ dest.ft = any->table.t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest);
+ err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, MLX5_TT_ANY, err);
+ fs_err(fs,
+ "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
@@ -553,9 +559,9 @@ static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any)
fs_any->table.t = NULL;
}
-void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv)
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_fs_any *fs_any = priv->fs.any;
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
if (!fs_any)
return;
@@ -563,43 +569,45 @@ void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv)
if (--fs_any->ref_cnt)
return;
- fs_any_disable(priv);
+ fs_any_disable(fs);
fs_any_destroy_table(fs_any);
kfree(fs_any);
- priv->fs.any = NULL;
+ mlx5e_fs_set_any(fs, NULL);
}
-int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv)
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
int err;
- if (priv->fs.any) {
- priv->fs.any->ref_cnt++;
+ if (fs_any) {
+ fs_any->ref_cnt++;
return 0;
}
- priv->fs.any = kzalloc(sizeof(*priv->fs.any), GFP_KERNEL);
- if (!priv->fs.any)
+ fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL);
+ if (!fs_any)
return -ENOMEM;
+ mlx5e_fs_set_any(fs, fs_any);
- err = fs_any_create_table(priv);
+ err = fs_any_create_table(fs);
if (err)
return err;
- err = fs_any_enable(priv);
+ err = fs_any_enable(fs);
if (err)
goto err_destroy_table;
- priv->fs.any->ref_cnt = 1;
+ fs_any->ref_cnt = 1;
return 0;
err_destroy_table:
- fs_any_destroy_table(priv->fs.any);
+ fs_any_destroy_table(fs_any);
- kfree(priv->fs.any);
- priv->fs.any = NULL;
+ kfree(fs_any);
+ mlx5e_fs_set_any(fs, NULL);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
index 7a70c4f38fda..5780fd7ad507 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -4,23 +4,22 @@
#ifndef __MLX5E_FS_TT_REDIRECT_H__
#define __MLX5E_FS_TT_REDIRECT_H__
-#include "en.h"
#include "en/fs.h"
void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
/* UDP traffic type redirect */
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port);
-void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
-int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs);
/* ANY traffic type redirect */
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
u32 tir_num, u16 ether_type);
-void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv);
-int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv);
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index d5b7110a4265..0107e4e73bb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
new file mode 100644
index 000000000000..6dac76fa58a3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/pkt_cls.h>
+#include "htb.h"
+#include "en.h"
+#include "../qos.h"
+
+struct mlx5e_qos_node {
+ struct hlist_node hnode;
+ struct mlx5e_qos_node *parent;
+ u64 rate;
+ u32 bw_share;
+ u32 max_average_bw;
+ u32 hw_id;
+ u32 classid; /* 16-bit, except root. */
+ u16 qid;
+};
+
+struct mlx5e_htb {
+ DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
+ DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ struct mlx5e_selq *selq;
+};
+
+#define MLX5E_QOS_QID_INNER 0xffff
+#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
+
+/* Software representation of the QoS tree */
+
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt, err;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode) {
+ if (node->qid == MLX5E_QOS_QID_INNER)
+ continue;
+ err = callback(data, node->qid, node->hw_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
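The callback receives the (data, qid, hw_id) triple passed above and aborts the walk by returning non-zero. A hypothetical caller (count_leaf and the counter are illustrative):

/* Hypothetical callback: count active leaves via the enumerator. */
static int count_leaf(void *data, u16 qid, u32 hw_id)
{
	(*(unsigned int *)data)++;
	return 0;	/* non-zero would stop the enumeration */
}

	/* ... */
	unsigned int leaves = 0;

	mlx5e_htb_enumerate_leaves(htb, count_leaf, &leaves);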
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb)
+{
+ int last;
+
+ last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev));
+ return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1;
+}
+
+static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb)
+{
+ int size = mlx5e_qos_max_leaf_nodes(htb->mdev);
+ struct mlx5e_priv *priv = htb->priv;
+ int res;
+
+ WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
+ res = find_first_zero_bit(htb->qos_used_qids, size);
+
+ return res == size ? -ENOSPC : res;
+}
+
+static struct mlx5e_qos_node *
+mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid,
+ struct mlx5e_qos_node *parent)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->parent = parent;
+
+ node->qid = qid;
+ __set_bit(qid, htb->qos_used_qids);
+
+ node->classid = classid;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, classid);
+
+ mlx5e_update_tx_netdev_queues(htb->priv);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->qid = MLX5E_QOS_QID_INNER;
+ node->classid = MLX5E_HTB_CLASSID_ROOT;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node)
+{
+ hash_del_rcu(&node->hnode);
+ if (node->qid != MLX5E_QOS_QID_INNER) {
+ __clear_bit(node->qid, htb->qos_used_qids);
+ mlx5e_update_tx_netdev_queues(htb->priv);
+ }
+ /* Make sure this qid is no longer selected by mlx5e_select_queue, so
+ * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue.
+ */
+ synchronize_net();
+ kfree(node);
+}
+
+/* TX datapath API */
+
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid)
+{
+ struct mlx5e_qos_node *node;
+ u16 qid;
+ int res;
+
+ rcu_read_lock();
+
+ node = mlx5e_htb_node_find_rcu(htb, classid);
+ if (!node) {
+ res = -ENOENT;
+ goto out;
+ }
+ qid = READ_ONCE(node->qid);
+ if (qid == MLX5E_QOS_QID_INNER) {
+ res = -EINVAL;
+ goto out;
+ }
+ res = mlx5e_qid_from_qos(&htb->priv->channels, qid);
+
+out:
+ rcu_read_unlock();
+ return res;
+}
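This lookup is the reader side of the RCU scheme: mlx5e_htb_node_delete() removes the node with hash_del_rcu() and waits in synchronize_net() before freeing, so a reader that found the node under rcu_read_lock() can never dereference freed memory. Condensed pairing:

/* writer (mlx5e_htb_node_delete)     reader (mlx5e_htb_get_txq_by_classid)
 * --------------------------------   -------------------------------------
 * hash_del_rcu(&node->hnode);        rcu_read_lock();
 * synchronize_net();                 node = mlx5e_htb_node_find_rcu(...);
 *   // waits out all RCU readers     qid = READ_ONCE(node->qid);
 * kfree(node);                       rcu_read_unlock();
 */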
+
+/* HTB TC handlers */
+
+static int
+mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
+
+ mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls);
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ err = mlx5e_qos_alloc_queues(priv, &priv->channels);
+ if (err)
+ goto err_cancel_selq;
+ }
+
+ root = mlx5e_htb_node_create_root(htb);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto err_free_queues;
+ }
+
+ err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
+ goto err_sw_node_delete;
+ }
+
+ mlx5e_selq_apply(htb->selq);
+
+ return 0;
+
+err_sw_node_delete:
+ mlx5e_htb_node_delete(htb, root);
+
+err_free_queues:
+ if (opened)
+ mlx5e_qos_close_all_queues(&priv->channels);
+err_cancel_selq:
+ mlx5e_selq_cancel(htb->selq);
+ return err;
+}
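mlx5e_htb_root_add() is a two-phase update of the TX queue selection state: prepare a standby selq configuration, perform the fallible queue and firmware setup, then apply the standby state or cancel it. Condensed control flow (setup_hw_side() is a hypothetical stand-in for the allocations above):

	mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls);

	err = setup_hw_side();			/* queues + firmware root node */
	if (err) {
		mlx5e_selq_cancel(htb->selq);	/* drop the standby state */
		return err;
	}

	mlx5e_selq_apply(htb->selq);		/* switch TX selection atomically */
	return 0;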
+
+static int mlx5e_htb_root_del(struct mlx5e_htb *htb)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_DESTROY\n");
+
+ /* Wait until real_num_tx_queues is updated for mlx5e_select_queue,
+ * so that we can safely switch to its non-HTB non-PTP fastpath.
+ */
+ synchronize_net();
+
+ mlx5e_selq_prepare_htb(htb->selq, 0, 0);
+ mlx5e_selq_apply(htb->selq);
+
+ root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT);
+ if (!root) {
+ qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n");
+ return -ENOENT;
+ }
+ err = mlx5_qos_destroy_node(htb->mdev, root->hw_id);
+ if (err)
+ qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n",
+ root->hw_id, err);
+ mlx5e_htb_node_delete(htb, root);
+
+ mlx5e_qos_deactivate_all_queues(&priv->channels);
+ mlx5e_qos_close_all_queues(&priv->channels);
+
+ return err;
+}
+
+static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate,
+ struct mlx5e_qos_node *parent, u32 *bw_share)
+{
+ u64 share = 0;
+
+ while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
+ parent = parent->parent;
+
+ if (parent->max_average_bw)
+ share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
+ parent->max_average_bw);
+ else
+ share = 101;
+
+ *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
+
+ qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
+ rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
+
+ return 0;
+}
+
+static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw)
+{
+ /* Hardware treats 0 as "unlimited", so set at least 1. */
+ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
+}
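A worked example of both conversions, assuming rate and ceil arrive in bytes per second and BYTES_IN_MBIT equals 1000000 / 8 == 125000 (the constant is defined elsewhere in the driver; its value here is an assumption for illustration):

/* Parent: ceil = 100 Mbit/s = 12500000 B/s
 *   -> max_average_bw = max(12500000 / 125000, 1) = 100
 * Child:  rate = 25 Mbit/s = 3125000 B/s
 *   -> share = (3125000 * 100 / 125000) / 100 = 25 -> bw_share = 25
 * Clamping: share == 0 becomes 1 (minimum weight); share > 100 (rate
 * above the effective ceil, or no ancestor ceil at all) becomes 0.
 */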
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ int qid;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
+ classid, parent_classid, rate, ceil);
+
+ qid = mlx5e_htb_find_unused_qos_qid(htb);
+ if (qid < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Maximum number of leaf classes reached.");
+ return qid;
+ }
+
+ parent = mlx5e_htb_node_find(htb, parent_classid);
+ if (!parent)
+ return -EINVAL;
+
+ node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ mlx5e_htb_node_delete(htb, node);
+ return err;
+ }
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ return mlx5e_qid_from_qos(&priv->channels, node->qid);
+}
+
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *child;
+ struct mlx5e_priv *priv = htb->priv;
+ int err, tmp_err;
+ u32 new_hw_id;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
+ classid, child_classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
+ qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ /* Intentionally reuse the qid for the upcoming first child. */
+ child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto err_destroy_hw_node;
+ }
+
+ child->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share,
+ child->max_average_bw, &child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ goto err_delete_sw_node;
+ }
+
+ /* No fail point. */
+
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, child->qid, child->hw_id);
+ }
+ }
+
+ return 0;
+
+err_delete_sw_node:
+ child->qid = MLX5E_QOS_QID_INNER;
+ mlx5e_htb_node_delete(htb, child);
+
+err_destroy_hw_node:
+ tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id);
+ if (tmp_err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
+ new_hw_id, classid, tmp_err);
+ return err;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode)
+ if (node->qid == qid)
+ break;
+
+ return node;
+}
+
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *node;
+ struct netdev_queue *txq;
+ u16 qid, moved_qid;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
+
+ node = mlx5e_htb_node_find(htb, *classid);
+ if (!node)
+ return -ENOENT;
+
+ /* Store qid for reuse. */
+ qid = node->qid;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, qid));
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, *classid, err);
+
+ mlx5e_htb_node_delete(htb, node);
+
+ moved_qid = mlx5e_htb_cur_leaf_nodes(htb);
+
+ if (moved_qid == 0) {
+ /* The last QoS SQ was just destroyed. */
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+ moved_qid--;
+
+ if (moved_qid < qid) {
+ /* The highest QoS SQ was just destroyed. */
+ WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u",
+ qid, moved_qid);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+
+ WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
+ qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
+
+ node = mlx5e_htb_node_find_by_qid(htb, moved_qid);
+ WARN(!node, "Could not find a node with qid %u to move to queue %u",
+ moved_qid, qid);
+
+ /* Stop traffic to the old queue. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+ __clear_bit(moved_qid, htb->qos_used_qids);
+
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, moved_qid));
+ mlx5e_deactivate_qos_sq(priv, moved_qid);
+ mlx5e_close_qos_sq(priv, moved_qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, moved_qid);
+
+ __set_bit(qid, htb->qos_used_qids);
+ WRITE_ONCE(node->qid, qid);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
+ node->classid, moved_qid, qid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ mlx5e_update_tx_netdev_queues(priv);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
+
+ *classid = node->classid;
+ return 0;
+}
+
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ u32 old_hw_id, new_hw_id;
+ int err, saved_err = 0;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
+ force ? "_FORCE" : "", classid);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id,
+ node->parent->bw_share,
+ node->parent->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ if (!force)
+ return err;
+ saved_err = err;
+ }
+
+ /* Store qid for reuse and prevent clearing the bit. */
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, qid);
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ parent = node->parent;
+ mlx5e_htb_node_delete(htb, node);
+
+ node = parent;
+ WRITE_ONCE(node->qid, qid);
+
+ /* Early return on error in force mode. Parent will still be an inner
+ * node to be deleted by a following delete operation.
+ */
+ if (saved_err)
+ return saved_err;
+
+ old_hw_id = node->hw_id;
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, old_hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ old_hw_id, classid, err);
+
+ return 0;
+}
+
+static int
+mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *child;
+ int err = 0;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, child, hnode) {
+ u32 old_bw_share = child->bw_share;
+ int err_one;
+
+ if (child->parent != node)
+ continue;
+
+ mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share);
+ if (child->bw_share == old_bw_share)
+ continue;
+
+ err_one = mlx5_qos_update_node(htb->mdev, node->hw_id, child->bw_share,
+ child->max_average_bw, child->hw_id);
+ if (!err && err_one) {
+ err = err_one;
+
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
+ qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n",
+ node->classid, err);
+ }
+ }
+
+ return err;
+}
+
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ u32 bw_share, max_average_bw;
+ struct mlx5e_qos_node *node;
+ bool ceil_changed = false;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
+ classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw);
+
+ err = mlx5_qos_update_node(htb->mdev, node->parent->hw_id, bw_share,
+ max_average_bw, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
+ qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ if (max_average_bw != node->max_average_bw)
+ ceil_changed = true;
+
+ node->bw_share = bw_share;
+ node->max_average_bw = max_average_bw;
+
+ if (ceil_changed)
+ err = mlx5e_htb_update_children(htb, node, extack);
+
+ return err;
+}
+
+struct mlx5e_htb *mlx5e_htb_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL);
+}
+
+void mlx5e_htb_free(struct mlx5e_htb *htb)
+{
+ kvfree(htb);
+}
+
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv)
+{
+ htb->mdev = mdev;
+ htb->netdev = netdev;
+ htb->selq = selq;
+ htb->priv = priv;
+ hash_init(htb->qos_tc2node);
+ return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->extack);
+}
+
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb)
+{
+ mlx5e_htb_root_del(htb);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
new file mode 100644
index 000000000000..8386f1ea4559
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5E_EN_HTB_H_
+#define __MLX5E_EN_HTB_H_
+
+#include "qos.h"
+
+#define MLX5E_QOS_MAX_LEAF_NODES 256
+
+struct mlx5e_selq;
+struct mlx5e_htb;
+
+typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id);
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data);
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb);
+
+/* TX datapath API */
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid);
+
+/* HTB TC handlers */
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+struct mlx5e_htb *mlx5e_htb_alloc(void);
+void mlx5e_htb_free(struct mlx5e_htb *htb);
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv);
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index d290d7276b8d..b4f3bd7d346e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -20,7 +20,7 @@ mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
struct mlx5e_channel_stats *stats;
int tc;
- stats = &priv->channel_stats[ch];
+ stats = priv->channel_stats[ch];
data->rx_packets = stats->rq.packets;
data->rx_bytes = stats->rq.bytes;
@@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
cancel_delayed_work_sync(&priv->stats_agent.work);
}
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
struct mlx5_hv_vhca_agent *agent;
priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
if (!priv->stats_agent.buf)
- return -ENOMEM;
+ return;
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
@@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
PTR_ERR(agent));
kvfree(priv->stats_agent.buf);
- return IS_ERR_OR_NULL(agent);
+ return;
}
priv->stats_agent.agent = agent;
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
-
- return 0;
}
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
index 664463faf77b..29c8c6d3260f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -7,19 +7,12 @@
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
#else
-
-static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
-{
- return 0;
-}
-
-static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
-{
-}
+static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
#endif
#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
index 7edde4d536fd..17325c5d6516 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
@@ -155,3 +155,61 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh)
return mh->modify_hdr;
}
+char *
+mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int new_num_actions, max_hw_actions;
+ size_t new_sz, old_sz;
+ void *ret;
+
+ if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
+ goto out;
+
+ max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace);
+ new_num_actions = min(max_hw_actions,
+ mod_hdr_acts->actions ?
+ mod_hdr_acts->max_actions * 2 : 1);
+ if (mod_hdr_acts->max_actions == new_num_actions)
+ return ERR_PTR(-ENOSPC);
+
+ new_sz = MLX5_MH_ACT_SZ * new_num_actions;
+ old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ;
+
+ if (mod_hdr_acts->is_static) {
+ ret = kzalloc(new_sz, GFP_KERNEL);
+ if (ret) {
+ memcpy(ret, mod_hdr_acts->actions, old_sz);
+ mod_hdr_acts->is_static = false;
+ }
+ } else {
+ ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
+ if (ret)
+ memset(ret + old_sz, 0, new_sz - old_sz);
+ }
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ mod_hdr_acts->actions = ret;
+ mod_hdr_acts->max_actions = new_num_actions;
+
+out:
+ return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
+}
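+
+/* Growth behaviour of mlx5e_mod_hdr_alloc(), illustrated with hypothetical
+ * numbers: starting from a static two-entry array, the first overflow copies
+ * the actions into a kzalloc'ed buffer of min(max_hw_actions, 4) entries;
+ * later overflows krealloc and double the capacity until max_hw_actions is
+ * reached, at which point -ENOSPC is returned. On success the returned
+ * pointer addresses the next free MLX5_MH_ACT_SZ-sized slot.
+ */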
+
+void
+mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ if (!mod_hdr_acts->is_static)
+ kfree(mod_hdr_acts->actions);
+
+ mod_hdr_acts->actions = NULL;
+ mod_hdr_acts->num_actions = 0;
+ mod_hdr_acts->max_actions = 0;
+}
+
+char *
+mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos)
+{
+ return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
index 33b23d8f9182..b8dac418d0a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
@@ -7,14 +7,32 @@
#include <linux/hashtable.h>
#include <linux/mlx5/fs.h>
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
struct mlx5e_mod_hdr_handle;
struct mlx5e_tc_mod_hdr_acts {
int num_actions;
int max_actions;
+ bool is_static;
void *actions;
};
+#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \
+ u8 name[len][MLX5_MH_ACT_SZ] = {}
+
+#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \
+ struct mlx5e_tc_mod_hdr_acts name = { \
+ .max_actions = ARRAY_SIZE(acts_arr), \
+ .is_static = true, \
+ .actions = acts_arr, \
+ }
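+
+/* A minimal usage sketch for these macros (hypothetical caller, for
+ * illustration): start with stack storage and let mlx5e_mod_hdr_alloc()
+ * migrate the actions to the heap only when the static array overflows:
+ *
+ *	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, 2);
+ *	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
+ *	char *act = mlx5e_mod_hdr_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
+ *					&mod_acts);
+ *
+ * The caller fills the returned slot, bumps mod_acts.num_actions, and
+ * finally calls mlx5e_mod_hdr_dealloc(&mod_acts).
+ */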
+
+char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos);
+
struct mlx5e_mod_hdr_handle *
mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
struct mod_hdr_tbl *tbl,
@@ -28,4 +46,12 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh);
void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl);
void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl);
+static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace)
+{
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
#endif /* __MLX5E_EN_MOD_HDR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index f8c29022dbb2..29dd3a04c154 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -5,13 +5,213 @@
#include "en/txrx.h"
#include "en/port.h"
#include "en_accel/en_accel.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
+#include "en_accel/ipsec.h"
+#include <net/xdp_sock_drv.h>
-static bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
+{
+ u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size);
+
+ return min_page_shift ? : 12;
+}
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
+ u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);
+
+ /* Regular RQ uses order-0 pages, the NIC must be able to map them. */
+ if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
+ min_page_shift = req_page_shift;
+
+ return max(req_page_shift, min_page_shift);
+}
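+
+/* Example for mlx5e_mpwrq_page_shift() (illustrative): an XSK chunk of 2048
+ * bytes requests page_shift 11, but a device reporting
+ * log_min_mkey_entity_size = 12 raises the result to 12; such frames are
+ * then handled by an oversized UMR mode below. Without XSK the result is
+ * normally just PAGE_SHIFT.
+ */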
+
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ /* Different memory management schemes use different mechanisms to map
+ * user-mode memory. The stricter guarantees we have, the faster
+ * mechanisms we use:
+ * 1. MTT - direct mapping in page granularity.
+ * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but
+ * all mappings have the same size.
+ * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and
+ * mappings can have different sizes.
+ */
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ bool oversized = false;
+
+ if (xsk) {
+ oversized = xsk->chunk_size < (1 << page_shift);
+ WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift));
+ }
+
+ /* XSK frame size doesn't match the UMR page size, either because the
+ * frame size is not a power of two, or it's smaller than the minimal
+ * page size supported by the firmware.
+ * It's possible to receive packets bigger than MTU in certain setups.
+ * To avoid writing over the XSK frame boundary, the top region of each
+ * stride is mapped to a garbage page, resulting in two mappings of
+ * different sizes per frame.
+ */
+ if (oversized) {
+ /* An optimization for frame sizes equal to 3 * power_of_two.
+ * 3 KSMs point to the frame, and one KSM points to the garbage
+ * page, which works faster than KLM.
+ */
+ if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3))
+ return MLX5E_MPWRQ_UMR_MODE_TRIPLE;
+
+ return MLX5E_MPWRQ_UMR_MODE_OVERSIZED;
+ }
+
+ /* XSK frames can start at arbitrary unaligned locations, but they all
+ * have the same size, which is a power of two. This allows optimizing
+ * to one KSM per frame.
+ */
+ if (unaligned)
+ return MLX5E_MPWRQ_UMR_MODE_UNALIGNED;
+
+ /* XSK: frames are naturally aligned, MTT can be used.
+ * Non-XSK: Allocations happen in units of CPU pages, therefore, the
+ * mappings are naturally aligned.
+ */
+ return MLX5E_MPWRQ_UMR_MODE_ALIGNED;
+}
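+
+/* The selection above, summarized for a device whose minimal UMR page is
+ * 4096 bytes (illustrative):
+ *   regular RQ or aligned XSK, chunk 4096  -> ALIGNED (MTT)
+ *   unaligned XSK, chunk 4096              -> UNALIGNED (KSM)
+ *   unaligned XSK, chunk 3072              -> TRIPLE (3 + 1 KSMs)
+ *   unaligned XSK, chunk 2048              -> OVERSIZED (KLM pairs)
+ */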
+
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode)
+{
+ switch (mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return sizeof(struct mlx5_mtt);
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return sizeof(struct mlx5_ksm);
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return sizeof(struct mlx5_klm) * 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return sizeof(struct mlx5_ksm) * 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode);
+ return 0;
+}
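+
+/* Assuming the standard mlx5 descriptor sizes (8-byte MTT, 16-byte KSM and
+ * KLM), mlx5e_mpwrq_umr_entry_size() evaluates to 8, 16, 32 and 64 bytes
+ * respectively, which is why ALIGNED mode packs the most pages into a single
+ * UMR WQE.
+ */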
+
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u8 max_pages_per_wqe, max_log_mpwqe_size;
+ u16 max_wqe_size;
+
+ /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
+ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB;
+ max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe),
+ MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size;
+ max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift;
+
+ WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU);
+
+ return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ);
+}
+
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
+ u8 pages_per_wqe;
+
+ pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;
+
+ /* Two MTTs are needed to form an octword. The number of MTTs is encoded
+ * in octwords in a UMR WQE, so we need at least two to avoid mapping
+ * garbage addresses.
+ */
+ if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ pages_per_wqe = 2;
+
+ /* Sanity check for further calculations to succeed. */
+ BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
+ if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
+ return MLX5_MPWRQ_MAX_PAGES_PER_WQE;
+
+ return pages_per_wqe;
+}
+
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u16 umr_wqe_sz;
+
+ umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) +
+ ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
+
+ WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK);
+
+ return umr_wqe_sz;
+}
+
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode),
+ MLX5_SEND_WQE_BB);
+}
+
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+
+ /* Add another page as a buffer between WQEs. This page will absorb
+ * write overflow by the hardware, when receiving packets larger than
+ * MTU. These oversize packets are dropped by the driver at a later
+ * stage.
+ */
+ return ALIGN(pages_per_wqe + 1,
+ MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode));
+}
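+
+/* Worked example for mlx5e_mpwrq_mtts_per_wqe() (illustrative): in ALIGNED
+ * mode (8-byte entries) with 8 pages per WQE plus one buffer page, a WQEBB
+ * holds 64 / 8 = 8 entries, so ALIGN(8 + 1, 8) = 16 entries are reserved:
+ * 9 used, 7 padding.
+ */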
+
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
{
- return params->xdp_prog || xsk;
+ /* Same limits apply to KSMs and KLMs. */
+ u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS,
+ 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5E_MAX_RQ_NUM_MTTS;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return klm_limit;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ /* Each entry is two KLMs. */
+ return klm_limit / 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ /* Each entry is four KSMs. */
+ return klm_limit / 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode);
+ u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode);
+
+ return ilog2(max_entries / mtts_per_wqe);
+}
+
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) +
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ MLX5E_ORDER2_MAX_PACKET_MTU;
}
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
@@ -23,7 +223,7 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
return xsk->headroom;
headroom = NET_IP_ALIGN;
- if (mlx5e_rx_is_xdp(params, xsk))
+ if (params->xdp_prog)
headroom += XDP_PACKET_HEADROOM;
else
headroom += MLX5_RX_HEADROOM;
@@ -31,70 +231,80 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
return headroom;
}
-u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
- return linear_rq_headroom + hw_mtu;
+ return xsk->headroom + hw_mtu;
}
-static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
{
- u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);
-
- /* AF_XDP doesn't build SKBs in place. */
- if (!xsk)
- frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
+ /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
+ u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a
- * special case. It can run with frames smaller than a page, as it
- * doesn't allocate pages dynamically. However, here we pretend that
- * fragments are page-sized: it allows to treat XSK frames like pages
- * by redirecting alloc and free operations to XSK rings and by using
- * the fact there are no multiple packets per "page" (which is a frame).
- * The latter is important, because frames may come in a random order,
- * and we will have trouble assemblying a real page of multiple frames.
- */
- if (mlx5e_rx_is_xdp(params, xsk))
- frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
+ return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
+}
- /* Even if we can go with a smaller fragment size, we must not put
- * multiple packets into a single frame.
+static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ bool mpwqe)
+{
+ /* XSK frames are mapped as individual pages, because frames may come in
+ * an arbitrary order from random locations in the UMEM.
*/
if (xsk)
- frag_sz = max_t(u32, frag_sz, xsk->chunk_size);
+ return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
- return frag_sz;
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (params->xdp_prog)
+ return PAGE_SIZE;
+
+ return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
}
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
+ u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
- return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ order_base_2(linear_stride_sz);
}
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
- * than one page. For this, check both with and without xsk.
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
+ return false;
+
+ /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
+ * must fit into a CPU page.
*/
- u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
- mlx5e_rx_get_linear_frag_sz(params, NULL));
+ if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
+ return false;
+
+ /* XSK frames must be big enough to hold the packet data. */
+ if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size)
+ return false;
- return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE &&
- linear_frag_sz <= PAGE_SIZE;
+ return true;
}
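+/* Example for mlx5e_rx_is_linear_skb() above (illustrative, 4K pages): with
+ * MTU 1500 and no XDP, headroom plus the hardware MTU plus the
+ * skb_shared_info overhead fits in one page, so the RQ stays linear; an MTU
+ * around 4000 pushes the sum past PAGE_SIZE and forces the non-linear
+ * (fragmented) layout.
+ */
+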
-bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
- u8 log_stride_sz, u8 log_num_strides)
+static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ u8 log_stride_sz, u8 log_num_strides,
+ u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
{
- if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ)
+ if (log_stride_sz + log_num_strides !=
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode))
return false;
if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE ||
@@ -114,28 +324,53 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- s8 log_num_strides;
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 log_num_strides;
u8 log_stride_sz;
+ u8 log_wqe_sz;
+
+ if (!mlx5e_rx_is_linear_skb(mdev, params, xsk))
+ return false;
+
+ log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
- if (!mlx5e_rx_is_linear_skb(params, xsk))
+ if (log_wqe_sz < log_stride_sz)
return false;
- log_stride_sz = order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz;
+ log_num_strides = log_wqe_sz - log_stride_sz;
- return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides);
+ return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz,
+ log_num_strides, page_shift,
+ umr_mode);
}
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 log_pkts_per_wqe, page_shift, max_log_rq_size;
+
+ log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk);
+ page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode);
/* Numbers are unsigned, don't subtract to avoid underflow. */
if (params->log_rq_mtu_frames <
log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+ /* Ethtool's rx_max_pending is calculated for a regular RQ, which uses
+ * pages of PAGE_SIZE. The max length of an XSK RQ may differ if it uses
+ * a frame size not equal to PAGE_SIZE.
+ * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on
+ * unexpected failure.
+ */
+ if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size))
+ return max_log_rq_size;
+
return params->log_rq_mtu_frames - log_pkts_per_wqe;
}
@@ -165,7 +400,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk)
{
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
+ return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
@@ -174,20 +409,35 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- return MLX5_MPWRQ_LOG_WQE_SZ -
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz)
+{
+#define UMR_WQE_BULK (2)
+ return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1);
+}
+
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(params, xsk) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+ return linear_headroom;
+
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return linear_headroom;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ return linear_headroom;
- return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
- mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+ return 0;
}
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -195,14 +445,14 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
u16 stop_room;
- stop_room = mlx5e_tls_get_stop_room(mdev, params);
- stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ stop_room = mlx5e_ktls_get_stop_room(mdev, params);
+ stop_room += mlx5e_stop_room_for_max_wqe(mdev);
if (is_mpwqe)
- /* A MPWQE can take up to the maximum-sized WQE + all the normal
- * stop room can be taken if a new packet breaks the active
- * MPWQE session and allocates its WQEs right away.
+ /* A MPWQE can take up to the maximum cacheline-aligned WQE +
+ * all the normal stop room can be taken if a new packet breaks
+ * the active MPWQE session and allocates its WQEs right away.
*/
- stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ stop_room += mlx5e_stop_room_for_mpwqe(mdev);
return stop_room;
}
@@ -309,25 +559,46 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
}
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
- if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
- return false;
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL);
- if (mlx5_fpga_is_ipsec_device(mdev))
- return false;
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
- if (params->xdp_prog) {
- /* XSK params are not considered here. If striding RQ is in use,
- * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
- * be called with the known XSK params.
- */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- return false;
+ if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u16 max_mtu_pkts;
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return -EINVAL;
+
+ /* Current RQ length is too big for the given frame size: the
+ * needed number of WQEs exceeds the maximum.
+ */
+ max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, umr_mode));
+ if (params->log_rq_mtu_frames > max_mtu_pkts) {
+ mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
+ 1 << params->log_rq_mtu_frames, xsk->chunk_size);
+ return -EINVAL;
}
- return true;
+ return 0;
}
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
@@ -340,7 +611,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
- BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
+ BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
BIT(params->log_rq_mtu_frames),
BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
@@ -348,8 +619,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
- params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
- MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+ params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_CYCLIC;
}
@@ -359,15 +629,16 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
{
/* Prefer Striding RQ, unless any of the following holds:
* - Striding RQ configuration is not possible/supported.
- * - Slow PCI heuristic.
+ * - CQE compression is ON, and stride_index mini_cqe layout is not supported.
* - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
*
* No XSK params: checking the availability of striding RQ in general.
*/
- if (!slow_pci_heuristic(mdev) &&
- mlx5e_striding_rq_possible(mdev, params) &&
+ if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ||
+ MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) &&
+ !mlx5e_mpwrq_validate_regular(mdev, params) &&
(mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
- !mlx5e_rx_is_linear_skb(params, NULL)))
+ !mlx5e_rx_is_linear_skb(mdev, params, NULL)))
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
mlx5e_set_rq_type(mdev, params);
mlx5e_init_rq_type_params(mdev, params);
@@ -385,58 +656,132 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
};
}
+static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
+{
+ if (xdp)
+ /* XDP requires all fragments to be of the same size. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size;
+
+ /* Optimization for small packets: the last fragment is bigger than the others. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
+}
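+
+/* With MLX5E_MAX_RX_FRAGS = 4 and 2048-byte fragments on a 4K page
+ * (illustrative numbers for mlx5e_max_nonlinear_mtu()): XDP permits
+ * first_frag_size + 3 * 2048 bytes, while the SKB path permits
+ * first_frag_size + 2 * 2048 + 4096 bytes thanks to the bigger last
+ * fragment.
+ */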
+
#define DEFAULT_FRAG_SIZE (2048)
-static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_frags_info *info)
+static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_frags_info *info)
{
u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
int frag_size_max = DEFAULT_FRAG_SIZE;
+ int first_frag_size_max;
u32 buf_size = 0;
+ u16 headroom;
+ int max_mtu;
int i;
- if (mlx5_fpga_is_ipsec_device(mdev))
- byte_count += MLX5E_METADATA_ETHER_LEN;
-
- if (mlx5e_rx_is_linear_skb(params, xsk)) {
+ if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
int frag_stride;
- frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
- frag_stride = roundup_pow_of_two(frag_stride);
+ frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false);
info->arr[0].frag_size = byte_count;
info->arr[0].frag_stride = frag_stride;
info->num_frags = 1;
- info->wqe_bulk = PAGE_SIZE / frag_stride;
+
+ /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The
+ * first WQE in the page is responsible for allocating this
+ * page; this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are
+ * still not completed, the allocation must stop before k*N.
+ */
+ info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1;
+
goto out;
}
- if (byte_count > PAGE_SIZE +
- (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
+ headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu || params->xdp_prog) {
frag_size_max = PAGE_SIZE;
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu) {
+ mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n",
+ params->sw_mtu, max_mtu);
+ return -EINVAL;
+ }
+ }
i = 0;
while (buf_size < byte_count) {
int frag_size = byte_count - buf_size;
- if (i < MLX5E_MAX_RX_FRAGS - 1)
+ if (i == 0)
+ frag_size = min(frag_size, first_frag_size_max);
+ else if (i < MLX5E_MAX_RX_FRAGS - 1)
frag_size = min(frag_size, frag_size_max);
info->arr[i].frag_size = frag_size;
- info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
-
buf_size += frag_size;
+
+ if (params->xdp_prog) {
+ /* XDP multi buffer expects fragments of the same size. */
+ info->arr[i].frag_stride = frag_size_max;
+ } else {
+ if (i == 0) {
+ /* Ensure that headroom and tailroom are included. */
+ frag_size += headroom;
+ frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
+ info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+ }
+
i++;
}
info->num_frags = i;
- /* number of different wqes sharing a page */
- info->wqe_bulk = 1 + (info->num_frags % 2);
+
+ /* The last fragment of WQE with index 2*N may share the page with the
+ * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1
+ * is not completed yet, WQE 2*N must not be allocated, as it's
+ * responsible for allocating a new page.
+ */
+ if (frag_size_max == PAGE_SIZE) {
+ /* No WQE can start in the middle of a page. */
+ info->wqe_index_mask = 0;
+ } else {
+ /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments,
+ * because there would be more than MLX5E_MAX_RX_FRAGS of them.
+ */
+ WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE);
+
+ /* An odd number of fragments allows packing the last fragment of
+ * the previous WQE and the first fragment of the next WQE into
+ * the same page.
+ * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS
+ * is 4, the last fragment can be bigger than the rest only if
+ * it's the fourth one, so WQEs consisting of 3 fragments will
+ * always share a page.
+ * When a page is shared, WQE bulk size is 2, otherwise just 1.
+ */
+ info->wqe_index_mask = info->num_frags % 2;
+ }
out:
- info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
+ /* Bulking optimization to skip allocation until at least 8 WQEs can be
+ * allocated in a row. At the same time, never start allocation when
+ * the page is still used by older WQEs.
+ */
+ info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+
info->log_num_frags = order_base_2(info->num_frags);
+
+ return 0;
}
static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
@@ -472,7 +817,7 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
- int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(params, xsk));
+ int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
int wqe_size = BIT(log_stride_sz) * num_strides;
 /* +1 is for the case that the pkt_per_rsrv doesn't consume the reservation
@@ -496,7 +841,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
else
- log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) +
mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -533,17 +878,22 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
int ndsegs = 1;
+ int err;
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
- log_wqe_num_of_strides)) {
+ log_wqe_num_of_strides,
+ page_shift, umr_mode)) {
mlx5_core_err(mdev,
- "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u\n",
- log_wqe_stride_size, log_wqe_num_of_strides);
+ "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n",
+ log_wqe_stride_size, log_wqe_num_of_strides,
+ umr_mode);
return -EINVAL;
}
@@ -551,7 +901,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
MLX5_SET(wq, wq, log_wqe_stride_size,
log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
- MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
MLX5_SET(wq, wq, shampo_enable, true);
MLX5_SET(wq, wq, log_reservation_size,
@@ -572,7 +922,9 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
}
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ if (err)
+ return err;
ndsegs = param->frags_info.num_frags;
}
@@ -638,8 +990,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
bool allow_swp;
- allow_swp = mlx5_geneve_tx_allowed(mdev) ||
- !!MLX5_IPSEC_DEV(mdev);
+ allow_swp = mlx5_geneve_tx_allowed(mdev) ||
+ !!mlx5_ipsec_device_caps(mdev);
mlx5e_build_sq_param_common(mdev, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
MLX5_SET(sqc, sqc, allow_swp, allow_swp);
@@ -661,13 +1013,6 @@ static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev,
param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
-static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
-{
- void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
-
- return MLX5_GET(wq, wq, log_wq_sz);
-}
-
 /* This function calculates the maximum number of header entries that are needed
 * per WQE; the formula is based on the size of the reservations and the
* restriction we have about max packets for reservation that is equal to max
@@ -728,25 +1073,98 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
return wqebbs;
}
+static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 umr_wqebbs;
+
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+
+ return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+}
+
static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_rq_param *rqp)
{
- u32 wqebbs;
+ u32 wqebbs, total_pages, useful_space;
/* MLX5_WQ_TYPE_CYCLIC */
if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
- wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc));
+ /* UMR WQEs for the regular RQ. */
+ wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);
+
+ /* If XDP program is attached, XSK may be turned on at any time without
+ * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of
+ * both regular RQ and XSK RQ.
+ *
+ * XSK uses different values of page_shift, and the total number of UMR
+ * WQEBBs depends on it. This dependency is complex and not monotonic,
+ * especially taking into consideration that some of the parameters come
+ * from capabilities. Hence, we have to try all valid values of XSK
+ * frame size (and page_shift) to find the maximum.
+ */
+ if (params->xdp_prog) {
+ u32 max_xsk_wqebbs = 0;
+ u8 frame_shift;
+
+ for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
+ frame_shift <= PAGE_SHIFT; frame_shift++) {
+ /* The headroom doesn't affect the calculation. */
+ struct mlx5e_xsk_param xsk = {
+ .chunk_size = 1 << frame_shift,
+ .unaligned = false,
+ };
+
+ /* XSK aligned mode. */
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a power of two. */
+ xsk.unaligned = true;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is not equal to stride size. */
+ xsk.chunk_size -= 1;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a triple power of two. */
+ xsk.chunk_size = (1 << frame_shift) / 4 * 3;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+ }
+
+ wqebbs += max_xsk_wqebbs;
+ }
+
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);
+
+ /* UMR WQEs don't cross the page boundary, they are padded with NOPs.
+ * This padding is always smaller than the max WQE size. That gives us
+ * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes
+ * per page. The number of pages is estimated as the total size of WQEs
+ * divided by the useful space in page, rounding up. If some WQEs don't
+ * fully fit into the useful space, they can occupy part of the padding,
+ * which proves this estimation to be correct (reserve enough space).
+ */
+ useful_space = PAGE_SIZE -
+ (mlx5e_get_max_sq_wqebbs(mdev) - 1) * MLX5_SEND_WQE_BB;
+ total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space);
+ wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB);
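+ /* Example (illustrative): with 4K pages and a 16-WQEBB (1024-byte) max
+ * WQE, useful_space = 4096 - 1024 + 64 = 3136 bytes, i.e. 49 of the 64
+ * WQEBBs in a page are assumed usable by this estimate.
+ */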
+
return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
}
static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
{
- if (mlx5e_accel_is_ktls_rx(mdev))
+ if (mlx5e_is_ktls_rx(mdev))
return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
@@ -774,10 +1192,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
mlx5e_build_sq_param_common(mdev, param);
- param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
- param->is_tls = mlx5e_accel_is_ktls_rx(mdev);
+ param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
+ param->is_tls = mlx5e_is_ktls_rx(mdev);
if (param->is_tls)
- param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */
+ param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
@@ -785,6 +1203,7 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
@@ -793,6 +1212,7 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
mlx5e_build_sq_param_common(mdev, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
+ param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk);
mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}
@@ -812,7 +1232,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
- mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
+ mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq);
mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 433e6967692d..034debd140bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -9,6 +9,7 @@
struct mlx5e_xsk_param {
u16 headroom;
u16 chunk_size;
+ bool unaligned;
};
struct mlx5e_cq_param {
@@ -31,6 +32,7 @@ struct mlx5e_sq_param {
struct mlx5_wq_param wq;
bool is_mpw;
bool is_tls;
+ bool is_xdp_mb;
u16 stop_room;
};
@@ -51,37 +53,26 @@ struct mlx5e_create_sq_param {
u8 min_inline_mode;
};
-static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
- u16 qid,
- enum mlx5e_rq_group group,
- u16 *ix)
-{
- int nch = params->num_channels;
- int ch = qid - nch * group;
-
- if (ch < 0 || ch >= nch)
- return false;
-
- *ix = ch;
- return true;
-}
-
-static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
- u16 qid,
- u16 *ix,
- enum mlx5e_rq_group *group)
-{
- u16 nch = params->num_channels;
-
- *ix = qid % nch;
- *group = qid / nch;
-}
-
-static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
- struct mlx5e_params *params, u64 qid)
-{
- return qid < params->num_channels * profile->rq_groups;
-}
+/* Striding RQ dynamic parameters */
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode);
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
/* Parameter calculations */
@@ -91,25 +82,23 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
- u8 log_stride_sz, u8 log_num_strides);
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
@@ -129,6 +118,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
@@ -154,6 +144,7 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
struct mlx5e_cq_param *param);
void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
struct mlx5e_sq_param *param);
int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
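
The striding-RQ helpers exported above parameterize the MPWQE layout by page_shift and UMR mode instead of hardcoding PAGE_SIZE. As a rough illustration of how such quantities relate (an assumed relationship for illustration, not the driver's exact formula):

#include <stdint.h>

/* If one WQE covers 2^log_wqe_sz bytes built from 2^page_shift-byte
 * pages, the page count per WQE follows directly.
 * Assumes log_wqe_sz >= page_shift. */
static uint32_t pages_per_wqe(uint8_t log_wqe_sz, uint8_t page_shift)
{
	return (uint32_t)1 << (log_wqe_sz - page_shift);
}

/* Example: a 256 KiB WQE (log_wqe_sz = 18) on 4 KiB pages
 * (page_shift = 12) spans 1 << 6 = 64 pages. */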
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index 673f1c82d381..c9d5d8d93994 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz,
- xoff, &port_buffer, &update_buffer);
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
+ port_buff_cell_sz, &port_buffer, &update_buffer);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 18d542b1c5cb..8469e9c38670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -79,19 +79,49 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
}
+#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
+
+static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
+}
+
+static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ struct sk_buff *skb;
+
+ ptpsq->cq_stats->resync_event++;
+
+ while (skb_cc != skb_id) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+ ptpsq->cq_stats->resync_cqe++;
+ skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ }
+}
+
static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
struct mlx5_cqe64 *cqe,
int budget)
{
- struct sk_buff *skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
+ u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct sk_buff *skb;
ktime_t hwtstamp;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
ptpsq->cq_stats->err_cqe++;
goto out;
}
+ if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
+ mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
+
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
hwtstamp, ptpsq->cq_stats);
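
The resync logic above hinges on comparing two free-running counters reduced by the same power-of-two mask: the wqe_counter echoed in the CQE and the local skb FIFO consumer counter. A compact standalone sketch of the drop check (hypothetical names):

#include <stdint.h>
#include <stdbool.h>

struct ts_fifo {
	uint16_t cc;   /* local consumer counter */
	uint16_t mask; /* (1 << cap_bits) - 1; 0 when the cap is absent */
};

/* A mismatch after masking means some timestamp CQEs were lost, and the
 * FIFO must be drained up to the CQE's index before normal handling. */
static bool ts_cqe_dropped(const struct ts_fifo *f, uint16_t cqe_ctr)
{
	return f->mask && ((f->cc & f->mask) != (cqe_ctr & f->mask));
}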
@@ -195,7 +225,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
int node;
sq->pdev = c->pdev;
- sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
@@ -242,6 +271,7 @@ static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq);
+ struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev;
ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)),
GFP_KERNEL, numa);
@@ -251,7 +281,9 @@ static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa)
ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc;
ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc;
ptpsq->skb_fifo.mask = wq_sz - 1;
-
+ if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter))
+ ptpsq->ts_cqe_ctr_mask =
+ (1 << MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1;
return 0;
}
@@ -449,7 +481,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
- param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ param->stop_room = mlx5e_stop_room_for_max_wqe(mdev);
mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}
@@ -590,37 +622,39 @@ static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
}
-static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
+static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
if (!ptp_fs->valid)
return;
mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule);
- mlx5e_fs_tt_redirect_any_destroy(priv);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
- mlx5e_fs_tt_redirect_udp_destroy(priv);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
ptp_fs->valid = false;
}
static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
{
u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
- struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+ struct mlx5e_flow_steering *fs = priv->fs;
struct mlx5_flow_handle *rule;
+ struct mlx5e_ptp_fs *ptp_fs;
int err;
+ ptp_fs = mlx5e_fs_get_ptp(fs);
if (ptp_fs->valid)
return 0;
- err = mlx5e_fs_tt_redirect_udp_create(priv);
+ err = mlx5e_fs_tt_redirect_udp_create(fs);
if (err)
goto out_free;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -628,7 +662,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
}
ptp_fs->udp_v4_rule = rule;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -636,11 +670,11 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
}
ptp_fs->udp_v6_rule = rule;
- err = mlx5e_fs_tt_redirect_any_create(priv);
+ err = mlx5e_fs_tt_redirect_any_create(fs);
if (err)
goto out_destroy_udp_v6_rule;
- rule = mlx5e_fs_tt_redirect_any_add_rule(priv, tirn, ETH_P_1588);
+ rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
goto out_destroy_fs_any;
@@ -651,13 +685,13 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
return 0;
out_destroy_fs_any:
- mlx5e_fs_tt_redirect_any_destroy(priv);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
out_destroy_udp_v6_rule:
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
out_destroy_udp_v4_rule:
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
out_destroy_fs_udp:
- mlx5e_fs_tt_redirect_udp_destroy(priv);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
out_free:
return err;
}
@@ -691,7 +725,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (err)
goto err_free;
- netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64);
+ netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll);
mlx5e_ptp_build_params(c, cparams, params);
@@ -737,6 +771,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
mlx5e_ptp_rx_set_fs(c->priv);
mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_sched(&c->napi);
}
}
@@ -764,29 +799,31 @@ int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
return 0;
}
-int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv)
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
{
struct mlx5e_ptp_fs *ptp_fs;
- if (!priv->profile->rx_ptp_support)
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
return 0;
ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL);
if (!ptp_fs)
return -ENOMEM;
+ mlx5e_fs_set_ptp(fs, ptp_fs);
- priv->fs.ptp_fs = ptp_fs;
return 0;
}
-void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv)
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
{
- struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
- if (!priv->profile->rx_ptp_support)
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
return;
- mlx5e_ptp_rx_unset_fs(priv);
+ mlx5e_ptp_rx_unset_fs(fs);
kfree(ptp_fs);
}
@@ -794,7 +831,7 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
{
struct mlx5e_ptp *c = priv->channels.ptp;
- if (!priv->profile->rx_ptp_support)
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
return 0;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
@@ -812,6 +849,6 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules");
return -EINVAL;
}
- mlx5e_ptp_rx_unset_fs(priv);
+ mlx5e_ptp_rx_unset_fs(priv->fs);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index a71a32e00ebb..cc7efde88ac3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -6,6 +6,7 @@
#include "en.h"
#include "en_stats.h"
+#include "en/txrx.h"
#include <linux/ptp_classify.h>
#define MLX5E_PTP_CHANNEL_IX 0
@@ -17,6 +18,7 @@ struct mlx5e_ptpsq {
u16 skb_fifo_pc;
struct mlx5e_skb_fifo skb_fifo;
struct mlx5e_ptp_cq_stats *cq_stats;
+ u16 ts_cqe_ctr_mask;
};
enum {
@@ -67,14 +69,24 @@ static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
fk.ports.dst == htons(PTP_EV_PORT));
}
+static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq)
+{
+ if (!sq->ptpsq)
+ return true;
+
+ return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo);
+}
+
int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
u8 lag_port, struct mlx5e_ptp **cp);
void mlx5e_ptp_close(struct mlx5e_ptp *c);
void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
-int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv);
-void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv);
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set);
enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 50977f01a050..2842195ee548 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -1,11 +1,17 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
#include "en.h"
#include "params.h"
#include "../qos.h"
+#include "en/htb.h"
-#define BYTES_IN_MBIT 125000
+struct qos_sq_callback_params {
+ struct mlx5e_priv *priv;
+ struct mlx5e_channels *chs;
+};
int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes)
{
@@ -27,121 +33,14 @@ int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev));
}
-int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv)
-{
- int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev));
-
- return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1;
-}
-
-/* Software representation of the QoS tree (internal to this file) */
-
-static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv)
-{
- int size = mlx5e_qos_max_leaf_nodes(priv->mdev);
- int res;
-
- WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
- res = find_first_zero_bit(priv->htb.qos_used_qids, size);
-
- return res == size ? -ENOSPC : res;
-}
-
-struct mlx5e_qos_node {
- struct hlist_node hnode;
- struct rcu_head rcu;
- struct mlx5e_qos_node *parent;
- u64 rate;
- u32 bw_share;
- u32 max_average_bw;
- u32 hw_id;
- u32 classid; /* 16-bit, except root. */
- u16 qid;
-};
-
-#define MLX5E_QOS_QID_INNER 0xffff
-#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
-
-static struct mlx5e_qos_node *
-mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid,
- struct mlx5e_qos_node *parent)
-{
- struct mlx5e_qos_node *node;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return ERR_PTR(-ENOMEM);
-
- node->parent = parent;
-
- node->qid = qid;
- __set_bit(qid, priv->htb.qos_used_qids);
-
- node->classid = classid;
- hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid);
-
- mlx5e_update_tx_netdev_queues(priv);
-
- return node;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv)
-{
- struct mlx5e_qos_node *node;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return ERR_PTR(-ENOMEM);
-
- node->qid = MLX5E_QOS_QID_INNER;
- node->classid = MLX5E_HTB_CLASSID_ROOT;
- hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid);
-
- return node;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid)
-{
- struct mlx5e_qos_node *node = NULL;
-
- hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) {
- if (node->classid == classid)
- break;
- }
-
- return node;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid)
-{
- struct mlx5e_qos_node *node = NULL;
-
- hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) {
- if (node->classid == classid)
- break;
- }
-
- return node;
-}
-
-static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
-{
- hash_del_rcu(&node->hnode);
- if (node->qid != MLX5E_QOS_QID_INNER) {
- __clear_bit(node->qid, priv->htb.qos_used_qids);
- mlx5e_update_tx_netdev_queues(priv);
- }
- kfree_rcu(node, rcu);
-}
-
/* TX datapath API */
-static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
{
/* These channel params are safe to access from the datapath, because:
- * 1. This function is called only after checking priv->htb.maj_id != 0,
+ * 1. This function is called only after checking selq->htb_maj_id != 0,
* and the number of queues can't change while HTB offload is active.
- * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for
+ * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for
* mlx5e_select_queue to finish while holding priv->state_lock,
* preventing other code from changing the number of queues.
*/
@@ -150,30 +49,7 @@ static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid;
}
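
In this layout, QoS queues are appended after every regular (and PTP) TC queue. A worked instance of the formula, under assumed values:

/* Worked example (assumed values): 8 channels, PTP TX enabled
 * (is_ptp = 1), 3 DCB TCs. QoS queue 5 then maps to netdev txq
 * (8 + 1) * 3 + 5 = 32. */
static unsigned int qid_from_qos(unsigned int num_channels, unsigned int is_ptp,
				 unsigned int num_tc, unsigned int qid)
{
	return (num_channels + is_ptp) * num_tc + qid;
}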
-int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid)
-{
- struct mlx5e_qos_node *node;
- u16 qid;
- int res;
-
- rcu_read_lock();
-
- node = mlx5e_sw_node_find_rcu(priv, classid);
- if (!node) {
- res = -ENOENT;
- goto out;
- }
- qid = READ_ONCE(node->qid);
- if (qid == MLX5E_QOS_QID_INNER) {
- res = -EINVAL;
- goto out;
- }
- res = mlx5e_qid_from_qos(&priv->channels, qid);
-
-out:
- rcu_read_unlock();
- return res;
-}
+/* SQ lifecycle */
static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
{
@@ -190,10 +66,8 @@ static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
return mlx5e_state_dereference(priv, qos_sqs[qid]);
}
-/* SQ lifecycle */
-
-static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
- struct mlx5e_qos_node *node)
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id)
{
struct mlx5e_create_cq_param ccp = {};
struct mlx5e_txqsq __rcu **qos_sqs;
@@ -206,13 +80,13 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
params = &chs->params;
- txq_ix = mlx5e_qid_from_qos(chs, node->qid);
+ txq_ix = mlx5e_qid_from_qos(chs, node_qid);
- WARN_ON(node->qid > priv->htb.max_qos_sqs);
- if (node->qid == priv->htb.max_qos_sqs) {
+ WARN_ON(node_qid > priv->htb_max_qos_sqs);
+ if (node_qid == priv->htb_max_qos_sqs) {
struct mlx5e_sq_stats *stats, **stats_list = NULL;
- if (priv->htb.max_qos_sqs == 0) {
+ if (priv->htb_max_qos_sqs == 0) {
stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
sizeof(*stats_list),
GFP_KERNEL);
@@ -225,16 +99,16 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
return -ENOMEM;
}
if (stats_list)
- WRITE_ONCE(priv->htb.qos_sq_stats, stats_list);
- WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats);
- /* Order max_qos_sqs increment after writing the array pointer.
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
+ /* Order htb_max_qos_sqs increment after writing the array pointer.
* Pairs with smp_load_acquire in en_stats.c.
*/
- smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1);
+ smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1);
}
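
The release/acquire pairing noted in the comment is a standard publication pattern; expressed with C11 atomics (standing in for the kernel's smp_store_release/smp_load_acquire), it looks like this sketch:

#include <stdatomic.h>

struct sq_stats;

static struct sq_stats *stats_slot[256];
static _Atomic unsigned int published_cnt;

/* Writer: fill the slot, then publish the count with release semantics. */
static void publish(unsigned int i, struct sq_stats *s)
{
	stats_slot[i] = s;
	atomic_store_explicit(&published_cnt, i + 1, memory_order_release);
}

/* Reader: an acquire load guarantees every slot below the returned
 * count is fully initialized. */
static unsigned int snapshot(void)
{
	return atomic_load_explicit(&published_cnt, memory_order_acquire);
}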
- ix = node->qid % params->num_channels;
- qid = node->qid / params->num_channels;
+ ix = node_qid % params->num_channels;
+ qid = node_qid / params->num_channels;
c = chs->c[ix];
qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
@@ -253,8 +127,8 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
if (err)
goto err_free_sq;
err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params,
- &param_sq, sq, 0, node->hw_id,
- priv->htb.qos_sq_stats[node->qid]);
+ &param_sq, sq, 0, hw_id,
+ priv->htb_qos_sq_stats[node_qid]);
if (err)
goto err_close_cq;
@@ -269,13 +143,29 @@ err_free_sq:
return err;
}
-static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
+static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id)
{
+ struct qos_sq_callback_params *cb_params = data;
+
+ return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id);
+}
+
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_priv *priv = data;
struct mlx5e_txqsq *sq;
+ u16 qid;
- sq = mlx5e_get_qos_sq(priv, node->qid);
+ sq = mlx5e_get_qos_sq(priv, node_qid);
- WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq);
+ qid = mlx5e_qid_from_qos(&priv->channels, node_qid);
+
+ /* If it's a new queue, it will be marked as started at this point.
+ * Stop it before updating txq2sq.
+ */
+ mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
+
+ priv->txq2sq[qid] = sq;
/* Make the change to txq2sq visible before the queue is started.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
@@ -283,11 +173,13 @@ static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node
*/
smp_wmb();
- qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid);
+ qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid);
mlx5e_activate_txqsq(sq);
+
+ return 0;
}
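
Condensed, the activation sequence above is: stop the txq, retarget txq2sq, then publish before restart. A kernel-style pseudocode sketch of that ordering (not a drop-in):

tx_disable_queue(txq);   /* mlx5e_xmit can no longer run on this txq */
txq2sq[qid] = sq;        /* safe to update while stopped             */
smp_wmb();               /* mapping visible before the queue starts  */
activate_txqsq(sq);      /* restart; xmit's lock acquire pairs up    */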
-static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
{
struct mlx5e_txqsq *sq;
@@ -298,11 +190,16 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq);
- /* The queue is disabled, no synchronization with datapath is needed. */
priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+
+ /* Make the change to txq2sq visible before the queue is started again.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
}
-static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
{
struct mlx5e_txqsq __rcu **qos_sqs;
struct mlx5e_params *params;
@@ -352,7 +249,7 @@ void mlx5e_qos_close_queues(struct mlx5e_channel *c)
kvfree(qos_sqs);
}
-static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
{
int i;
@@ -360,7 +257,7 @@ static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
mlx5e_qos_close_queues(chs->c[i]);
}
-static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
{
u16 qos_sqs_size;
int i;
@@ -396,24 +293,20 @@ err_free:
int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
{
- struct mlx5e_qos_node *node = NULL;
- int bkt, err;
-
- if (!priv->htb.maj_id)
- return 0;
+ struct qos_sq_callback_params callback_params;
+ int err;
err = mlx5e_qos_alloc_queues(priv, chs);
if (err)
return err;
- hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
- if (node->qid == MLX5E_QOS_QID_INNER)
- continue;
- err = mlx5e_open_qos_sq(priv, chs, node);
- if (err) {
- mlx5e_qos_close_all_queues(chs);
- return err;
- }
+ callback_params.priv = priv;
+ callback_params.chs = chs;
+
+ err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params);
+ if (err) {
+ mlx5e_qos_close_all_queues(chs);
+ return err;
}
return 0;
@@ -421,14 +314,7 @@ int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
void mlx5e_qos_activate_queues(struct mlx5e_priv *priv)
{
- struct mlx5e_qos_node *node = NULL;
- int bkt;
-
- hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
- if (node->qid == MLX5E_QOS_QID_INNER)
- continue;
- mlx5e_activate_qos_sq(priv, node);
- }
+ mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv);
}
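
Both call sites now funnel through mlx5e_htb_enumerate_leaves(), which applies a callback to each leaf and stops on the first error. A self-contained sketch of an iterator with that contract (hypothetical types; the real one lives in en/htb.c):

#include <stddef.h>
#include <stdint.h>

struct htb_leaf { uint16_t qid; uint32_t hw_id; };

typedef int (*leaf_cb)(void *data, uint16_t node_qid, uint32_t hw_id);

static int enumerate_leaves(const struct htb_leaf *leaves, size_t n,
			    leaf_cb cb, void *data)
{
	for (size_t i = 0; i < n; i++) {
		int err = cb(data, leaves[i].qid, leaves[i].hw_id);
		if (err)
			return err; /* caller unwinds, as open_queues does */
	}
	return 0;
}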
void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
@@ -457,7 +343,7 @@ void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
}
}
-static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
{
int i;
@@ -465,278 +351,14 @@ static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
mlx5e_qos_deactivate_queues(chs->c[i]);
}
-/* HTB API */
-
-int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *root;
- bool opened;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
-
- if (!mlx5_qos_is_supported(priv->mdev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
- return -EOPNOTSUPP;
- }
-
- opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
- if (opened) {
- err = mlx5e_qos_alloc_queues(priv, &priv->channels);
- if (err)
- return err;
- }
-
- root = mlx5e_sw_node_create_root(priv);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto err_free_queues;
- }
-
- err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
- goto err_sw_node_delete;
- }
-
- WRITE_ONCE(priv->htb.defcls, htb_defcls);
- /* Order maj_id after defcls - pairs with
- * mlx5e_select_queue/mlx5e_select_htb_queues.
- */
- smp_store_release(&priv->htb.maj_id, htb_maj_id);
-
- return 0;
-
-err_sw_node_delete:
- mlx5e_sw_node_delete(priv, root);
-
-err_free_queues:
- if (opened)
- mlx5e_qos_close_all_queues(&priv->channels);
- return err;
-}
-
-int mlx5e_htb_root_del(struct mlx5e_priv *priv)
-{
- struct mlx5e_qos_node *root;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_DESTROY\n");
-
- WRITE_ONCE(priv->htb.maj_id, 0);
- synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */
-
- root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT);
- if (!root) {
- qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n");
- return -ENOENT;
- }
- err = mlx5_qos_destroy_node(priv->mdev, root->hw_id);
- if (err)
- qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n",
- root->hw_id, err);
- mlx5e_sw_node_delete(priv, root);
-
- mlx5e_qos_deactivate_all_queues(&priv->channels);
- mlx5e_qos_close_all_queues(&priv->channels);
-
- return err;
-}
-
-static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate,
- struct mlx5e_qos_node *parent, u32 *bw_share)
-{
- u64 share = 0;
-
- while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
- parent = parent->parent;
-
- if (parent->max_average_bw)
- share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
- parent->max_average_bw);
- else
- share = 101;
-
- *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
-
- qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
- rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
-
- return 0;
-}
-
-static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
-{
- *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
-
- qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
- ceil, *max_average_bw);
-}
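
The two converters being moved out encode a simple unit scheme: max_average_bw is the ceiling in Mbit/s (BYTES_IN_MBIT = 125000 bytes per megabit), and bw_share is the rate as a percentage of the nearest ancestor's ceiling, clamped so 0 means unlimited and 1 is the minimum weight. A worked instance:

#define BYTES_IN_MBIT 125000

/* Worked example of the removed conversions:
 *   ceil = 25,000,000 B/s -> max_average_bw = 25000000 / 125000 = 200 (Mbit/s)
 *   rate = 12,500,000 B/s under that parent ->
 *     share = (12500000 * 100 / 125000) / 200 = 10000 / 200 = 50
 * so the class is granted bw_share 50, i.e. half the parent ceiling.
 * share == 0 maps to 1 (minimum weight); share > 100 maps to 0 (no limit). */
static inline unsigned int ceil_to_mbit(unsigned long long ceil_bytes_per_sec)
{
	return (unsigned int)(ceil_bytes_per_sec / BYTES_IN_MBIT);
}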
-
-int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
- u32 parent_classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *node, *parent;
- int qid;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
- classid, parent_classid, rate, ceil);
-
- qid = mlx5e_find_unused_qos_qid(priv);
- if (qid < 0) {
- NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached.");
- return qid;
- }
-
- parent = mlx5e_sw_node_find(priv, parent_classid);
- if (!parent)
- return -EINVAL;
-
- node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent);
- if (IS_ERR(node))
- return PTR_ERR(node);
-
- node->rate = rate;
- mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share);
- mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw);
-
- err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id,
- node->bw_share, node->max_average_bw,
- &node->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
- qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
- classid, err);
- mlx5e_sw_node_delete(priv, node);
- return err;
- }
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, node);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
- classid, err);
- } else {
- mlx5e_activate_qos_sq(priv, node);
- }
- }
-
- return mlx5e_qid_from_qos(&priv->channels, node->qid);
-}
-
-int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
- u64 rate, u64 ceil, struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *node, *child;
- int err, tmp_err;
- u32 new_hw_id;
- u16 qid;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
- classid, child_classid, rate, ceil);
-
- node = mlx5e_sw_node_find(priv, classid);
- if (!node)
- return -ENOENT;
-
- err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id,
- node->bw_share, node->max_average_bw,
- &new_hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
- qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n",
- classid, err);
- return err;
- }
-
- /* Intentionally reuse the qid for the upcoming first child. */
- child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node);
- if (IS_ERR(child)) {
- err = PTR_ERR(child);
- goto err_destroy_hw_node;
- }
-
- child->rate = rate;
- mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share);
- mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw);
-
- err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share,
- child->max_average_bw, &child->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
- qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
- classid, err);
- goto err_delete_sw_node;
- }
-
- /* No fail point. */
-
- qid = node->qid;
- /* Pairs with mlx5e_get_txq_by_classid. */
- WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- mlx5e_deactivate_qos_sq(priv, qid);
- mlx5e_close_qos_sq(priv, qid);
- }
-
- err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, classid, err);
-
- node->hw_id = new_hw_id;
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, child);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
- classid, err);
- } else {
- mlx5e_activate_qos_sq(priv, child);
- }
- }
-
- return 0;
-
-err_delete_sw_node:
- child->qid = MLX5E_QOS_QID_INNER;
- mlx5e_sw_node_delete(priv, child);
-
-err_destroy_hw_node:
- tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id);
- if (tmp_err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
- new_hw_id, classid, tmp_err);
- return err;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid)
-{
- struct mlx5e_qos_node *node = NULL;
- int bkt;
-
- hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode)
- if (node->qid == qid)
- break;
-
- return node;
-}
-
-static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
{
qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid);
netdev_tx_reset_queue(txq);
netif_tx_start_queue(txq);
}
-static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
{
struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid);
struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
@@ -749,251 +371,65 @@ static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
spin_unlock_bh(qdisc_lock(qdisc));
}
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *node;
- struct netdev_queue *txq;
- u16 qid, moved_qid;
- bool opened;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
-
- node = mlx5e_sw_node_find(priv, *classid);
- if (!node)
- return -ENOENT;
-
- /* Store qid for reuse. */
- qid = node->qid;
-
- opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
- if (opened) {
- txq = netdev_get_tx_queue(priv->netdev,
- mlx5e_qid_from_qos(&priv->channels, qid));
- mlx5e_deactivate_qos_sq(priv, qid);
- mlx5e_close_qos_sq(priv, qid);
- }
-
- err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, *classid, err);
-
- mlx5e_sw_node_delete(priv, node);
-
- moved_qid = mlx5e_qos_cur_leaf_nodes(priv);
-
- if (moved_qid == 0) {
- /* The last QoS SQ was just destroyed. */
- if (opened)
- mlx5e_reactivate_qos_sq(priv, qid, txq);
- return 0;
- }
- moved_qid--;
-
- if (moved_qid < qid) {
- /* The highest QoS SQ was just destroyed. */
- WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u",
- qid, moved_qid);
- if (opened)
- mlx5e_reactivate_qos_sq(priv, qid, txq);
- return 0;
- }
-
- WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
- qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
-
- node = mlx5e_sw_node_find_by_qid(priv, moved_qid);
- WARN(!node, "Could not find a node with qid %u to move to queue %u",
- moved_qid, qid);
-
- /* Stop traffic to the old queue. */
- WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
- __clear_bit(moved_qid, priv->htb.qos_used_qids);
-
- if (opened) {
- txq = netdev_get_tx_queue(priv->netdev,
- mlx5e_qid_from_qos(&priv->channels, moved_qid));
- mlx5e_deactivate_qos_sq(priv, moved_qid);
- mlx5e_close_qos_sq(priv, moved_qid);
- }
-
- /* Prevent packets from the old class from getting into the new one. */
- mlx5e_reset_qdisc(priv->netdev, moved_qid);
-
- __set_bit(qid, priv->htb.qos_used_qids);
- WRITE_ONCE(node->qid, qid);
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, node);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
- node->classid, moved_qid, qid, err);
- } else {
- mlx5e_activate_qos_sq(priv, node);
- }
- }
-
- mlx5e_update_tx_netdev_queues(priv);
- if (opened)
- mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
-
- *classid = node->classid;
- return 0;
-}
-
-int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
- struct netlink_ext_ack *extack)
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt)
{
- struct mlx5e_qos_node *node, *parent;
- u32 old_hw_id, new_hw_id;
- int err, saved_err = 0;
- u16 qid;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
- force ? "_FORCE" : "", classid);
-
- node = mlx5e_sw_node_find(priv, classid);
- if (!node)
- return -ENOENT;
-
- err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id,
- node->parent->bw_share,
- node->parent->max_average_bw,
- &new_hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
- qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
- classid, err);
- if (!force)
- return err;
- saved_err = err;
- }
-
- /* Store qid for reuse and prevent clearing the bit. */
- qid = node->qid;
- /* Pairs with mlx5e_get_txq_by_classid. */
- WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- mlx5e_deactivate_qos_sq(priv, qid);
- mlx5e_close_qos_sq(priv, qid);
- }
-
- /* Prevent packets from the old class from getting into the new one. */
- mlx5e_reset_qdisc(priv->netdev, qid);
-
- err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, classid, err);
-
- parent = node->parent;
- mlx5e_sw_node_delete(priv, node);
+ struct mlx5e_htb *htb = priv->htb;
+ int res;
- node = parent;
- WRITE_ONCE(node->qid, qid);
+ if (!htb && htb_qopt->command != TC_HTB_CREATE)
+ return -EINVAL;
- /* Early return on error in force mode. Parent will still be an inner
- * node to be deleted by a following delete operation.
- */
- if (saved_err)
- return saved_err;
-
- old_hw_id = node->hw_id;
- node->hw_id = new_hw_id;
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, node);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
- classid, err);
- } else {
- mlx5e_activate_qos_sq(priv, node);
+ switch (htb_qopt->command) {
+ case TC_HTB_CREATE:
+ if (!mlx5_qos_is_supported(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(htb_qopt->extack,
+ "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
}
- }
-
- err = mlx5_qos_destroy_node(priv->mdev, old_hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, classid, err);
-
- return 0;
-}
-
-static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *child;
- int err = 0;
- int bkt;
-
- hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) {
- u32 old_bw_share = child->bw_share;
- int err_one;
-
- if (child->parent != node)
- continue;
-
- mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share);
- if (child->bw_share == old_bw_share)
- continue;
-
- err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share,
- child->max_average_bw, child->hw_id);
- if (!err && err_one) {
- err = err_one;
-
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
- qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n",
- node->classid, err);
+ priv->htb = mlx5e_htb_alloc();
+ htb = priv->htb;
+ if (!htb)
+ return -ENOMEM;
+ res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv);
+ if (res) {
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
}
+ return res;
+ case TC_HTB_DESTROY:
+ mlx5e_htb_cleanup(htb);
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ return 0;
+ case TC_HTB_LEAF_ALLOC_QUEUE:
+ res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ case TC_HTB_LEAF_TO_INNER:
+ return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL:
+ return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL_LAST:
+ case TC_HTB_LEAF_DEL_LAST_FORCE:
+ return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid,
+ htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE,
+ htb_qopt->extack);
+ case TC_HTB_NODE_MODIFY:
+ return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil,
+ htb_qopt->extack);
+ case TC_HTB_LEAF_QUERY_QUEUE:
+ res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
}
-
- return err;
-}
-
-int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack)
-{
- u32 bw_share, max_average_bw;
- struct mlx5e_qos_node *node;
- bool ceil_changed = false;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
- classid, rate, ceil);
-
- node = mlx5e_sw_node_find(priv, classid);
- if (!node)
- return -ENOENT;
-
- node->rate = rate;
- mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share);
- mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw);
-
- err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share,
- max_average_bw, node->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
- qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n",
- classid, err);
- return err;
- }
-
- if (max_average_bw != node->max_average_bw)
- ceil_changed = true;
-
- node->bw_share = bw_share;
- node->max_average_bw = max_average_bw;
-
- if (ceil_changed)
- err = mlx5e_qos_update_children(priv, node, extack);
-
- return err;
}
struct mlx5e_mqprio_rl {
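
With all TC_HTB_* commands concentrated in mlx5e_htb_setup_tc(), the qdisc offload entry point reduces to a thin dispatch. A sketch of the expected call site shape (an assumed wrapper for illustration; the actual hookup lives in en_main.c):

static int setup_tc(struct net_device *dev, enum tc_setup_type type,
		    void *type_data)
{
	struct mlx5e_priv *priv = netdev_priv(dev);

	switch (type) {
	case TC_SETUP_QDISC_HTB:
		/* type_data is a struct tc_htb_qopt_offload */
		return mlx5e_htb_setup_tc(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}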
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
index b7558907ba20..4947afa23b73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
@@ -6,41 +6,39 @@
#include <linux/mlx5/driver.h>
-#define MLX5E_QOS_MAX_LEAF_NODES 256
+#define BYTES_IN_MBIT 125000
struct mlx5e_priv;
+struct mlx5e_htb;
struct mlx5e_channels;
struct mlx5e_channel;
+struct tc_htb_qopt_offload;
int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes);
int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
-int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv);
-
-/* TX datapath API */
-int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid);
-struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid);
/* SQ lifecycle */
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id);
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id);
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq);
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid);
+
int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs);
void mlx5e_qos_close_queues(struct mlx5e_channel *c);
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs);
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+/* TX datapath API */
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid);
/* HTB API */
-int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_root_del(struct mlx5e_priv *priv);
-int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
- u32 parent_classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
- u64 rate, u64 ceil, struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack);
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb);
/* MQPRIO TX rate limit */
struct mlx5e_mqprio_rl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 9c076aa20306..b6f5c1bcdbcd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
{
- struct mlx5e_rep_priv *rpriv;
- struct mlx5e_priv *priv;
-
- /* A given netdev is not a representor or not a slave of LAG configuration */
- if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
- return false;
-
- priv = netdev_priv(netdev);
- rpriv = priv->ppriv;
-
- /* Egress acl forward to vport is supported only non-uplink representor */
- return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
}
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
u16 fwd_vport_num;
int err;
- if (!mlx5e_rep_is_lag_netdev(netdev))
- return;
-
info = ptr;
lag_info = info->lower_state_info;
/* This is not an event of a representor becoming active slave */
@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
struct net_device *lag_dev;
struct mlx5e_priv *priv;
- if (!mlx5e_rep_is_lag_netdev(netdev))
- return;
-
priv = netdev_priv(netdev);
rpriv = priv->ppriv;
lag_dev = info->upper_dev;
@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_bond *bond;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return NOTIFY_DONE;
+
+ bond = container_of(nb, struct mlx5e_rep_bond, nb);
+ priv = netdev_priv(netdev);
+ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
+ /* Verify the VF representor is on the same device as the bond handling the netevent. */
+ if (rpriv->uplink_priv.bond != bond)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_CHANGELOWERSTATE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index c6d2f8c78db7..8099a21e674c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -164,6 +164,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr
return err;
}
+static int
+mlx5_esw_bridge_changeupper_validate_netdev(void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev))
+ return 0;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (!mlx5_lag_is_active(mdev))
+ return -EAGAIN;
+ if (!mlx5_lag_is_shared_fdb(mdev))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -171,6 +201,7 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
switch (event) {
case NETDEV_PRECHANGEUPPER:
+ err = mlx5_esw_bridge_changeupper_validate_netdev(ptr);
break;
case NETDEV_CHANGEUPPER:
@@ -269,6 +300,12 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id,
attr->u.vlan_filtering, br_offloads);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL:
+ err = mlx5_esw_bridge_vlan_proto_set(vport_num,
+ esw_owner_vhca_id,
+ attr->u.vlan_protocol,
+ br_offloads);
+ break;
default:
err = -EOPNOTSUPP;
}
@@ -491,7 +528,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
}
br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
- err = register_netdevice_notifier(&br_offloads->netdev_nb);
+ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
if (err) {
esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
err);
@@ -509,7 +546,9 @@ err_register_swdev_blk:
err_register_swdev:
destroy_workqueue(br_offloads->wq);
err_alloc_wq:
+ rtnl_lock();
mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
}
void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
@@ -524,7 +563,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
return;
cancel_delayed_work_sync(&br_offloads->update_work);
- unregister_netdevice_notifier(&br_offloads->netdev_nb);
+ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
unregister_switchdev_notifier(&br_offloads->nb);
destroy_workqueue(br_offloads->wq);
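
register_netdevice_notifier_net() scopes the notifier to a single network namespace (init_net here), so the bridge offload handler never sees events from foreign namespaces; the unregister path must use the matching _net variant with the same namespace, as the cleanup hunk does. In shape:

/* Namespaced notifier pairing (kernel API), generic callback name: */
nb.notifier_call = my_netdev_event_cb;
err = register_netdevice_notifier_net(&init_net, &nb);
/* ... */
unregister_netdevice_notifier_net(&init_net, &nb); /* same net required */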
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index fcb0892c08a9..fac7e3ff2674 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -21,6 +21,7 @@
#include "en/tc/sample.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/tc/int_port.h"
+#include "en/tc/act/act.h"
struct mlx5e_rep_indr_block_priv {
struct net_device *netdev;
@@ -263,14 +264,14 @@ int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
INIT_LIST_HEAD(&uplink_priv->unready_flows);
/* init shared tc flow table */
- err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ err = mlx5e_tc_esw_init(uplink_priv);
return err;
}
void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
{
/* delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
}
@@ -511,12 +512,129 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
return 0;
}
+static int
+mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct flow_action_entry *action;
+ struct mlx5e_tc_act *act;
+ bool add = false;
+ int i;
+
+ /* There is no use case currently for more than one action (e.g. pedit).
+ * When one appears, cleanup of multiple actions on error will need handling.
+ */
+ if (!flow_offload_has_one_action(&fl_act->action))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ flow_action_for_each(i, action, &fl_act->action) {
+ act = mlx5e_tc_act_get(action->id, ns_type);
+ if (!act)
+ continue;
+
+ if (!act->offload_action)
+ continue;
+
+ if (!act->offload_action(priv, fl_act, action))
+ add = true;
+ }
+
+ return add ? 0 : -EOPNOTSUPP;
+}
+
+static int
+mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->destroy_action)
+ return -EOPNOTSUPP;
+
+ return act->destroy_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->stats_action)
+ return -EOPNOTSUPP;
+
+ return act->stats_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ switch (fl_act->command) {
+ case FLOW_ACT_REPLACE:
+ return mlx5e_rep_indr_replace_act(rpriv, fl_act);
+ case FLOW_ACT_DESTROY:
+ return mlx5e_rep_indr_destroy_act(rpriv, fl_act);
+ case FLOW_ACT_STATS:
+ return mlx5e_rep_indr_stats_act(rpriv, fl_act);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv,
+ enum tc_setup_type type,
+ void *data)
+{
+ if (!data)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_ACT:
+ return mlx5e_rep_indr_setup_act(rpriv, data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
enum tc_setup_type type, void *type_data,
void *data,
void (*cleanup)(struct flow_block_cb *block_cb))
{
+ if (!netdev)
+ return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data);
+
switch (type) {
case TC_SETUP_BLOCK:
return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
index d6c7c81690eb..7c9dd3a75f8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
static inline void
mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
- struct sk_buff *skb) {}
+ struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); }
#endif /* CONFIG_MLX5_CLS_ACT */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 74086eb556ae..5f6f95ad6888 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
{
+ struct mlx5e_rq *xskrq = NULL;
struct mlx5_core_dev *mdev;
struct mlx5e_icosq *icosq;
struct net_device *dev;
@@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
int err;
icosq = ctx;
+
+ mutex_lock(&icosq->channel->icosq_recovery_lock);
+
+ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
rq = &icosq->channel->rq;
+ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
+ xskrq = &icosq->channel->xskrq;
mdev = icosq->channel->mdev;
dev = icosq->channel->netdev;
err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
@@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_deactivate_rq(rq);
+ if (xskrq)
+ mlx5e_deactivate_rq(xskrq);
+
err = mlx5e_wait_for_icosq_flush(icosq);
if (err)
goto out;
@@ -97,35 +107,31 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_reset_icosq_cc_pc(icosq);
+
mlx5e_free_rx_in_progress_descs(rq);
+ if (xskrq)
+ mlx5e_free_rx_in_progress_descs(xskrq);
+
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
mlx5e_activate_icosq(icosq);
- mlx5e_activate_rq(rq);
+ mlx5e_activate_rq(rq);
rq->stats->recover++;
- return 0;
-out:
- clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
- return err;
-}
-
-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
-{
- struct net_device *dev = rq->netdev;
- int err;
- err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
- if (err) {
- netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
- return err;
- }
- err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
- if (err) {
- netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
- return err;
+ if (xskrq) {
+ mlx5e_activate_rq(xskrq);
+ xskrq->stats->recover++;
}
+ mlx5e_trigger_napi_icosq(icosq->channel);
+
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+
return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+ return err;
}
static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
@@ -134,19 +140,18 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
int err;
mlx5e_deactivate_rq(rq);
- mlx5e_free_rx_descs(rq);
-
- err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
+ err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
+ clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
if (err)
- goto out;
+ return err;
- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
mlx5e_activate_rq(rq);
rq->stats->recover++;
+ if (rq->channel)
+ mlx5e_trigger_napi_icosq(rq->channel);
+ else
+ mlx5e_trigger_napi_sched(rq->cq.napi);
return 0;
-out:
- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
- return err;
}
static int mlx5e_rx_reporter_timeout_recover(void *ctx)
@@ -706,6 +711,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
+{
+ mutex_lock(&c->icosq_recovery_lock);
+}
+
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
+{
+ mutex_unlock(&c->icosq_recovery_lock);
+}
+
static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
.name = "rx",
.recover = mlx5e_rx_reporter_recover,
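The hunks above serialize the ICOSQ recovery work against the channel close path on icosq_recovery_lock; the new suspend/resume helpers let mlx5e_close_rq() hold recovery off for the whole teardown window. A minimal userspace sketch of that bracketing pattern, assuming pthreads stand in for the kernel mutex and with struct and field names invented for illustration:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model: a 'channel' with a recovery lock, mirroring the pattern above. */
    struct channel {
        pthread_mutex_t recovery_lock; /* stands in for icosq_recovery_lock */
        bool closing;                  /* set by the close path */
    };

    static void recover(struct channel *c)
    {
        pthread_mutex_lock(&c->recovery_lock);
        if (!c->closing)               /* skip recovery once close has started */
            printf("recovering RQ/ICOSQ\n");
        pthread_mutex_unlock(&c->recovery_lock);
    }

    /* Close path: suspend recovery, tear down, resume. The lock is held across
     * the whole teardown, so no recovery run can interleave with it. */
    static void close_channel(struct channel *c)
    {
        pthread_mutex_lock(&c->recovery_lock);   /* ..._suspend_recovery() */
        c->closing = true;
        printf("tearing down RQ/ICOSQ\n");
        pthread_mutex_unlock(&c->recovery_lock); /* ..._resume_recovery() */
    }

    int main(void)
    {
        struct channel c = { .recovery_lock = PTHREAD_MUTEX_INITIALIZER };

        recover(&c);
        close_channel(&c);
        recover(&c); /* now a no-op: the channel is closing */
        return 0;
    }

The key property is that recovery never observes a half-torn-down RQ/ICOSQ pair, because the close path owns the lock for the whole window.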
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 4f4bc8726ec4..60bc5b577ab9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms
return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
+static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
+
+ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
+}
+
static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
struct devlink_fmsg *fmsg)
{
@@ -561,11 +569,11 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
to_ctx.sq = sq;
err_ctx.ctx = &to_ctx;
err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
- err_ctx.dump = mlx5e_tx_reporter_dump_sq;
+ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
snprintf(err_str, sizeof(err_str),
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
- jiffies_to_usecs(jiffies - sq->txq->trans_start));
+ jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
return to_ctx.status;
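The dump-callback swap above fixes a void-pointer mismatch: for TX timeouts, err_ctx.ctx carries a mlx5e_tx_timeout_ctx wrapper rather than the SQ itself, so the dump handler must unwrap it before reaching the SQ. A hedged toy model of that bug class, with all names invented for illustration:

    #include <stdio.h>

    struct sq { int sqn; };
    struct timeout_ctx { struct sq *sq; int status; }; /* wrapper, like mlx5e_tx_timeout_ctx */

    /* Correct: the callback unpacks the wrapper before using the SQ. */
    static int dump_timeout(void *ctx)
    {
        struct timeout_ctx *to_ctx = ctx;          /* ctx is the wrapper... */
        printf("dump SQ 0x%x\n", to_ctx->sq->sqn); /* ...not the SQ itself */
        return 0;
    }

    int main(void)
    {
        struct sq sq = { .sqn = 0x2a };
        struct timeout_ctx to_ctx = { .sq = &sq };

        /* The buggy variant passed &to_ctx to a callback that cast it
         * straight to struct sq *, reading garbage fields. */
        return dump_timeout(&to_ctx);
    }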
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
index c1cdd8c2e37a..7f93426b88b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -442,7 +442,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
goto inner_tir;
err = mlx5e_tir_modify(tir, builder);
if (err) {
- mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n",
mlx5e_tir_get_tirn(tir), tt, err);
if (!final_err)
final_err = err;
@@ -457,7 +457,7 @@ inner_tir:
continue;
err = mlx5e_tir_modify(tir, builder);
if (err) {
- mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n",
mlx5e_tir_get_tirn(tir), tt, err);
if (!final_err)
final_err = err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
index 0015a81eb9a1..e1095bc36543 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -24,8 +24,6 @@ struct mlx5e_rx_res {
struct {
struct mlx5e_rqt direct_rqt;
struct mlx5e_tir direct_tir;
- struct mlx5e_rqt xsk_rqt;
- struct mlx5e_tir xsk_tir;
} *channels;
struct {
@@ -37,7 +35,6 @@ struct mlx5e_rx_res {
/* API for rx_res_rss_* */
static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
- const struct mlx5e_packet_merge_param *init_pkt_merge_param,
unsigned int init_nch)
{
bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
@@ -52,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
return -ENOMEM;
err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
- init_pkt_merge_param);
+ &res->pkt_merge_param);
if (err)
goto err_rss_free;
@@ -277,8 +274,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
}
-static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
- const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res)
{
bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
struct mlx5e_tir_builder *builder;
@@ -309,7 +305,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
inner_ft_support);
- mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
mlx5e_tir_builder_build_direct(builder);
err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
@@ -322,48 +318,8 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
mlx5e_tir_builder_clear(builder);
}
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- goto out;
-
- for (ix = 0; ix < res->max_nch; ix++) {
- err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
- res->mdev, false, res->drop_rqn);
- if (err) {
- mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
- err, ix);
- goto err_destroy_xsk_rqts;
- }
- }
-
- for (ix = 0; ix < res->max_nch; ix++) {
- mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- inner_ft_support);
- mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
- mlx5e_tir_builder_build_direct(builder);
-
- err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
- if (err) {
- mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
- err, ix);
- goto err_destroy_xsk_tirs;
- }
-
- mlx5e_tir_builder_clear(builder);
- }
-
goto out;
-err_destroy_xsk_tirs:
- while (--ix >= 0)
- mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
-
- ix = res->max_nch;
-err_destroy_xsk_rqts:
- while (--ix >= 0)
- mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
-
- ix = res->max_nch;
err_destroy_direct_tirs:
while (--ix >= 0)
mlx5e_tir_destroy(&res->channels[ix].direct_tir);
@@ -422,12 +378,6 @@ static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
for (ix = 0; ix < res->max_nch; ix++) {
mlx5e_tir_destroy(&res->channels[ix].direct_tir);
mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
-
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
-
- mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
- mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
}
kvfree(res->channels);
@@ -454,11 +404,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
res->pkt_merge_param = *init_pkt_merge_param;
init_rwsem(&res->pkt_merge_param_sem);
- err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
+ err = mlx5e_rx_res_rss_init_def(res, init_nch);
if (err)
goto err_out;
- err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param);
+ err = mlx5e_rx_res_channels_init(res);
if (err)
goto err_rss_destroy;
@@ -493,13 +443,6 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
}
-u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
-{
- WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
-
- return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir);
-}
-
u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
{
struct mlx5e_rss *rss = res->rss[0];
@@ -525,56 +468,53 @@ static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int i
return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
}
-void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
+ struct mlx5e_channels *chs,
+ unsigned int ix)
{
- unsigned int nch, ix;
+ u32 rqn = res->rss_rqns[ix];
int err;
- nch = mlx5e_channels_get_num(chs);
-
- for (ix = 0; ix < chs->num; ix++)
- mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
- res->rss_nch = chs->num;
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ rqn, ix, err);
+}
- mlx5e_rx_res_rss_enable(res);
+static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
+ unsigned int ix)
+{
+ int err;
- for (ix = 0; ix < nch; ix++) {
- u32 rqn;
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ res->drop_rqn, ix, err);
+}
- mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
- rqn, ix, err);
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+ unsigned int nch, ix;
+ int err;
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
+ nch = mlx5e_channels_get_num(chs);
- if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
- rqn = res->drop_rqn;
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- rqn, ix, err);
+ for (ix = 0; ix < chs->num; ix++) {
+ if (mlx5e_channels_is_xsk(chs, ix))
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
}
- for (ix = nch; ix < res->max_nch; ix++) {
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
- res->drop_rqn, ix, err);
+ res->rss_nch = chs->num;
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
+ mlx5e_rx_res_rss_enable(res);
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- res->drop_rqn, ix, err);
- }
+ for (ix = 0; ix < nch; ix++)
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+ for (ix = nch; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
u32 rqn;
@@ -597,22 +537,8 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
mlx5e_rx_res_rss_disable(res);
- for (ix = 0; ix < res->max_nch; ix++) {
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
- res->drop_rqn, ix, err);
-
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
-
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- res->drop_rqn, ix, err);
- }
+ for (ix = 0; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
@@ -623,33 +549,17 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
}
}
-int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
- unsigned int ix)
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk)
{
- u32 rqn;
- int err;
-
- if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
- return -EINVAL;
-
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- rqn, ix, err);
- return err;
-}
+ if (xsk)
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
-int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
-{
- int err;
+ mlx5e_rx_res_rss_enable(res);
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- res->drop_rqn, ix, err);
- return err;
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
}
int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
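The rx_res.c changes above drop the dedicated per-channel XSK RQT/TIR pair: each channel keeps a single direct RQT, and mlx5e_rx_res_xsk_update() simply repoints both the RSS RQ list and the direct RQT at either the regular or the XSK RQ. A minimal sketch of that bookkeeping, using plain arrays and illustrative RQ numbers rather than the firmware interface:

    #include <stdio.h>
    #include <stdbool.h>

    #define NCH 4

    /* One direct "RQT" slot per channel now points at either the regular
     * RQ number or the XSK RQ number for that channel. */
    static unsigned regular_rqn[NCH] = { 10, 11, 12, 13 };
    static unsigned xsk_rqn[NCH]     = { 20, 21, 22, 23 };
    static unsigned rss_rqns[NCH];   /* like res->rss_rqns */
    static unsigned direct_rqt[NCH]; /* like the per-channel direct RQT */

    /* Mirrors mlx5e_rx_res_xsk_update(): swap one channel between the two RQs. */
    static void xsk_update(unsigned ix, bool xsk)
    {
        rss_rqns[ix] = xsk ? xsk_rqn[ix] : regular_rqn[ix]; /* feed the RSS table */
        direct_rqt[ix] = rss_rqns[ix];                      /* redirect the direct RQT */
    }

    int main(void)
    {
        for (unsigned ix = 0; ix < NCH; ix++)
            xsk_update(ix, false);    /* activate: all channels regular */
        xsk_update(2, true);          /* enable XSK on channel 2 */
        for (unsigned ix = 0; ix < NCH; ix++)
            printf("ch%u -> RQ %u\n", ix, direct_rqt[ix]);
        return 0;
    }

Halving the number of RQTs/TIRs is possible because a channel is served by exactly one of the two RQs at any time, so one redirection target per channel suffices.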
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
index b39b20a720e0..5d5f64fab60f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -17,8 +17,7 @@ struct mlx5e_rss_params_hash;
enum mlx5e_rx_res_features {
MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
- MLX5E_RX_RES_FEATURE_XSK = BIT(1),
- MLX5E_RX_RES_FEATURE_PTP = BIT(2),
+ MLX5E_RX_RES_FEATURE_PTP = BIT(1),
};
/* Setup */
@@ -32,7 +31,6 @@ void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
/* TIRN getters for flow steering */
u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
-u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
@@ -40,9 +38,8 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
/* Activate/deactivate API */
void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
-int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
- unsigned int ix);
-int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk);
/* Configuration API */
void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
new file mode 100644
index 000000000000..f675b1926340
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "selq.h"
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include "en.h"
+#include "en/ptp.h"
+#include "en/htb.h"
+
+struct mlx5e_selq_params {
+ unsigned int num_regular_queues;
+ unsigned int num_channels;
+ unsigned int num_tcs;
+ union {
+ u8 is_special_queues;
+ struct {
+ bool is_htb : 1;
+ bool is_ptp : 1;
+ };
+ };
+ u16 htb_maj_id;
+ u16 htb_defcls;
+};
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
+{
+ struct mlx5e_selq_params *init_params;
+
+ selq->state_lock = state_lock;
+
+ selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL);
+ if (!selq->standby)
+ return -ENOMEM;
+
+ init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL);
+ if (!init_params) {
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ return -ENOMEM;
+ }
+ /* Assign dummy values, so that mlx5e_select_queue won't crash. */
+ *init_params = (struct mlx5e_selq_params) {
+ .num_regular_queues = 1,
+ .num_channels = 1,
+ .num_tcs = 1,
+ .is_htb = false,
+ .is_ptp = false,
+ .htb_maj_id = 0,
+ .htb_defcls = 0,
+ };
+ rcu_assign_pointer(selq->active, init_params);
+
+ return 0;
+}
+
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+{
+ WARN_ON_ONCE(selq->is_prepared);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ selq->is_prepared = true;
+
+ mlx5e_selq_apply(selq);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+}
+
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->num_channels = params->num_channels;
+ selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params);
+ selq->standby->num_regular_queues =
+ selq->standby->num_channels * selq->standby->num_tcs;
+ selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS);
+}
+
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *selq_active =
+ rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock));
+
+ return selq_active->htb_maj_id;
+}
+
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->is_htb = htb_maj_id;
+ selq->standby->htb_maj_id = htb_maj_id;
+ selq->standby->htb_defcls = htb_defcls;
+}
+
+void mlx5e_selq_apply(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *old_params;
+
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+
+ old_params = rcu_replace_pointer(selq->active, selq->standby,
+ lockdep_is_held(selq->state_lock));
+ synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */
+ selq->standby = old_params;
+}
+
+void mlx5e_selq_cancel(struct mlx5e_selq *selq)
+{
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+}
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP)
+ return mlx5e_get_dscp_up(priv, skb);
+#endif
+ if (skb_vlan_tag_present(skb))
+ return skb_vlan_tag_get_prio(skb);
+ return 0;
+}
+
+static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int up;
+
+ up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0;
+
+ return selq->num_regular_queues + up;
+}
+
+static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ u16 classid;
+
+ /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
+ if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id)
+ classid = TC_H_MIN(skb->priority);
+ else
+ classid = selq->htb_defcls;
+
+ if (!classid)
+ return 0;
+
+ return mlx5e_htb_get_txq_by_classid(priv->htb, classid);
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_selq_params *selq;
+ int txq_ix, up;
+
+ selq = rcu_dereference_bh(priv->selq.active);
+
+ /* This is a workaround needed only for the mlx5e_netdev_change_profile
+ * flow that zeroes out the whole priv without unregistering the netdev
+ * and without preventing ndo_select_queue from being called.
+ */
+ if (unlikely(!selq))
+ return 0;
+
+ if (likely(!selq->is_special_queues)) {
+ /* No special queues, netdev_pick_tx returns one of the regular ones. */
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ /* Normalize any picked txq_ix to [0, num_channels),
+ * so we can return a txq_ix that matches the channel and
+ * packet UP.
+ */
+ return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
+ up * selq->num_channels;
+ }
+
+ if (unlikely(selq->htb_maj_id)) {
+ /* num_tcs == 1, shortcut for PTP */
+
+ txq_ix = mlx5e_select_htb_queue(priv, skb, selq);
+ if (txq_ix > 0)
+ return txq_ix;
+
+ if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb)))
+ return selq->num_channels;
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Fix up netdev_pick_tx() so it doesn't choose the ptp_channel or HTB txqs.
+ * If one of them is selected, switch to a regular queue; the driver
+ * selects these queues only in mlx5e_select_ptpsq() and
+ * mlx5e_select_htb_queue().
+ */
+ return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
+ }
+
+ /* PTP is enabled */
+
+ if (mlx5e_use_ptpsq(skb))
+ return mlx5e_select_ptpsq(dev, skb, selq);
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Normalize any picked txq_ix to [0, num_channels). Queues in range
+ * [0, num_regular_queues) will be mapped to the corresponding channel
+ * index, so that we can apply the packet's UP (if num_tcs > 1).
+ * If netdev_pick_tx() picks the ptp_channel, switch to a regular queue,
+ * because the driver should select the PTP queue only in mlx5e_select_ptpsq().
+ */
+ txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ return txq_ix + up * selq->num_channels;
+}
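selq.c above is built around a double-buffered parameter object: the prepare_* helpers stage changes into the standby copy, and mlx5e_selq_apply() publishes it with rcu_replace_pointer(), waits for readers via synchronize_net(), then recycles the old active copy as the next standby. A minimal sketch of that ownership hand-off, assuming a single writer and ignoring the RCU mechanics:

    #include <stdio.h>

    struct params { int num_channels, num_tcs; };

    /* Double-buffered params, modeling selq->active / selq->standby. The
     * driver swaps these with rcu_replace_pointer() + synchronize_net();
     * plain assignment here only models the ownership hand-off. */
    static struct params buf_a, buf_b;
    static struct params *active = &buf_a, *standby = &buf_b;

    static void prepare(int nch, int ntc)
    {
        *standby = *active;           /* start from the published params */
        standby->num_channels = nch;  /* stage the new values */
        standby->num_tcs = ntc;
    }

    static void apply(void)
    {
        struct params *old = active;

        active = standby;  /* publish (rcu_replace_pointer in the driver) */
        /* synchronize_net() would run here, so no reader still sees 'old' */
        standby = old;     /* the old buffer becomes the next staging area */
    }

    int main(void)
    {
        prepare(8, 2);
        apply();
        printf("active: %d channels, %d tcs\n",
               active->num_channels, active->num_tcs);
        return 0;
    }

This is also why mlx5e_selq_cleanup() frees standby twice with an apply() in between: the second kvfree() reclaims what used to be the active copy.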
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
new file mode 100644
index 000000000000..fd590f80e4d1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_SELQ_H__
+#define __MLX5_EN_SELQ_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_selq_params;
+
+struct mlx5e_selq {
+ struct mlx5e_selq_params __rcu *active;
+ struct mlx5e_selq_params *standby;
+ struct mutex *state_lock; /* points to priv->state_lock */
+ bool is_prepared;
+};
+
+struct mlx5e_params;
+struct net_device;
+struct sk_buff;
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock);
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq);
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params);
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls);
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq);
+void mlx5e_selq_apply(struct mlx5e_selq *selq);
+void mlx5e_selq_cancel(struct mlx5e_selq *selq);
+
+static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
+{
+ while (unlikely(txq >= num_channels))
+ txq -= num_channels;
+ return txq;
+}
+
+static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
+{
+ if (unlikely(txq >= num_channels)) {
+ if (unlikely(txq >= num_channels << 3))
+ txq %= num_channels;
+ else
+ do
+ txq -= num_channels;
+ while (txq >= num_channels);
+ }
+ return txq;
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+
+#endif /* __MLX5_EN_SELQ_H__ */
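A quick worked example of the two normalization helpers above: regular txqs are laid out per TC (txq = channel + tc * num_channels), and num_tcs is small (at most 8), so subtracting num_channels a few times recovers the channel index cheaply; HTB txq numbers can sit far past the regular range, hence the one-shot modulo once they exceed 8x num_channels. A userspace copy of the same arithmetic:

    #include <stdio.h>

    /* Same arithmetic as the two helpers above, lifted into userspace. */
    static unsigned txq_to_ch_ix(unsigned txq, unsigned num_channels)
    {
        while (txq >= num_channels)
            txq -= num_channels;       /* cheap: at most a few multiples */
        return txq;
    }

    static unsigned txq_to_ch_ix_htb(unsigned txq, unsigned num_channels)
    {
        if (txq >= num_channels) {
            if (txq >= num_channels << 3)
                txq %= num_channels;   /* HTB txqs can be far out: divide once */
            else
                do
                    txq -= num_channels;
                while (txq >= num_channels);
        }
        return txq;
    }

    int main(void)
    {
        printf("%u\n", txq_to_ch_ix(7, 3));        /* 1: channel of txq 7 */
        printf("%u\n", txq_to_ch_ix_htb(1000, 3)); /* 1: falls back to modulo */
        return 0;
    }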
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
new file mode 100644
index 000000000000..21aab96357b5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_ACCEPT;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_accept = {
+ .can_offload = tc_act_can_offload_accept,
+ .parse_action = tc_act_parse_accept,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
new file mode 100644
index 000000000000..3337241cfd84
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc/post_act.h"
+#include "en/tc_priv.h"
+#include "mlx5_core.h"
+
+static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress,
+ [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle,
+ [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap,
+ [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype,
+ [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample,
+ [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+ [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push,
+ [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop,
+ [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan,
+};
+
+static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+};
+
+/**
+ * mlx5e_tc_act_get() - Get an action parser for an action id.
+ * @act_id: Flow action id.
+ * @ns_type: Flow namespace type.
+ */
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5e_tc_act **tc_acts;
+
+ tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic;
+
+ return tc_acts[act_id];
+}
+
+/**
+ * mlx5e_tc_act_init_parse_state() - Init a new parse_state.
+ * @parse_state: Parsing state.
+ * @flow: mlx5e tc flow being handled.
+ * @flow_action: flow action to parse.
+ * @extack: to set an error msg.
+ *
+ * The same parse_state should be passed to action parsers
+ * for tracking the current parsing state.
+ */
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ memset(parse_state, 0, sizeof(*parse_state));
+ parse_state->flow = flow;
+ parse_state->extack = extack;
+ parse_state->flow_action = flow_action;
+}
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder)
+{
+ struct flow_action_entry *act;
+ int i, j = 0;
+
+ flow_action_for_each(i, act, flow_action) {
+ /* Add CT action to be first. */
+ if (act->id == FLOW_ACTION_CT)
+ flow_action_reorder->entries[j++] = act;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT)
+ continue;
+ flow_action_reorder->entries[j++] = act;
+ }
+}
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct flow_action_entry *act;
+ struct mlx5e_tc_act *tc_act;
+ struct mlx5e_priv *priv;
+ int err = 0, i;
+
+ priv = parse_state->flow->priv;
+
+ flow_action_for_each(i, act, flow_action) {
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act || !tc_act->post_parse)
+ continue;
+
+ err = tc_act->post_parse(parse_state, priv, attr);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr)
+{
+ struct mlx5_core_dev *mdev = flow->priv->mdev;
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ int err;
+
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+
+ /* Set handle on current post act rule to next post act rule. */
+ err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts);
+ if (err) {
+ mlx5_core_warn(mdev, "Failed setting post action handle");
+ return err;
+ }
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return 0;
+}
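act.c above dispatches through per-namespace tables indexed by enum flow_action_id: a parser is looked up with mlx5e_tc_act_get(), and each optional callback is probed before use. A compact model of that pattern, with toy action ids and flag values rather than the mlx5 definitions:

    #include <stdio.h>

    /* Toy model of the per-namespace dispatch tables above: an array indexed
     * by action id, with optional callbacks probed before use. */
    enum act_id { ACT_ACCEPT, ACT_DROP, NUM_ACTS };

    struct tc_act {
        int (*parse)(int *attr_action);
    };

    static int parse_accept(int *a) { *a |= 1; return 0; } /* FWD-like flag */
    static int parse_drop(int *a)   { *a |= 2; return 0; } /* DROP-like flag */

    static struct tc_act accept_act = { .parse = parse_accept };
    static struct tc_act drop_act   = { .parse = parse_drop };

    static struct tc_act *acts[NUM_ACTS] = {
        [ACT_ACCEPT] = &accept_act,
        [ACT_DROP]   = &drop_act,
    };

    int main(void)
    {
        int attr_action = 0;
        enum act_id rule[] = { ACT_ACCEPT, ACT_DROP };

        for (unsigned i = 0; i < 2; i++) {
            struct tc_act *act = acts[rule[i]];   /* mlx5e_tc_act_get() analogue */
            if (!act || !act->parse)
                continue;                         /* unsupported action id */
            act->parse(&attr_action);
        }
        printf("attr->action = %#x\n", attr_action);
        return 0;
    }

Keeping FDB and NIC namespaces in separate tables lets unsupported actions fail the lookup per namespace instead of needing is-eswitch checks inside every parser.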
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
new file mode 100644
index 000000000000..e1570ff056ae
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_H__
+#define __MLX5_EN_TC_ACT_H__
+
+#include <net/tc_act/tc_pedit.h>
+#include <net/flow_offload.h>
+#include <linux/netlink.h>
+#include "eswitch.h"
+#include "pedit.h"
+
+struct mlx5_flow_attr;
+
+struct mlx5e_tc_act_parse_state {
+ struct flow_action *flow_action;
+ struct mlx5e_tc_flow *flow;
+ struct netlink_ext_ack *extack;
+ u32 actions;
+ bool ct;
+ bool ct_clear;
+ bool encap;
+ bool decap;
+ bool mpls_push;
+ bool eth_push;
+ bool eth_pop;
+ bool ptype_host;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
+ int if_count;
+ struct mlx5_tc_ct_priv *ct_priv;
+};
+
+struct mlx5e_tc_act {
+ bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr);
+
+ int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ bool (*is_multi_table_act)(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr);
+
+ int (*offload_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act);
+
+ int (*destroy_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+
+ int (*stats_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+};
+
+struct mlx5e_tc_flow_action {
+ unsigned int num_entries;
+ struct flow_action_entry **entries;
+};
+
+extern struct mlx5e_tc_act mlx5e_tc_act_drop;
+extern struct mlx5e_tc_act mlx5e_tc_act_trap;
+extern struct mlx5e_tc_act mlx5e_tc_act_accept;
+extern struct mlx5e_tc_act mlx5e_tc_act_mark;
+extern struct mlx5e_tc_act mlx5e_tc_act_goto;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap;
+extern struct mlx5e_tc_act mlx5e_tc_act_csum;
+extern struct mlx5e_tc_act mlx5e_tc_act_pedit;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic;
+extern struct mlx5e_tc_act mlx5e_tc_act_ct;
+extern struct mlx5e_tc_act mlx5e_tc_act_sample;
+extern struct mlx5e_tc_act mlx5e_tc_act_ptype;
+extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress;
+extern struct mlx5e_tc_act mlx5e_tc_act_police;
+
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack);
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder);
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type);
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr);
+
+#endif /* __MLX5_EN_TC_ACT_H__ */
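mlx5e_tc_act_reorder_flow_actions() in act.c above is a stable two-pass partition: CT actions are copied out first, then everything else, preserving relative order within each group. The same logic on plain ints:

    #include <stdio.h>

    /* Stable two-pass partition, mirroring the reorder helper above. */
    enum { ACT_OTHER = 0, ACT_CT = 1 };

    static void reorder(const int *in, int *out, int n)
    {
        int j = 0;

        for (int i = 0; i < n; i++)     /* pass 1: CT actions first */
            if (in[i] == ACT_CT)
                out[j++] = in[i];
        for (int i = 0; i < n; i++)     /* pass 2: everything else */
            if (in[i] != ACT_CT)
                out[j++] = in[i];
    }

    int main(void)
    {
        int in[] = { ACT_OTHER, ACT_CT, ACT_OTHER, ACT_CT };
        int out[4];

        reorder(in, out, 4);
        for (int i = 0; i < 4; i++)
            printf("%d ", out[i]);      /* prints: 1 1 0 0 */
        printf("\n");
        return 0;
    }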
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
new file mode 100644
index 000000000000..c0f08ae6a57f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/tc_act/tc_csum.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+csum_offload_supported(struct mlx5e_priv *priv,
+ u32 action,
+ u32 update_flags,
+ struct netlink_ext_ack *extack)
+{
+ u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+ TCA_CSUM_UPDATE_FLAG_UDP;
+
+ /* The HW recalculates checksums only when rewriting headers */
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "TC csum action is only offloaded with pedit");
+ netdev_warn(priv->netdev,
+ "TC csum action is only offloaded with pedit\n");
+ return false;
+ }
+
+ if (update_flags & ~prot_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload TC csum action for some header/s");
+ netdev_warn(priv->netdev,
+ "can't offload TC csum action for some header/s - flags %#x\n",
+ update_flags);
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ return csum_offload_supported(flow->priv, attr->action,
+ act->csum_flags, parse_state->extack);
+}
+
+static int
+tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_csum = {
+ .can_offload = tc_act_can_offload_csum,
+ .parse_action = tc_act_parse_csum,
+};
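csum_offload_supported() above accepts the action only when every requested update flag falls inside the supported IPv4/TCP/UDP set. The mask test in isolation, with flag values invented for the sketch:

    #include <stdio.h>

    /* Toy model of the csum flags check above: offload is refused when any
     * update flag outside the supported set is requested. */
    #define F_IP4  0x1
    #define F_TCP  0x2
    #define F_UDP  0x4
    #define F_ICMP 0x8   /* not offloadable in this model */

    static int csum_supported(unsigned flags)
    {
        unsigned supported = F_IP4 | F_TCP | F_UDP;

        return !(flags & ~supported);   /* any extra bit rejects the action */
    }

    int main(void)
    {
        printf("%d\n", csum_supported(F_IP4 | F_TCP)); /* 1: ok */
        printf("%d\n", csum_supported(F_ICMP));        /* 0: rejected */
        return 0;
    }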
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
new file mode 100644
index 000000000000..a829c94289c1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+
+static bool
+tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->ct && !clear_action) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ int err;
+
+ /* It's redundant to do ct clear more than once. */
+ if (clear_action && parse_state->ct_clear)
+ return 0;
+
+ err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
+ &attr->parse_attr->mod_hdr_acts,
+ act, parse_state->extack);
+ if (err)
+ return err;
+
+ if (mlx5e_is_eswitch_flow(parse_state->flow))
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ if (clear_action) {
+ parse_state->ct_clear = true;
+ } else {
+ attr->flags |= MLX5_ATTR_FLAG_CT;
+ flow_flag_set(parse_state->flow, CT);
+ parse_state->ct = true;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
+ int err;
+
+ /* If a ct action exists, we can ignore previous ct_clear actions */
+ if (parse_state->ct)
+ return 0;
+
+ if (parse_state->ct_clear) {
+ err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Failed to set registers for ct clear");
+ return err;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Prevent handling of additional, redundant clear actions */
+ parse_state->ct_clear = false;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->ct.action & TCA_CT_ACT_CLEAR)
+ return false;
+
+ return true;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ct = {
+ .can_offload = tc_act_can_offload_ct,
+ .parse_action = tc_act_parse_ct,
+ .is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .post_parse = tc_act_post_parse_ct,
+};
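The ct parser above keeps two bits of state: repeated ct_clear actions collapse into one, and a real CT action makes the post-parse step skip the clear-register setup entirely. A small model of that state machine, outside the driver and with illustrative names:

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model of the ct/ct_clear bookkeeping above. */
    struct state { bool ct, ct_clear; };

    static void parse(struct state *s, bool clear)
    {
        if (clear && s->ct_clear)
            return;                 /* redundant ct clear: ignore */
        if (clear)
            s->ct_clear = true;
        else
            s->ct = true;
    }

    static void post_parse(struct state *s)
    {
        if (s->ct)
            return;                 /* a real CT action overrides prior clears */
        if (s->ct_clear) {
            printf("set ct-clear registers once\n");
            s->ct_clear = false;    /* don't handle further clears */
        }
    }

    int main(void)
    {
        struct state s = { 0 };

        parse(&s, true);            /* ct clear */
        parse(&s, true);            /* redundant, ignored */
        post_parse(&s);             /* sets the clear regs exactly once */
        return 0;
    }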
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
new file mode 100644
index 000000000000..dd025a95c439
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_drop = {
+ .can_offload = tc_act_can_offload_drop,
+ .parse_action = tc_act_parse_drop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
new file mode 100644
index 000000000000..25174f68613e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "eswitch.h"
+
+static int
+validate_goto_chain(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
+ bool ft_flow = mlx5e_is_ft_flow(flow);
+ u32 dest_chain = act->chain_index;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_eswitch *esw;
+ u32 reformat_and_fwd;
+ u32 max_chain;
+
+ esw = priv->mdev->priv.eswitch;
+ chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc);
+ max_chain = mlx5_chains_get_chain_range(chains);
+ reformat_and_fwd = is_esw ?
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
+
+ if (ft_flow) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_chains_backwards_supported(chains) &&
+ dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+
+ if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ !reformat_and_fwd) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Goto chain is not allowed if action has reformat or decap");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (validate_goto_chain(flow->priv, flow, attr, act, extack))
+ return false;
+
+ return true;
+}
+
+static int
+tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->dest_chain = act->chain_index;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (!attr->dest_chain)
+ return 0;
+
+ if (parse_state->decap) {
+ /* It can be supported if we'll create a mapping for
+ * the tunnel device only (without tunnel), and set
+ * this tunnel id with this decap flow.
+ *
+ * On restore (miss), we'll just set this saved tunnel
+ * device.
+ */
+
+ NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported");
+ netdev_warn(priv->netdev, "Decap with goto isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_goto = {
+ .can_offload = tc_act_can_offload_goto,
+ .parse_action = tc_act_parse_goto,
+ .post_parse = tc_act_post_parse_goto,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
new file mode 100644
index 000000000000..e8d227595b3e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en_tc.h"
+
+static bool
+tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->nic_attr->flow_tag = act->mark;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mark = {
+ .can_offload = tc_act_can_offload_mark,
+ .parse_action = tc_act_parse_mark,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
new file mode 100644
index 000000000000..4ac7de3f6afa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_macvlan.h>
+#include <linux/if_vlan.h>
+#include <net/bareudp.h>
+#include <net/bonding.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+#include "en_rep.h"
+#include "lag/lag.h"
+
+static bool
+same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ return mlx5e_eswitch_vf_rep(priv->netdev) &&
+ priv->netdev == out_dev;
+}
+
+static int
+verify_uplink_forwarding(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rep_priv;
+
+ /* Forwarding non-encapsulated traffic between
+ * uplink ports is allowed only if the
+ * termination_table_raw_traffic cap is set.
+ *
+ * The input vport was stored in attr->in_rep.
+ * In the LAG case, *priv* is the private data of
+ * the uplink, which may not be the input vport.
+ */
+ rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep);
+
+ if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+ mlx5e_eswitch_uplink_rep(out_dev)))
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+ termination_table_raw_traffic)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are both uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ } else if (out_dev != rep_priv->netdev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not the same uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static bool
+is_duplicated_output_device(struct net_device *dev,
+ struct net_device *out_dev,
+ int *ifindexes, int if_count,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < if_count; i++) {
+ if (ifindexes[i] == out_dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device");
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
+ out_dev->name);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct net_device *
+get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
+{
+ struct net_device *fdb_out_dev = out_dev;
+ struct net_device *uplink_upper;
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev) {
+ fdb_out_dev = uplink_dev;
+ } else if (netif_is_lag_master(out_dev)) {
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
+ if (fdb_out_dev &&
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
+ fdb_out_dev = NULL;
+ }
+ rcu_read_unlock();
+ return fdb_out_dev;
+}
+
+static bool
+tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev) {
+ /* out_dev is NULL when filters with
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return false;
+ }
+
+ if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device");
+ return false;
+ }
+
+ if (parse_state->eth_pop && !parse_state->mpls_push) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push");
+ return false;
+ }
+
+ if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push");
+ return false;
+ }
+
+ if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) {
+ /* Ignore forward to self rules generated
+ * by adding both mlx5 devs to the flow table
+ * block on a normal nft offload setup.
+ */
+ return false;
+ }
+
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't support more output ports, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "can't support more than %d output ports, can't offload forwarding\n",
+ esw_attr->out_count);
+ return false;
+ }
+
+ if (parse_state->encap ||
+ netdev_port_same_parent_id(priv->netdev, out_dev) ||
+ netif_is_ovs_master(out_dev))
+ return true;
+
+ if (parse_attr->filter_dev != priv->netdev) {
+ /* All mlx5 devices are called to configure
+ * high level device filters. Therefore, the
+ * *attempt* to install a filter on invalid
+ * eswitch should not trigger an explicit error
+ */
+ return false;
+ }
+
+ NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
+
+ return false;
+}
+
+static int
+parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+
+ parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex;
+ parse_attr->tun_info[esw_attr->out_count] =
+ mlx5e_dup_tun_info(parse_state->tun_info);
+
+ if (!parse_attr->tun_info[esw_attr->out_count])
+ return -ENOMEM;
+
+ parse_state->encap = false;
+
+ if (parse_state->mpls_push) {
+ memcpy(&parse_attr->mpls_info[esw_attr->out_count],
+ &parse_state->mpls_info, sizeof(parse_state->mpls_info));
+ parse_state->mpls_push = false;
+ }
+ esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP;
+ esw_attr->out_count++;
+ /* attr->dests[].rep is resolved when we handle encap */
+
+ return 0;
+}
+
+static int
+parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct net_device *out_dev = act->dev;
+ struct net_device *uplink_dev;
+ struct mlx5e_priv *out_priv;
+ struct mlx5_eswitch *esw;
+ bool is_uplink_rep;
+ int *ifindexes;
+ int if_count;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ ifindexes = parse_state->ifindexes;
+ if_count = parse_state->if_count;
+
+ if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack))
+ return -EOPNOTSUPP;
+
+ parse_state->ifindexes[if_count] = out_dev->ifindex;
+ parse_state->if_count++;
+ is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
+ err = mlx5_lag_do_mirred(priv->mdev, out_dev);
+ if (err)
+ return err;
+
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
+ if (!out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(out_dev)) {
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack);
+ if (err)
+ return err;
+ }
+
+ if (is_vlan_dev(parse_attr->filter_dev)) {
+ err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack);
+ if (err)
+ return err;
+ }
+
+ if (netif_is_macvlan(out_dev))
+ out_dev = macvlan_dev_real_dev(out_dev);
+
+ err = verify_uplink_forwarding(priv, attr, out_dev, extack);
+ if (err)
+ return err;
+
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+
+ if (same_vf_reps(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself");
+ return -EOPNOTSUPP;
+ }
+
+ out_priv = netdev_priv(out_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
+
+ /* If output device is bond master then rules are not explicit
+ * so we don't attempt to count them.
+ */
+ if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ attr->lag.count = true;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+static int
+parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+ return 0;
+}
+
+static int
+tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct net_device *out_dev = act->dev;
+ int err = -EOPNOTSUPP;
+
+ if (parse_state->encap)
+ err = parse_mirred_encap(parse_state, act, attr);
+ else if (netdev_port_same_parent_id(priv->netdev, out_dev))
+ err = parse_mirred(parse_state, act, priv, attr);
+ else if (netif_is_ovs_master(out_dev))
+ err = parse_mirred_ovs_master(parse_state, act, priv, attr);
+
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred = {
+ .can_offload = tc_act_can_offload_mirred,
+ .parse_action = tc_act_parse_mirred,
+};
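get_fdb_out_dev() above resolves a LAG master to a device the FDB can actually forward to: a bond on top of the uplink maps back to the uplink itself, while another bond is usable only through its active slave, and only if that slave is an eswitch rep on the same parent HW. A speculative userspace model of that decision tree, with all structs and fields invented for illustration:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdbool.h>

    /* Toy model of get_fdb_out_dev(): resolve a LAG master to a concrete
     * FDB destination. Everything here is illustrative, not the netdev API. */
    struct dev {
        const char *name;
        bool lag_master;
        struct dev *upper;        /* bond on top, if any */
        struct dev *active_slave; /* for LAG masters */
        bool same_hw;             /* eswitch rep on the same parent HW */
    };

    static struct dev *fdb_out_dev(struct dev *uplink, struct dev *out)
    {
        if (uplink->upper && uplink->upper->lag_master && uplink->upper == out)
            return uplink;               /* bond over the uplink: use the uplink */
        if (out->lag_master) {
            struct dev *slave = out->active_slave;

            if (slave && slave->same_hw)
                return slave;            /* foreign bond: active rep slave only */
            return NULL;                 /* can't offload */
        }
        return out;                      /* plain device: use as-is */
    }

    int main(void)
    {
        struct dev bond = { "bond0", true, NULL, NULL, false };
        struct dev uplink = { "uplink", false, &bond, NULL, true };
        struct dev *res = fdb_out_dev(&uplink, &bond);

        printf("resolved: %s\n", res ? res->name : "(none)");
        return 0;
    }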
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
new file mode 100644
index 000000000000..90b4c1b34776
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+
+ if (act->id != FLOW_ACTION_REDIRECT)
+ return false;
+
+ if (priv->netdev->netdev_ops != out_dev->netdev_ops ||
+ !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "devices %s %s not on same switch HW, can't offload forwarding\n",
+ netdev_name(priv->netdev),
+ out_dev->name);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex;
+ flow_flag_set(parse_state->flow, HAIRPIN);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = {
+ .can_offload = tc_act_can_offload_mirred_nic,
+ .parse_action = tc_act_parse_mirred_nic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
new file mode 100644
index 000000000000..f106190bf37c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/bareudp.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_priv *priv = parse_state->flow->priv;
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel)) {
+		NL_SET_ERR_MSG_MOD(extack, "mpls push is not supported by the device");
+		return false;
+	}
+
+	if (act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
+		NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol");
+		return false;
+	}
+
+ return true;
+}
+
+static void
+copy_mpls_info(struct mlx5e_mpls_info *mpls_info,
+ const struct flow_action_entry *act)
+{
+ mpls_info->label = act->mpls_push.label;
+ mpls_info->tc = act->mpls_push.tc;
+ mpls_info->bos = act->mpls_push.bos;
+ mpls_info->ttl = act->mpls_push.ttl;
+}
+
+static int
+tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->mpls_push = true;
+ copy_mpls_info(&parse_state->mpls_info, act);
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct net_device *filter_dev;
+
+ filter_dev = attr->parse_attr->filter_dev;
+
+	/* We only support mpls pop if it is the first action,
+	 * or the second action after a tunnel key unset,
+	 * and the filter net device is bareudp. Subsequent
+	 * actions can be pedit and the last can be mirred
+	 * egress redirect.
+	 */
+ if ((act_index == 1 && !parse_state->decap) || act_index > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap");
+ return false;
+ }
+
+ if (!netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->esw_attr->eth.h_proto = act->mpls_pop.proto;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_flag_set(parse_state->flow, L3_TO_L2_DECAP);
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_push = {
+ .can_offload = tc_act_can_offload_mpls_push,
+ .parse_action = tc_act_parse_mpls_push,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = {
+ .can_offload = tc_act_can_offload_mpls_pop,
+ .parse_action = tc_act_parse_mpls_pop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
new file mode 100644
index 000000000000..47597c524e59
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "pedit.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+
+static const int pedit_header_offsets[] = {
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int
+set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u32 *curr_pmask, *curr_pval;
+
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
+
+ if (*curr_pmask & mask) { /* disallow acting twice on the same location */
+		NL_SET_ERR_MSG_MOD(extack,
+				   "pedit masks overlap, acting twice on the same location is not allowed");
+ goto out_err;
+ }
+
+ *curr_pmask |= mask;
+ *curr_pval |= (val & mask);
+
+ return 0;
+
+out_err:
+ return -EOPNOTSUPP;
+}
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+	u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; /* 0: set, 1: add */
+ u8 htype = act->mangle.htype;
+ int err = -EOPNOTSUPP;
+ u32 mask, val, offset;
+
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
+ goto out_err;
+ }
+
+ if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) {
+ NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported");
+ goto out_err;
+ }
+
+ mask = act->mangle.mask;
+ val = act->mangle.val;
+ offset = act->mangle.offset;
+
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack);
+ if (err)
+ goto out_err;
+
+ hdrs[cmd].pedits++;
+
+ return 0;
+out_err:
+ return err;
+}
+
+static bool
+tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(flow);
+
+ err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs,
+ parse_state->extack);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_pedit = {
+ .can_offload = tc_act_can_offload_pedit,
+ .parse_action = tc_act_parse_pedit,
+};
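
set_pedit_val() works by ORing every mangle's mask and value into per-header-type shadow structs, rejecting a second write that touches already-claimed bits; the offsetof() table above picks which shadow header a given htype lands in. A toy check of that accumulation rule (standalone C; set_val() and the 32-bit words are illustrative stand-ins, not the driver's layout):

#include <stdint.h>
#include <stdio.h>

/* toy set_pedit_val(): accumulate (mask, val) writes into one shadow word,
 * refusing any write whose mask overlaps bits already claimed
 */
static int set_val(uint32_t *pmask, uint32_t *pval, uint32_t mask, uint32_t val)
{
	if (*pmask & mask)
		return -1;	/* acting twice on the same location */
	*pmask |= mask;
	*pval |= val & mask;
	return 0;
}

int main(void)
{
	uint32_t m = 0, v = 0;

	printf("low half : %d\n", set_val(&m, &v, 0x0000ffff, 0x1234));	/* ok */
	printf("high half: %d\n", set_val(&m, &v, 0xffff0000, 0xa0000000));	/* ok */
	printf("overlap  : %d\n", set_val(&m, &v, 0x000000ff, 0x56));		/* rejected */
	printf("mask=0x%08x val=0x%08x\n", m, v);
	return 0;
}
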
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
new file mode 100644
index 000000000000..434c8bd710a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_PEDIT_H__
+#define __MLX5_EN_TC_ACT_PEDIT_H__
+
+#include "en_tc.h"
+
+struct pedit_headers {
+ struct ethhdr eth;
+ struct vlan_hdr vlan;
+ struct iphdr ip4;
+ struct ipv6hdr ip6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+};
+
+struct pedit_headers_action {
+ struct pedit_headers vals;
+ struct pedit_headers masks;
+ u32 pedits;
+};
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
new file mode 100644
index 000000000000..c8e5ca65bb6e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Offload not supported when conform action is not pipe or ok");
+ return false;
+ }
+ if (mlx5e_policer_validate(parse_state->flow_action, act,
+ parse_state->extack))
+ return false;
+
+ return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev);
+}
+
+static int
+fill_meter_params_from_act(const struct flow_action_entry *act,
+ struct mlx5e_flow_meter_params *params)
+{
+ params->index = act->hw_index;
+ if (act->police.rate_bytes_ps) {
+ params->mode = MLX5_RATE_LIMIT_BPS;
+ /* change rate to bits per second */
+ params->rate = act->police.rate_bytes_ps << 3;
+ params->burst = act->police.burst;
+ } else if (act->police.rate_pkt_ps) {
+ params->mode = MLX5_RATE_LIMIT_PPS;
+ params->rate = act->police.rate_pkt_ps;
+ params->burst = act->police.burst_pkt;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ int err;
+
+ err = fill_meter_params_from_act(act, &attr->meter_attr.params);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
+ attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_police(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_police_offload(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ int err = 0;
+
+ err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ if (err)
+ return err;
+
+ err = fill_meter_params_from_act(act, &params);
+ if (err)
+ return err;
+
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) {
+ meter = mlx5e_tc_meter_replace(priv->mdev, &params);
+ } else if (!IS_ERR(meter)) {
+ err = mlx5e_tc_meter_update(meter, &params);
+ mlx5e_tc_meter_put(meter);
+ }
+
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ err = PTR_ERR(meter);
+ }
+
+ return err;
+}
+
+static int
+tc_act_police_destroy(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+ /* first put for the get and second for cleanup */
+ mlx5e_tc_meter_put(meter);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+static int
+tc_act_police_stats(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ u64 bytes, packets, drops, lastuse;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+
+ mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse);
+ flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_police = {
+ .can_offload = tc_act_can_offload_police,
+ .parse_action = tc_act_parse_police,
+ .is_multi_table_act = tc_act_is_multi_table_act_police,
+ .offload_action = tc_act_police_offload,
+ .destroy_action = tc_act_police_destroy,
+ .stats_action = tc_act_police_stats,
+};
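
fill_meter_params_from_act() normalizes the two policer flavors: byte rates are converted to bits per second with the "<< 3", while packet rates are passed through as pkt/s and only scaled later by the meter code. A quick worked check of the byte-rate branch (standalone; the 10 MB/s policer is an arbitrary example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical policer: 10 MB/s with a 64 KB burst */
	uint64_t rate_bytes_ps = 10ULL * 1000 * 1000;
	uint64_t burst_bytes   = 64 * 1024;

	/* bytes/s -> bits/s, the "<< 3" in fill_meter_params_from_act() */
	uint64_t rate_bps = rate_bytes_ps << 3;

	printf("BPS meter: rate=%llu bit/s burst=%llu bytes\n",
	       (unsigned long long)rate_bps, (unsigned long long)burst_bytes);
	return 0;
}
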
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
new file mode 100644
index 000000000000..6454b031ff7a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (act->ptype != PACKET_HOST) {
+ NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host");
+ return -EOPNOTSUPP;
+ }
+
+ parse_state->ptype_host = true;
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ptype = {
+ .can_offload = tc_act_can_offload_ptype,
+ .parse_action = tc_act_parse_ptype,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
new file mode 100644
index 000000000000..ad09a8a5f36e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev)
+ return false;
+
+ if (!netif_is_ovs_master(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is supported only for OVS internal ports");
+ return false;
+ }
+
+ if (netif_is_ovs_master(parse_attr->filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is not supported from internal port");
+ return false;
+ }
+
+ if (!parse_state->ptype_host) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress requires ptype=host action");
+ return false;
+ }
+
+ if (esw_attr->out_count) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress is supported only as single destination");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = {
+ .can_offload = tc_act_can_offload_redirect_ingress,
+ .parse_action = tc_act_parse_redirect_ingress,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
new file mode 100644
index 000000000000..2c0196431302
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/psample.h>
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc/act/sample.h"
+
+static bool
+tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ bool ct_nat;
+
+ ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+
+ if (flow_flag_test(parse_state->flow, CT) && ct_nat) {
+ NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_attr *sample_attr = &attr->sample_attr;
+
+ sample_attr->rate = act->sample.rate;
+ sample_attr->group_num = act->sample.psample_group->group_num;
+
+ if (act->sample.truncate)
+ sample_attr->trunc_size = act->sample.trunc_size;
+
+ attr->flags |= MLX5_ATTR_FLAG_SAMPLE;
+ flow_flag_set(parse_state->flow, SAMPLE);
+
+ return 0;
+}
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr)
+{
+ if (MLX5_CAP_GEN(mdev, reg_c_preserve) ||
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return true;
+
+ return false;
+}
+
+static bool
+tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr);
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_sample = {
+ .can_offload = tc_act_can_offload_sample,
+ .parse_action = tc_act_parse_sample,
+ .is_multi_table_act = tc_act_is_multi_table_act_sample,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
new file mode 100644
index 000000000000..3efb3a15c5d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__
+#define __MLX5_EN_TC_ACT_SAMPLE_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr);
+
+#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
new file mode 100644
index 000000000000..53b270f652b9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->flow_action->num_entries != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_trap = {
+ .can_offload = tc_act_can_offload_trap,
+ .parse_action = tc_act_parse_trap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
new file mode 100644
index 000000000000..b4fa2de9711d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (!act->tunnel) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Zero tunnel attributes is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->tun_info = act->tunnel;
+ parse_state->encap = true;
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->decap = true;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_encap = {
+ .can_offload = tc_act_can_offload_tun_encap,
+ .parse_action = tc_act_parse_tun_encap,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_decap = {
+ .can_offload = tc_act_can_offload_tun_decap,
+ .parse_action = tc_act_parse_tun_decap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
new file mode 100644
index 000000000000..b86ac604d0c2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+static int
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ const struct flow_action_entry prio_tag_act = {
+ .vlan.vid = 0,
+ .vlan.prio =
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_value(*action,
+ &parse_attr->spec),
+ first_prio) &
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_criteria(*action,
+ &parse_attr->spec),
+ first_prio),
+ };
+
+ return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+ &prio_tag_act, parse_attr, action,
+ extack);
+}
+
+static int
+parse_tc_vlan_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action,
+ struct netlink_ext_ack *extack,
+ struct mlx5e_tc_act_parse_state *parse_state)
+{
+ u8 vlan_idx = attr->total_vlan;
+
+ if (vlan_idx >= MLX5_FS_VLAN_DEPTH) {
+ NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported");
+ return -EOPNOTSUPP;
+ }
+
+ switch (act->id) {
+ case FLOW_ACTION_VLAN_POP:
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+ } else {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ }
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
+ if (!attr->vlan_proto[vlan_idx])
+ attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vlan push action is not supported for vlan depth > 1");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ } else {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
+ act->vlan.prio)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ }
+ break;
+ case FLOW_ACTION_VLAN_POP_ETH:
+ parse_state->eth_pop = true;
+ break;
+ case FLOW_ACTION_VLAN_PUSH_ETH:
+ if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP))
+ return -EOPNOTSUPP;
+ parse_state->eth_push = true;
+ memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN);
+ memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN");
+ return -EINVAL;
+ }
+
+ attr->total_vlan = vlan_idx + 1;
+
+ return 0;
+}
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *vlan_dev = *out_dev;
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_PUSH,
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+ .vlan.prio = 0,
+ };
+ int err;
+
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL);
+ if (err)
+ return err;
+
+ rcu_read_lock();
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
+ rcu_read_unlock();
+ if (!*out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(*out_dev))
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack);
+
+ return err;
+}
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_POP,
+ };
+ int nest_level, err = 0;
+
+ nest_level = attr->parse_attr->filter_dev->lower_level -
+ priv->netdev->lower_level;
+ while (nest_level--) {
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action,
+ extack, NULL);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int err;
+
+ if (act->id == FLOW_ACTION_VLAN_PUSH &&
+ (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
+ /* Replace vlan pop+push with vlan modify */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act,
+ attr->parse_attr, &attr->action,
+ parse_state->extack);
+ } else {
+ err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action,
+ parse_state->extack, parse_state);
+ }
+
+ if (err)
+ return err;
+
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+		/* For prio tag mode, replace the vlan pop with a vlan prio
+		 * tag rewrite.
+		 */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr,
+ &attr->action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan = {
+ .can_offload = tc_act_can_offload_vlan,
+ .parse_action = tc_act_parse_vlan,
+ .post_parse = tc_act_post_parse_vlan,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
new file mode 100644
index 000000000000..2fa58c6f44eb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_VLAN_H__
+#define __MLX5_EN_TC_ACT_VLAN_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
new file mode 100644
index 000000000000..9a8a1a6bd99e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ u16 mask16 = VLAN_VID_MASK;
+ u16 val16 = act->vlan.vid & VLAN_VID_MASK;
+ const struct flow_action_entry pedit_act = {
+ .id = FLOW_ACTION_MANGLE,
+ .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
+ .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
+ .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
+ .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
+ };
+ u8 match_prio_mask, match_prio_val;
+ void *headers_c, *headers_v;
+ int err;
+
+ headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec);
+
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match");
+ return -EOPNOTSUPP;
+ }
+
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs,
+ extack);
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(parse_state->flow);
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr,
+ &attr->action, parse_state->extack);
+ if (err)
+ return err;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = {
+ .can_offload = tc_act_can_offload_vlan_mangle,
+ .parse_action = tc_act_parse_vlan_mangle,
+};
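
mlx5e_tc_act_vlan_add_rewrite_action() implements the VID change as a synthesized ethernet-header mangle over h_vlan_TCI, masked so only the 12 VID bits are rewritten and the PCP/DEI bits survive (which is why a prio change is rejected above). A toy check of those semantics (standalone C; this shows the intended TCI effect, not the driver's byte-order handling of the mangle fields):

#include <stdint.h>
#include <stdio.h>

#define VLAN_VID_MASK	0x0fff	/* low 12 bits of the TCI */

/* semantic effect of the synthesized TCI mangle: keep PCP/DEI, replace VID */
static uint16_t rewrite_vid(uint16_t tci, uint16_t new_vid)
{
	return (uint16_t)((tci & ~VLAN_VID_MASK) | (new_vid & VLAN_VID_MASK));
}

int main(void)
{
	uint16_t tci = 0xa064;	/* prio 5, VID 100 */

	printf("old tci=0x%04x new tci=0x%04x\n", tci, rewrite_vid(tci, 42));
	return 0;
}
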
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
new file mode 100644
index 000000000000..bb6b1a979ba1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_TC_CT_FS_H__
+#define __MLX5_EN_TC_CT_FS_H__
+
+struct mlx5_ct_fs {
+ const struct net_device *netdev;
+ struct mlx5_core_dev *dev;
+
+ /* private data */
+ void *priv_data[];
+};
+
+struct mlx5_ct_fs_rule {
+};
+
+struct mlx5_ct_fs_ops {
+ int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct);
+ void (*destroy)(struct mlx5_ct_fs *fs);
+
+ struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule);
+ void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule);
+
+ size_t priv_size;
+};
+
+static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs)
+{
+ return &fs->priv_data;
+}
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void);
+
+#if IS_ENABLED(CONFIG_MLX5_SW_STEERING)
+struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void);
+#else
+static inline struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return NULL;
+}
+#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */
+
+#endif /* __MLX5_EN_TC_CT_FS_H__ */
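
The header defines a tiny backend vtable with a trailing private area sized by priv_size, so a caller can try the SMFS ops and fall back to DMFS when software steering is compiled out (the stub returns NULL) or init fails. A userspace mock of that shape (the allocation flow and the fallback order are illustrative assumptions, not the driver's actual probe logic):

#include <stdio.h>
#include <stdlib.h>

/* mock of the ops/priv_size pattern in ct_fs.h */
struct fs {
	const char *name;	/* stand-in for the netdev/dev members */
	void *priv_data[];	/* backend private area, sized by priv_size */
};

struct fs_ops {
	int (*init)(struct fs *fs);
	void (*destroy)(struct fs *fs);
	size_t priv_size;
};

struct smfs_priv { int n_matchers; };

static int smfs_init(struct fs *fs)
{
	struct smfs_priv *p = (struct smfs_priv *)fs->priv_data;

	p->n_matchers = 0;
	return 0;	/* a nonzero return here would make the caller fall back */
}

static void smfs_destroy(struct fs *fs) { (void)fs; }

static const struct fs_ops smfs_ops = { smfs_init, smfs_destroy, sizeof(struct smfs_priv) };
static const struct fs_ops dmfs_ops = { NULL, NULL, 0 };

/* allocate the handle with the backend's private area appended */
static struct fs *fs_create(const struct fs_ops *ops, const char *name)
{
	struct fs *fs = calloc(1, sizeof(*fs) + ops->priv_size);

	if (!fs)
		return NULL;
	fs->name = name;
	if (ops->init && ops->init(fs)) {
		free(fs);
		return NULL;
	}
	return fs;
}

int main(void)
{
	const struct fs_ops *ops = &smfs_ops;		/* try smfs first */
	struct fs *fs = fs_create(ops, "ct");

	if (!fs) {					/* fall back to dmfs */
		ops = &dmfs_ops;
		fs = fs_create(ops, "ct");
	}
	printf("fs '%s' created, priv_size=%zu\n", fs->name, ops->priv_size);
	free(fs);
	return 0;
}
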
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
new file mode 100644
index 000000000000..ae4f55be48ce
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "en_tc.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args)
+
+struct mlx5_ct_fs_dmfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+};
+
+static int
+mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ return 0;
+}
+
+static void
+mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs)
+{
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5e_priv *priv = netdev_priv(fs->netdev);
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule;
+ int err;
+
+ dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL);
+ if (!dmfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
+ if (IS_ERR(dmfs_rule->rule)) {
+ err = PTR_ERR(dmfs_rule->rule);
+ ct_dbg("Failed to add ct entry fs rule");
+ goto err_insert;
+ }
+
+ dmfs_rule->attr = attr;
+
+ return &dmfs_rule->fs_rule;
+
+err_insert:
+ kfree(dmfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_dmfs_rule,
+ fs_rule);
+
+ mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr);
+ kfree(dmfs_rule);
+}
+
+static struct mlx5_ct_fs_ops dmfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_dmfs_init,
+ .destroy = mlx5_ct_fs_dmfs_destroy,
+};
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void)
+{
+ return &dmfs_ops;
+}
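
The dmfs backend returns the embedded generic mlx5_ct_fs_rule to the core and recovers its private wrapper with container_of() on delete. The idiom in isolation (standalone C; the macro below mirrors the kernel's minus its type check, and the structs are placeholders):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fs_rule { int dummy; };	/* kernel's is empty; dummy keeps this portable C */

struct dmfs_rule {
	struct fs_rule fs_rule;	/* generic handle returned to the core */
	int hw_id;		/* backend-private state */
};

int main(void)
{
	struct dmfs_rule r = { .hw_id = 42 };
	struct fs_rule *handle = &r.fs_rule;	/* what ct_rule_add returns */

	/* what ct_rule_del does to get its private struct back */
	struct dmfs_rule *back = container_of(handle, struct dmfs_rule, fs_rule);

	printf("hw_id=%d\n", back->hw_id);
	return 0;
}
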
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
new file mode 100644
index 000000000000..2b80fe73549d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/refcount.h>
+
+#include "en_tc.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#include "lib/smfs.h"
+
+#define INIT_ERR_PREFIX "ct_fs_smfs init failed"
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args)
+#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16)
+
+struct mlx5_ct_fs_smfs_matcher {
+ struct mlx5dr_matcher *dr_matcher;
+ struct list_head list;
+ int prio;
+ refcount_t ref;
+};
+
+struct mlx5_ct_fs_smfs_matchers {
+ struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
+ struct list_head used;
+};
+
+struct mlx5_ct_fs_smfs {
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl;
+ struct mlx5_ct_fs_smfs_matchers matchers;
+ struct mlx5_ct_fs_smfs_matchers matchers_nat;
+ struct mlx5dr_action *fwd_action;
+ struct mlx5_flow_table *ct_nat;
+ struct mutex lock; /* Guards matchers */
+};
+
+struct mlx5_ct_fs_smfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5dr_rule *rule;
+ struct mlx5dr_action *count_action;
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+};
+
+static inline void
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+ bool gre)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+
+ if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version)))
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ if (likely(ipv4)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6));
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6));
+ }
+
+ if (likely(tcp)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(MLX5_CT_TCP_FLAGS_MASK));
+ } else if (!gre) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
+ }
+
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK);
+}
+
+static struct mlx5dr_matcher *
+mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
+ bool tcp, bool gre, u32 priority)
+{
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
+
+ dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
+ kvfree(spec);
+ if (!dr_matcher)
+ return ERR_PTR(-EINVAL);
+
+ return dr_matcher;
+}
+
+static struct mlx5_ct_fs_smfs_matcher *
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
+ struct mlx5_ct_fs_smfs_matchers *matchers;
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5dr_table *tbl;
+ struct list_head *prev;
+ int prio;
+
+ matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
+ smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
+
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ return smfs_matcher;
+
+ mutex_lock(&fs_smfs->lock);
+
+	/* Retry with the lock held, as another thread might have created
+	 * the relevant matcher before we acquired the lock
+	 */
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ goto out_unlock;
+
+	/* Find the next available priority in the sorted used list */
+ prio = 0;
+ prev = &matchers->used;
+ list_for_each_entry(m, &matchers->used, list) {
+ prev = &m->list;
+
+ if (m->prio == prio)
+ prio = m->prio + 1;
+ else
+ break;
+ }
+
+ tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
+ dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
+ if (IS_ERR(dr_matcher)) {
+ netdev_warn(fs->netdev,
+ "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+ nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
+
+ smfs_matcher = ERR_CAST(dr_matcher);
+ goto out_unlock;
+ }
+
+ smfs_matcher->dr_matcher = dr_matcher;
+ smfs_matcher->prio = prio;
+ list_add(&smfs_matcher->list, prev);
+ refcount_set(&smfs_matcher->ref, 1);
+
+out_unlock:
+ mutex_unlock(&fs_smfs->lock);
+ return smfs_matcher;
+}
+
+static void
+mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock))
+ return;
+
+ mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher);
+ list_del(&smfs_matcher->list);
+ mutex_unlock(&fs_smfs->lock);
+}
+
+static int
+mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl;
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct);
+ ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat);
+ ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct);
+ fs_smfs->ct_nat = ct_nat;
+
+ if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) {
+		netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables\n");
+ return -EOPNOTSUPP;
+ }
+
+ ct_dbg("using smfs steering");
+
+ fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl);
+	if (!fs_smfs->fwd_action)
+		return -EINVAL;
+
+ fs_smfs->ct_tbl = ct_tbl;
+ fs_smfs->ct_nat_tbl = ct_nat_tbl;
+ mutex_init(&fs_smfs->lock);
+ INIT_LIST_HEAD(&fs_smfs->matchers.used);
+ INIT_LIST_HEAD(&fs_smfs->matchers_nat.used);
+
+ return 0;
+}
+
+static void
+mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ mlx5_smfs_action_destroy(fs_smfs->fwd_action);
+}
+
+static inline bool
+mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
+{
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+ const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+ const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+ const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
+
+ return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
+ used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
+}
+
+static bool
+mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule)
+{
+ struct flow_match_ipv4_addrs ipv4_addrs;
+ struct flow_match_ipv6_addrs ipv6_addrs;
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+ struct flow_match_ports ports;
+ struct flow_match_tcp tcp;
+
+ if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) {
+ ct_dbg("rule uses unexpected dissectors (0x%08x)",
+ flow_rule->match.dissector->used_keys);
+ return false;
+ }
+
+ flow_rule_match_basic(flow_rule, &basic);
+ flow_rule_match_control(flow_rule, &control);
+ flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
+ flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
+ if (basic.key->ip_proto != IPPROTO_GRE)
+ flow_rule_match_ports(flow_rule, &ports);
+ if (basic.key->ip_proto == IPPROTO_TCP)
+ flow_rule_match_tcp(flow_rule, &tcp);
+
+ if (basic.mask->n_proto != htons(0xFFFF) ||
+ (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
+ basic.mask->ip_proto != 0xFF ||
+ (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+ basic.key->ip_proto != IPPROTO_GRE)) {
+ ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
+ ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
+ basic.key->ip_proto, basic.mask->ip_proto);
+ return false;
+ }
+
+ if (basic.key->ip_proto != IPPROTO_GRE &&
+ (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
+		ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
+		       ntohs(ports.mask->src), ntohs(ports.mask->dst));
+ return false;
+ }
+
+ if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) {
+ ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags);
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+ struct mlx5_ct_fs_smfs_rule *smfs_rule;
+ struct mlx5dr_action *actions[5];
+ struct mlx5dr_rule *rule;
+ int num_actions = 0, err;
+ bool nat, tcp, ipv4, gre;
+
+ if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL);
+ if (!smfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter));
+ if (!smfs_rule->count_action) {
+ err = -EINVAL;
+ goto err_count;
+ }
+
+ actions[num_actions++] = smfs_rule->count_action;
+ actions[num_actions++] = attr->modify_hdr->action.dr_action;
+ actions[num_actions++] = fs_smfs->fwd_action;
+
+ nat = (attr->ft == fs_smfs->ct_nat);
+ ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
+ tcp = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_TCP;
+ gre = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_GRE;
+
+ smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
+ if (IS_ERR(smfs_matcher)) {
+ err = PTR_ERR(smfs_matcher);
+ goto err_matcher;
+ }
+
+ rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
+ spec->flow_context.flow_source);
+ if (!rule) {
+ err = -EINVAL;
+ goto err_create;
+ }
+
+ smfs_rule->rule = rule;
+ smfs_rule->smfs_matcher = smfs_matcher;
+
+ return &smfs_rule->fs_rule;
+
+err_create:
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher);
+err_matcher:
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+err_count:
+ kfree(smfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_smfs_rule,
+ fs_rule);
+
+ mlx5_smfs_rule_destroy(smfs_rule->rule);
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher);
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+ kfree(smfs_rule);
+}
+
+static struct mlx5_ct_fs_ops fs_smfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_smfs_init,
+ .destroy = mlx5_ct_fs_smfs_destroy,
+
+ .priv_size = sizeof(struct mlx5_ct_fs_smfs),
+};
+
+struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return &fs_smfs_ops;
+}
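
mlx5_ct_fs_smfs_matcher_get() assigns each newly created matcher the first unused priority by walking the in-use list, which is kept sorted: it bumps the candidate while the list is contiguous and stops at the first gap. The same first-gap scan in miniature (standalone C; arrays stand in for the list_head walk):

#include <stdio.h>

/* first-gap scan over a sorted set of used priorities, as in
 * mlx5_ct_fs_smfs_matcher_get()
 */
static int next_prio(const int *used, int n)
{
	int prio = 0;

	for (int i = 0; i < n; i++) {
		if (used[i] == prio)
			prio = used[i] + 1;
		else
			break;
	}
	return prio;
}

int main(void)
{
	int used_a[] = { 0, 1, 2 };	/* contiguous -> next is 3 */
	int used_b[] = { 0, 2, 3 };	/* gap at 1 -> next is 1 */

	printf("%d %d\n", next_prio(used_a, 3), next_prio(used_b, 3));
	return 0;
}
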
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
new file mode 100644
index 000000000000..be74e1403328
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
@@ -0,0 +1,582 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/math64.h>
+#include "lib/aso.h"
+#include "en/tc/post_act.h"
+#include "meter.h"
+#include "en/tc_priv.h"
+
+#define MLX5_START_COLOR_SHIFT 28
+#define MLX5_METER_MODE_SHIFT 24
+#define MLX5_CBS_EXP_SHIFT 24
+#define MLX5_CBS_MAN_SHIFT 16
+#define MLX5_CIR_EXP_SHIFT 8
+
+/* cir = (8 * 10^9 * cir_mantissa) / (2^cir_exponent) bits/s */
+#define MLX5_CONST_CIR 8000000000ULL
+#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e))
+#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1)
+
+/* cbs = cbs_mantissa*2^cbs_exponent */
+#define MLX5_CALC_CBS(m, e) ((m) << (e))
+#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1)
+#define MLX5_MAX_HW_CBS 0x7FFFFFFF
+
+struct mlx5e_flow_meter_aso_obj {
+ struct list_head entry;
+ int base_id;
+ int total_meters;
+
+	unsigned long meters_map[]; /* must be at the end of this struct */
+};
+
+struct mlx5e_flow_meters {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_aso *aso;
+ struct mutex aso_lock; /* Protects aso operations */
+ int log_granularity;
+ u32 pdn;
+
+ DECLARE_HASHTABLE(hashtbl, 8);
+
+ struct mutex sync_lock; /* protect flow meter operations */
+ struct list_head partial_list;
+ struct list_head full_list;
+
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_post_act *post_act;
+};
+
+static void
+mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp)
+{
+ s64 _cir, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cir << e;
+ if ((s64)m < 0) /* overflow */
+ break;
+ m = div64_u64(m, MLX5_CONST_CIR);
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cir = MLX5_CALC_CIR(m, e);
+ _delta = cir - _cir;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+static void
+mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp)
+{
+ s64 _cbs, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cbs >> e;
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cbs = MLX5_CALC_CBS(m, e);
+ _delta = cbs - _cbs;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
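
Both helpers brute-force the 5-bit exponent and keep the (mantissa, exponent) pair with the smallest shortfall; because the mantissa is the integer floor of cir * 2^e / 8e9, the fitted rate never exceeds the request. The CIR fit ports directly to userspace for a quick check (a sketch; stdint types and plain division replace the kernel helpers):

#include <stdint.h>
#include <stdio.h>

#define CONST_CIR	8000000000ULL
#define CALC_CIR(m, e)	((CONST_CIR * (m)) >> (e))

/* userspace port of mlx5e_flow_meter_cir_calc(): pick the 8-bit mantissa
 * and 5-bit exponent minimizing cir - 8e9 * man / 2^exp
 */
static void cir_calc(uint64_t cir, uint8_t *man, uint8_t *exp)
{
	int64_t _cir, _delta, delta = INT64_MAX;
	uint8_t e, _man = 0, _exp = 0;
	uint64_t m;

	for (e = 0; e <= 0x1f; e++) {	/* exponent width is 5 bits */
		m = cir << e;
		if ((int64_t)m < 0)	/* overflow */
			break;
		m /= CONST_CIR;
		if (m > 0xff)		/* mantissa width is 8 bits */
			continue;
		_cir = (int64_t)CALC_CIR(m, e);
		_delta = (int64_t)cir - _cir;
		if (_delta < delta) {
			_man = (uint8_t)m;
			_exp = e;
			if (!_delta)
				break;
			delta = _delta;
		}
	}
	*man = _man;
	*exp = _exp;
}

int main(void)
{
	uint64_t rate = 100000000ULL;	/* 100 Mbit/s */
	uint8_t man, exp;

	cir_calc(rate, &man, &exp);
	printf("rate=%llu man=%u exp=%u fitted cir=%llu\n",
	       (unsigned long long)rate, man, exp,
	       (unsigned long long)CALC_CIR((uint64_t)man, exp));
	return 0;
}
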
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params)
+{
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl;
+ struct mlx5_wqe_aso_data_seg *aso_data;
+ struct mlx5e_flow_meters *flow_meters;
+ u8 cir_man, cir_exp, cbs_man, cbs_exp;
+ struct mlx5_aso_wqe *aso_wqe;
+ unsigned long expires;
+ struct mlx5_aso *aso;
+ u64 rate, burst;
+ u8 ds_cnt;
+ int err;
+
+ rate = meter_params->rate;
+ burst = meter_params->burst;
+
+	/* HW treats each packet as 128 bytes (1024 bits) in PPS mode, hence rate <<= 10 and burst <<= 7 */
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS) {
+ rate <<= 10;
+ burst <<= 7;
+ }
+
+ if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS)
+ return -EINVAL;
+
+ /* HW has limitation of total 31 bits for cbs */
+ if (burst > MLX5_MAX_HW_CBS) {
+ mlx5_core_warn(mdev,
+ "burst(%lld) is too large, use HW allowed value(%d)\n",
+ burst, MLX5_MAX_HW_CBS);
+ burst = MLX5_MAX_HW_CBS;
+ }
+
+ mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode);
+ mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp);
+ mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n",
+ rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man);
+ mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp);
+ mlx5_core_dbg(mdev, "burst=%lld, cbs=%lld, exp=%d, man=%d\n",
+ burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man);
+
+ if (!cir_man || !cbs_man)
+ return -EINVAL;
+
+ flow_meters = meter->flow_meters;
+ aso = flow_meters->aso;
+
+ mutex_lock(&flow_meters->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(aso);
+ ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS);
+ mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER);
+
+ aso_ctrl = &aso_wqe->aso_ctrl;
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6;
+ aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE |
+ MLX5_ASO_ALWAYS_TRUE << 4;
+ aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6;
+ aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32));
+
+ aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1);
+ memset(aso_data, 0, sizeof(*aso_data));
+ aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */
+ (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT));
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS)
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT);
+ else
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT);
+
+ aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) |
+ (cbs_man << MLX5_CBS_MAN_SHIFT) |
+ (cir_exp << MLX5_CIR_EXP_SHIFT) |
+ cir_man);
+
+ mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl);
+
+	/* With newer FW, the wait for the first ASO WQE can exceed 2us, so allow up to 10ms. */
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ err = mlx5_aso_poll_cq(aso, true);
+ if (err)
+ usleep_range(2, 10);
+ } while (err && time_is_after_jiffies(expires));
+ mutex_unlock(&flow_meters->aso_lock);
+
+ return err;
+}
+
+static int
+mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ void *obj;
+ int err;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity);
+
+ obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj);
+ MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err) {
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id);
+ }
+
+ return err;
+}
+
+static void
+mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
+{
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5_fc *counter;
+ int err, pos, total;
+ u32 id;
+
+ meter = kzalloc(sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_red_counter;
+ }
+ meter->red_counter = counter;
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_green_counter;
+ }
+ meter->green_counter = counter;
+
+ meters_obj = list_first_entry_or_null(&flow_meters->partial_list,
+ struct mlx5e_flow_meter_aso_obj,
+ entry);
+ /* 2 meters in one object */
+ total = 1 << (flow_meters->log_granularity + 1);
+ if (!meters_obj) {
+ err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create flow meter ASO object\n");
+ goto err_create;
+ }
+
+ meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total),
+ GFP_KERNEL);
+ if (!meters_obj) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ meters_obj->base_id = id;
+ meters_obj->total_meters = total;
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ pos = 0;
+ } else {
+ pos = find_first_zero_bit(meters_obj->meters_map, total);
+ if (bitmap_weight(meters_obj->meters_map, total) == total - 1) {
+ list_move(&meters_obj->entry, &flow_meters->full_list);
+ }
+ }
+
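+ /* Each ASO object backs two meters: bitmap position pos maps to
+  * HW object base_id + pos / 2, with meter index pos % 2 inside it.
+  */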
+ bitmap_set(meters_obj->meters_map, pos, 1);
+ meter->flow_meters = flow_meters;
+ meter->meters_obj = meters_obj;
+ meter->obj_id = meters_obj->base_id + pos / 2;
+ meter->idx = pos % 2;
+
+ mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+
+ return meter;
+
+err_mem:
+ mlx5e_flow_meter_destroy_aso_obj(mdev, id);
+err_create:
+ mlx5_fc_destroy(mdev, meter->green_counter);
+err_green_counter:
+ mlx5_fc_destroy(mdev, meter->red_counter);
+err_red_counter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static void
+__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ int n, pos;
+
+ mlx5_fc_destroy(mdev, meter->green_counter);
+ mlx5_fc_destroy(mdev, meter->red_counter);
+
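+ /* Recover the bitmap position from the object id and meter index.
+  * An emptied object is destroyed; a full one that just lost a meter
+  * moves back to the partial list.
+  */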
+ meters_obj = meter->meters_obj;
+ pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx;
+ bitmap_clear(meters_obj->meters_map, pos, 1);
+ n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters);
+ if (n == 0) {
+ list_del(&meters_obj->entry);
+ mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id);
+ kfree(meters_obj);
+ } else if (n == meters_obj->total_meters - 1) {
+ list_move(&meters_obj->entry, &flow_meters->partial_list);
+ }
+
+ mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+ kfree(meter);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index)
+ if (meter->params.index == index)
+ goto add_ref;
+
+ return ERR_PTR(-ENOENT);
+
+add_ref:
+ meter->refcnt++;
+
+ return meter;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ mutex_unlock(&flow_meters->sync_lock);
+
+ return meter;
+}
+
+static void
+__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
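+ /* Callers hold flow_meters->sync_lock, so a plain refcount is enough. */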
+ if (--meter->refcnt == 0) {
+ hash_del(&meter->hlist);
+ __mlx5e_flow_meter_free(meter);
+ }
+}
+
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+
+ mutex_lock(&flow_meters->sync_lock);
+ __mlx5e_tc_meter_put(meter);
+ mutex_unlock(&flow_meters->sync_lock);
+}
+
+static struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = __mlx5e_flow_meter_alloc(flow_meters);
+ if (IS_ERR(meter))
+ return meter;
+
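+ /* Publish the meter in the hashtable with one reference for the caller. */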
+ hash_add(flow_meters->hashtbl, &meter->hlist, params->index);
+ meter->params.index = params->index;
+ meter->refcnt++;
+
+ return meter;
+}
+
+static int
+__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ int err = 0;
+
+ if (meter->params.mode != params->mode || meter->params.rate != params->rate ||
+ meter->params.burst != params->burst) {
+ err = mlx5e_tc_meter_modify(mdev, meter, params);
+ if (err)
+ goto out;
+
+ meter->params.mode = params->mode;
+ meter->params.rate = params->rate;
+ meter->params.burst = params->burst;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&flow_meters->sync_lock);
+ err = __mlx5e_tc_meter_update(meter, params);
+ mutex_unlock(&flow_meters->sync_lock);
+ return err;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
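+ /* Get the meter for this police index, allocating it on first use. */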
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ if (IS_ERR(meter)) {
+ meter = mlx5e_tc_meter_alloc(flow_meters, params);
+ if (IS_ERR(meter)) {
+ err = PTR_ERR(meter);
+ goto err_get;
+ }
+ }
+
+ err = __mlx5e_tc_meter_update(meter, params);
+ if (err)
+ goto err_update;
+
+ mutex_unlock(&flow_meters->sync_lock);
+ return meter;
+
+err_update:
+ __mlx5e_tc_meter_put(meter);
+err_get:
+ mutex_unlock(&flow_meters->sync_lock);
+ return ERR_PTR(err);
+}
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters)
+{
+ return flow_meters->ns_type;
+}
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (IS_ERR_OR_NULL(post_act)) {
+ netdev_dbg(priv->netdev,
+ "flow meter offload is not supported, post action is missing\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL);
+ if (!flow_meters)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err);
+ goto err_out;
+ }
+
+ flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn);
+ if (IS_ERR(flow_meters->aso)) {
+ mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n");
+ err = PTR_ERR(flow_meters->aso);
+ goto err_sq;
+ }
+
+ mutex_init(&flow_meters->sync_lock);
+ INIT_LIST_HEAD(&flow_meters->partial_list);
+ INIT_LIST_HEAD(&flow_meters->full_list);
+
+ flow_meters->ns_type = ns_type;
+ flow_meters->mdev = mdev;
+ flow_meters->post_act = post_act;
+ mutex_init(&flow_meters->aso_lock);
+ flow_meters->log_granularity = min_t(int, 6,
+ MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc));
+
+ return flow_meters;
+
+err_sq:
+ mlx5_core_dealloc_pd(mdev, flow_meters->pdn);
+err_out:
+ kfree(flow_meters);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters)
+{
+ if (IS_ERR_OR_NULL(flow_meters))
+ return;
+
+ mlx5_aso_destroy(flow_meters->aso);
+ mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn);
+ kfree(flow_meters);
+}
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse)
+{
+ u64 bytes1, packets1, lastuse1;
+ u64 bytes2, packets2, lastuse2;
+
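+ /* Totals cover both colors; red-counted packets are the drops. */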
+ mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1);
+ mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2);
+
+ *bytes = bytes1 + bytes2;
+ *packets = packets1 + packets2;
+ *drops = packets2;
+ *lastuse = max_t(u64, lastuse1, lastuse2);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
new file mode 100644
index 000000000000..6de6e8a16327
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_FLOW_METER_H__
+#define __MLX5_EN_FLOW_METER_H__
+
+struct mlx5e_post_meter_priv;
+struct mlx5e_flow_meter_aso_obj;
+struct mlx5e_flow_meters;
+struct mlx5_flow_attr;
+
+enum mlx5e_flow_meter_mode {
+ MLX5_RATE_LIMIT_BPS,
+ MLX5_RATE_LIMIT_PPS,
+};
+
+struct mlx5e_flow_meter_params {
+ enum mlx5e_flow_meter_mode mode;
+ /* police action index */
+ u32 index;
+ u64 rate;
+ u64 burst;
+};
+
+struct mlx5e_flow_meter_handle {
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ u32 obj_id;
+ u8 idx;
+
+ int refcnt;
+ struct hlist_node hlist;
+ struct mlx5e_flow_meter_params params;
+
+ struct mlx5_fc *green_counter;
+ struct mlx5_fc *red_counter;
+};
+
+struct mlx5e_meter_attr {
+ struct mlx5e_flow_meter_params params;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5e_post_meter_priv *post_meter;
+};
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params);
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter);
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params);
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters);
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_action);
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters);
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse);
+
+#endif /* __MLX5_EN_FLOW_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
index 31b4e39be2d3..4e48946c4c2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#include "en/tc_priv.h"
#include "en_tc.h"
#include "post_act.h"
#include "mlx5_core.h"
@@ -21,9 +22,9 @@ struct mlx5e_post_act_handle {
u32 id;
};
-#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
-#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0)
-#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX
+#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK
struct mlx5e_post_act *
mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
@@ -35,7 +36,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
int err;
if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) {
- if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
err = -EOPNOTSUPP;
goto err_check;
@@ -75,21 +76,47 @@ mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
kfree(post_act);
}
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Post action rule matches on fte_id and executes original rule's tc rule action */
+ mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK);
+
+ handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr);
+ if (IS_ERR(handle->rule)) {
+ err = PTR_ERR(handle->rule);
+ netdev_warn(post_act->priv->netdev, "Failed to add post action rule\n");
+ goto err_rule;
+ }
+
+ kvfree(spec);
+ return 0;
+
+err_rule:
+ kvfree(spec);
+ return err;
+}
+
struct mlx5e_post_act_handle *
mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr)
{
u32 attr_sz = ns_to_attr_sz(post_act->ns_type);
- struct mlx5e_post_act_handle *handle = NULL;
- struct mlx5_flow_attr *post_attr = NULL;
- struct mlx5_flow_spec *spec = NULL;
+ struct mlx5e_post_act_handle *handle;
+ struct mlx5_flow_attr *post_attr;
int err;
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
post_attr = mlx5_alloc_flow_attr(post_act->ns_type);
- if (!handle || !spec || !post_attr) {
+ if (!handle || !post_attr) {
kfree(post_attr);
- kvfree(spec);
kfree(handle);
return ERR_PTR(-ENOMEM);
}
@@ -100,7 +127,8 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
post_attr->ft = post_act->ft;
post_attr->inner_match_level = MLX5_MATCH_NONE;
post_attr->outer_match_level = MLX5_MATCH_NONE;
- post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+ post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
handle->ns_type = post_act->ns_type;
/* Splits were handled before post action */
@@ -112,36 +140,29 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
if (err)
goto err_xarray;
- /* Post action rule matches on fte_id and executes original rule's
- * tc rule action
- */
- mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG,
- handle->id, MLX5_POST_ACTION_MASK);
-
- handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr);
- if (IS_ERR(handle->rule)) {
- err = PTR_ERR(handle->rule);
- netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
- goto err_rule;
- }
handle->attr = post_attr;
- kvfree(spec);
return handle;
-err_rule:
- xa_erase(&post_act->ids, handle->id);
err_xarray:
kfree(post_attr);
- kvfree(spec);
kfree(handle);
return ERR_PTR(err);
}
void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr);
+ handle->rule = NULL;
+}
+
+void
mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
{
- mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr);
+ if (!IS_ERR_OR_NULL(handle->rule))
+ mlx5e_tc_post_act_unoffload(post_act, handle);
xa_erase(&post_act->ids, handle->id);
kfree(handle->attr);
kfree(handle);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
index b530ec1981a5..f476774c0b75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -24,6 +24,14 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
void
mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
struct mlx5_flow_table *
mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
new file mode 100644
index 000000000000..8b77e822810e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "post_meter.h"
+#include "en/tc/post_act.h"
+
+#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG)
+#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG)
+
+struct mlx5e_post_meter_priv {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_handle *fwd_green_rule;
+ struct mlx5_flow_handle *drop_red_rule;
+};
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return post_meter->ft;
+}
+
+static int
+mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+
+ root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type);
+ if (!root_ns) {
+ mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n");
+ return -EOPNOTSUPP;
+ }
+
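+ /* An unmanaged table with exactly two entries, one per packet color:
+  * forward green, drop red.
+  */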
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(post_meter->ft)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n");
+ return PTR_ERR(post_meter->ft);
+ }
+
+ return 0;
+}
+
+static int
+mlx5e_post_meter_fg_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *misc2, *match_criteria;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
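+ /* A single group matching on the packet color bits in reg_c_5 covers
+  * both table entries.
+  */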
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in);
+ if (IS_ERR(post_meter->fg)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n");
+ err = PTR_ERR(post_meter->fg);
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static int
+mlx5e_post_meter_rules_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[0].counter_id = mlx5_fc_id(red_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n");
+ err = PTR_ERR(rule);
+ goto err_red;
+ }
+ post_meter->drop_red_rule = rule;
+
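+ /* The spec is reused for the green rule; the register match is simply
+  * overwritten with the green color value.
+  */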
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[0].ft = mlx5e_tc_post_act_get_ft(post_act);
+ dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[1].counter_id = mlx5_fc_id(green_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n");
+ err = PTR_ERR(rule);
+ goto err_green;
+ }
+ post_meter->fwd_green_rule = rule;
+
+ kvfree(spec);
+ return 0;
+
+err_green:
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+err_red:
+ kvfree(spec);
+ return err;
+}
+
+static void
+mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+ mlx5_del_flow_rules(post_meter->fwd_green_rule);
+}
+
+static void
+mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_group(post_meter->fg);
+}
+
+static void
+mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_table(post_meter->ft);
+}
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5e_post_meter_priv *post_meter;
+ int err;
+
+ post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL);
+ if (!post_meter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_post_meter_table_create(priv, ns_type, post_meter);
+ if (err)
+ goto err_ft;
+
+ err = mlx5e_post_meter_fg_create(priv, post_meter);
+ if (err)
+ goto err_fg;
+
+ err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter,
+ red_counter);
+ if (err)
+ goto err_rules;
+
+ return post_meter;
+
+err_rules:
+ mlx5e_post_meter_fg_destroy(post_meter);
+err_fg:
+ mlx5e_post_meter_table_destroy(post_meter);
+err_ft:
+ kfree(post_meter);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5e_post_meter_rules_destroy(post_meter);
+ mlx5e_post_meter_fg_destroy(post_meter);
+ mlx5e_post_meter_table_destroy(post_meter);
+ kfree(post_meter);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
new file mode 100644
index 000000000000..34d0e4b9fc7a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_POST_METER_H__
+#define __MLX5_EN_POST_METER_H__
+
+#define packet_color_to_reg { \
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \
+ .moffset = 0, \
+ .mlen = 8, \
+ .soffset = MLX5_BYTE_OFF(fte_match_param, \
+ misc_parameters_2.metadata_reg_c_5), \
+}
+
+struct mlx5e_post_meter_priv;
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter);
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter);
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter);
+
+#endif /* __MLX5_EN_POST_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
index df6888c4793c..1cbd2eb9d04f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -5,6 +5,8 @@
#include <net/psample.h>
#include "en/mapping.h"
#include "en/tc/post_act.h"
+#include "en/tc/act/sample.h"
+#include "en/mod_hdr.h"
#include "sample.h"
#include "eswitch.h"
#include "en_tc.h"
@@ -45,14 +47,12 @@ struct mlx5e_sample_flow {
struct mlx5_flow_handle *pre_rule;
struct mlx5_flow_attr *post_attr;
struct mlx5_flow_handle *post_rule;
- struct mlx5e_post_act_handle *post_act_handle;
};
struct mlx5e_sample_restore {
struct hlist_node hlist;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_flow_handle *rule;
- struct mlx5e_post_act_handle *post_act_handle;
u32 obj_id;
int count;
};
@@ -93,6 +93,7 @@ sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dest.vport.num = esw->manager_vport;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
if (IS_ERR(tc_psample->termtbl_rule)) {
err = PTR_ERR(tc_psample->termtbl_rule);
@@ -230,69 +231,46 @@ sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
*/
static struct mlx5_modify_hdr *
sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
- struct mlx5e_post_act_handle *handle)
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
- struct mlx5e_tc_mod_hdr_acts mod_acts = {};
struct mlx5_modify_hdr *modify_hdr;
int err;
- err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB,
CHAIN_TO_REG, obj_id);
if (err)
goto err_set_regc0;
- if (handle) {
- err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts);
- if (err)
- goto err_post_act;
- }
-
modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
- mod_acts.num_actions,
- mod_acts.actions);
+ mod_acts->num_actions,
+ mod_acts->actions);
if (IS_ERR(modify_hdr)) {
err = PTR_ERR(modify_hdr);
goto err_modify_hdr;
}
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(mod_acts);
return modify_hdr;
err_modify_hdr:
-err_post_act:
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(mod_acts);
err_set_regc0:
return ERR_PTR(err);
}
-static u32
-restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle)
-{
- return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0);
-}
-
-static bool
-restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id,
- struct mlx5e_post_act_handle *post_act_handle)
-{
- return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle;
-}
-
static struct mlx5e_sample_restore *
sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
- struct mlx5e_post_act_handle *post_act_handle)
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
struct mlx5_eswitch *esw = tc_psample->esw;
struct mlx5_core_dev *mdev = esw->dev;
struct mlx5e_sample_restore *restore;
struct mlx5_modify_hdr *modify_hdr;
- u32 hash_key;
int err;
mutex_lock(&tc_psample->restore_lock);
- hash_key = restore_hash(obj_id, post_act_handle);
- hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key)
- if (restore_equal(restore, obj_id, post_act_handle))
+ hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id)
+ if (restore->obj_id == obj_id)
goto add_ref;
restore = kzalloc(sizeof(*restore), GFP_KERNEL);
@@ -301,9 +279,8 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
goto err_alloc;
}
restore->obj_id = obj_id;
- restore->post_act_handle = post_act_handle;
- modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle);
+ modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts);
if (IS_ERR(modify_hdr)) {
err = PTR_ERR(modify_hdr);
goto err_modify_hdr;
@@ -316,7 +293,7 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
goto err_restore;
}
- hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key);
+ hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id);
add_ref:
restore->count++;
mutex_unlock(&tc_psample->restore_lock);
@@ -402,7 +379,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
post_attr->chain = 0;
post_attr->prio = 0;
post_attr->ft = default_tbl;
- post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT;
/* When offloading sample and encap action, if there is no valid
* neigh data struct, a slow path rule is offloaded first. Source
@@ -491,16 +468,16 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id)
+ struct mlx5_flow_attr *attr)
{
- struct mlx5e_post_act_handle *post_act_handle = NULL;
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_esw_flow_attr *pre_esw_attr;
struct mlx5_mapped_obj restore_obj = {};
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
struct mlx5e_sample_flow *sample_flow;
struct mlx5e_sample_attr *sample_attr;
struct mlx5_flow_attr *pre_attr;
+ u32 tunnel_id = attr->tunnel_id;
struct mlx5_eswitch *esw;
u32 default_tbl_id;
u32 obj_id;
@@ -512,7 +489,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
if (!sample_flow)
return ERR_PTR(-ENOMEM);
- sample_attr = attr->sample_attr;
+ sample_attr = &attr->sample_attr;
sample_attr->sample_flow = sample_flow;
/* For NICs with reg_c_preserve support or decap action, use
@@ -521,18 +498,11 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
* original flow table.
*/
esw = tc_psample->esw;
- if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) ||
- attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) {
struct mlx5_flow_table *ft;
ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
default_tbl_id = ft->id;
- post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr);
- if (IS_ERR(post_act_handle)) {
- err = PTR_ERR(post_act_handle);
- goto err_post_act;
- }
- sample_flow->post_act_handle = post_act_handle;
} else {
err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
if (err)
@@ -545,6 +515,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
err = PTR_ERR(sample_flow->sampler);
goto err_sampler;
}
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
/* Create an id mapping reg_c0 value to sample object. */
restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
@@ -558,7 +529,8 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
sample_attr->restore_obj_id = obj_id;
/* Create sample restore context. */
- sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle);
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+ sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts);
if (IS_ERR(sample_flow->restore)) {
err = PTR_ERR(sample_flow->restore);
goto err_sample_restore;
@@ -579,13 +551,13 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
if (tunnel_id)
pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
- pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+ pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
pre_attr->inner_match_level = attr->inner_match_level;
pre_attr->outer_match_level = attr->outer_match_level;
pre_attr->chain = attr->chain;
pre_attr->prio = attr->prio;
- pre_attr->sample_attr = attr->sample_attr;
- sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+ pre_attr->ft = attr->ft;
+ pre_attr->sample_attr = *sample_attr;
pre_esw_attr = pre_attr->esw_attr;
pre_esw_attr->in_mdev = esw_attr->in_mdev;
pre_esw_attr->in_rep = esw_attr->in_rep;
@@ -610,9 +582,6 @@ err_sampler:
if (sample_flow->post_rule)
del_post_rule(esw, sample_flow, attr);
err_post_rule:
- if (post_act_handle)
- mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle);
-err_post_act:
kfree(sample_flow);
return ERR_PTR(err);
}
@@ -632,15 +601,13 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
* will hit fw syndromes.
*/
esw = tc_psample->esw;
- sample_flow = attr->sample_attr->sample_flow;
+ sample_flow = attr->sample_attr.sample_flow;
mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
sample_restore_put(tc_psample, sample_flow->restore);
- mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id);
sampler_put(tc_psample, sample_flow->sampler);
- if (sample_flow->post_act_handle)
- mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle);
- else
+ if (sample_flow->post_rule)
del_post_rule(esw, sample_flow, attr);
kfree(sample_flow->pre_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
index 9ef8a49d7801..a569367eae4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id);
+ struct mlx5_flow_attr *attr);
void
mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
@@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
static inline struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id)
+ struct mlx5_flow_attr *attr)
{ return ERR_PTR(-EOPNOTSUPP); }
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 2445e2ae3324..864ce0c393e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -14,18 +14,21 @@
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
+#include <linux/if_macvlan.h>
+#include <linux/debugfs.h>
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+#include "en/tc_priv.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
+#include "fs_core.h"
-#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen)
-#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
@@ -33,12 +36,27 @@
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)
-#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
-#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
+#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
+#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
+
+/* Statically allocate modify actions for
+ * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
+ * This will be increased dynamically if needed (for the ipv6 snat + dnat).
+ */
+#define MLX5_CT_MIN_MOD_ACTS 10
#define ct_dbg(fmt, args...)\
netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
+struct mlx5_tc_ct_debugfs {
+ struct {
+ atomic_t offloaded;
+ atomic_t rx_dropped;
+ } stats;
+
+ struct dentry *root;
+};
+
struct mlx5_tc_ct_priv {
struct mlx5_core_dev *dev;
const struct net_device *netdev;
@@ -55,19 +73,23 @@ struct mlx5_tc_ct_priv {
struct mapping_ctx *labels_mapping;
enum mlx5_flow_namespace_type ns_type;
struct mlx5_fs_chains *chains;
+ struct mlx5_ct_fs *fs;
+ struct mlx5_ct_fs_ops *fs_ops;
spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
+
+ struct mlx5_tc_ct_debugfs debugfs;
};
struct mlx5_ct_flow {
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_flow_handle *pre_ct_rule;
- struct mlx5e_post_act_handle *post_act_handle;
struct mlx5_ct_ft *ft;
u32 chain_mapping;
};
struct mlx5_ct_zone_rule {
- struct mlx5_flow_handle *rule;
+ struct mlx5_ct_fs_rule *rule;
struct mlx5e_mod_hdr_handle *mh;
struct mlx5_flow_attr *attr;
bool nat;
@@ -251,7 +273,8 @@ mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
return -EOPNOTSUPP;
}
} else {
- return -EOPNOTSUPP;
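+ /* GRE carries no port numbers, so a missing ports match is
+  * acceptable only for GRE tuples.
+  */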
+ if (tuple->ip_proto != IPPROTO_GRE)
+ return -EOPNOTSUPP;
}
return 0;
@@ -320,7 +343,33 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
}
static int
-mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct net_device *ndev)
+{
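+ /* Resolve the flow source for the given ingress device, recursing
+  * through stacked vlan/macvlan devices down to the real device.
+  */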
+ struct mlx5e_priv *other_priv = netdev_priv(ndev);
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ bool vf_rep, uplink_rep;
+
+ vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+ uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+
+ if (vf_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ if (uplink_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ if (is_vlan_dev(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
+ if (netif_is_macvlan(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
+ if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *spec,
struct flow_rule *rule)
{
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -335,8 +384,7 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
flow_rule_match_basic(rule, &match);
- mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
- headers_v);
+ mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
match.mask->ip_proto);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -432,6 +480,23 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
ntohs(match.key->flags));
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+
+ if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
+ if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
+ spec->flow_context.flow_source =
+ mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+ dev_put(dev);
+ }
+ }
+
return 0;
}
@@ -456,7 +521,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
- mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
+ ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
kfree(attr);
@@ -468,6 +533,8 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
{
mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+ atomic_dec(&ct_priv->debugfs.stats.offloaded);
}
static struct flow_action_entry *
@@ -530,6 +597,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
return 0;
}
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
char *modact)
@@ -609,22 +682,15 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
struct flow_action *flow_action = &flow_rule->action;
struct mlx5_core_dev *mdev = ct_priv->dev;
struct flow_action_entry *act;
- size_t action_size;
char *modact;
int err, i;
- action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
-
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_MANGLE: {
- err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
- mod_acts);
- if (err)
- return err;
-
- modact = mod_acts->actions +
- mod_acts->num_actions * action_size;
+ modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
if (err)
@@ -650,9 +716,10 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_flow_attr *attr,
struct flow_rule *flow_rule,
struct mlx5e_mod_hdr_handle **mh,
- u8 zone_restore_id, bool nat)
+ u8 zone_restore_id, bool nat_table, bool has_nat)
{
- struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
+ DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
struct flow_action_entry *meta;
u16 ct_state = 0;
int err;
@@ -665,11 +732,12 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
&attr->ct_attr.ct_labels_id);
if (err)
return -EOPNOTSUPP;
- if (nat) {
- err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
- &mod_acts);
- if (err)
- goto err_mapping;
+ if (nat_table) {
+ if (has_nat) {
+ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
+ if (err)
+ goto err_mapping;
+ }
ct_state |= MLX5_CT_STATE_NAT_BIT;
}
@@ -684,7 +752,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
if (err)
goto err_mapping;
- if (nat) {
+ if (nat_table && has_nat) {
attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
mod_acts.num_actions,
mod_acts.actions);
@@ -706,11 +774,11 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
}
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(&mod_acts);
return 0;
err_mapping:
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(&mod_acts);
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
return err;
}
@@ -752,7 +820,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
&zone_rule->mh,
- zone_restore_id, nat);
+ zone_restore_id,
+ nat,
+ mlx5_tc_ct_entry_has_nat(entry));
if (err) {
ct_dbg("Failed to create ct entry mod hdr");
goto err_mod_hdr;
@@ -764,16 +834,20 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
attr->dest_chain = 0;
attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
- attr->outer_match_level = MLX5_MATCH_L4;
+ if (entry->tuple.ip_proto == IPPROTO_TCP ||
+ entry->tuple.ip_proto == IPPROTO_UDP)
+ attr->outer_match_level = MLX5_MATCH_L4;
+ else
+ attr->outer_match_level = MLX5_MATCH_L3;
attr->counter = entry->counter->counter;
- attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
attr->esw_attr->in_mdev = priv->mdev;
- mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
+ mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
- zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
+ zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
if (IS_ERR(zone_rule->rule)) {
err = PTR_ERR(zone_rule->rule);
ct_dbg("Failed to add ct entry rule, nat: %d", nat);
@@ -868,14 +942,11 @@ static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
- struct mlx5e_priv *priv;
-
if (!refcount_dec_and_test(&entry->refcnt))
return;
- priv = netdev_priv(entry->ct_priv->netdev);
INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
- queue_work(priv->wq, &entry->work);
+ queue_work(entry->ct_priv->wq, &entry->work);
}
static struct mlx5_ct_counter *
@@ -907,12 +978,9 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_tuple rev_tuple = entry->tuple;
struct mlx5_ct_counter *shared_counter;
struct mlx5_ct_entry *rev_entry;
- __be16 tmp_port;
/* get the reversed tuple */
- tmp_port = rev_tuple.port.src;
- rev_tuple.port.src = rev_tuple.port.dst;
- rev_tuple.port.dst = tmp_port;
+ swap(rev_tuple.port.src, rev_tuple.port.dst);
if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
__be32 tmp_addr = rev_tuple.ip.src_v4;
@@ -987,6 +1055,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
if (err)
goto err_nat;
+ atomic_inc(&ct_priv->debugfs.stats.offloaded);
return 0;
err_nat:
@@ -1114,7 +1183,6 @@ mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
}
rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
- mlx5_tc_ct_entry_remove_from_tuples(entry);
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(entry);
@@ -1184,16 +1252,20 @@ mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
struct flow_keys flow_keys;
skb_reset_network_header(skb);
- skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
tuple->zone = zone;
if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
- flow_keys.basic.ip_proto != IPPROTO_UDP)
+ flow_keys.basic.ip_proto != IPPROTO_UDP &&
+ flow_keys.basic.ip_proto != IPPROTO_GRE)
return false;
- tuple->port.src = flow_keys.ports.src;
- tuple->port.dst = flow_keys.ports.dst;
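+ /* GRE tuples carry no ports; copy them only for TCP and UDP. */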
+ if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
+ flow_keys.basic.ip_proto == IPPROTO_UDP) {
+ tuple->port.src = flow_keys.ports.src;
+ tuple->port.dst = flow_keys.ports.dst;
+ }
tuple->n_proto = flow_keys.basic.n_proto;
tuple->ip_proto = flow_keys.basic.ip_proto;
@@ -1360,9 +1432,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
- bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
- int err;
-
if (!priv) {
NL_SET_ERR_MSG_MOD(extack,
"offload of ct action isn't available");
@@ -1373,17 +1442,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
attr->ct_attr.ct_action = act->ct.action;
attr->ct_attr.nf_ft = act->ct.flow_table;
- if (!clear_action)
- goto out;
-
- err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear");
- return err;
- }
- attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-
-out:
return 0;
}
@@ -1460,7 +1518,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
}
pre_ct->miss_rule = rule;
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
kvfree(spec);
return 0;
@@ -1469,7 +1527,7 @@ err_miss_rule:
err_flow_rule:
mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
kvfree(spec);
return err;
}
@@ -1702,6 +1760,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
if (!refcount_dec_and_test(&ft->refcount))
return;
+ flush_workqueue(ct_priv->wq);
nf_flow_table_offload_del_cb(ft->nf_ft,
mlx5_tc_ct_block_flow_offload, ft);
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
@@ -1716,7 +1775,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
/* We translate the tc filter with CT action to the following HW model:
*
* +---------------------+
- * + ft prio (tc chain) +
+ * + ft prio (tc chain) +
* + original match +
* +---------------------+
* | set chain miss mapping
@@ -1726,7 +1785,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* v
* +---------------------+
* + pre_ct/pre_ct_nat + if matches +-------------------------+
- * + zone+nat match +---------------->+ post_act (see below) +
+ * + zone+nat match +---------------->+ post_act (see below) +
* +---------------------+ set zone +-------------------------+
* | set zone
* v
@@ -1741,21 +1800,19 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do nat (if needed)
* v
* +--------------+
- * + post_act + original filter actions
+ * + post_act + original filter actions
* + fte_id match +------------------------>
* +--------------+
*/
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *orig_spec,
struct mlx5_flow_attr *attr)
{
bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5e_post_act_handle *handle;
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
struct mlx5_ct_flow *ct_flow;
@@ -1764,7 +1821,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!ct_flow) {
- kfree(ct_flow);
+ if (!ct_flow)
 return ERR_PTR(-ENOMEM);
- }
@@ -1778,14 +1834,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
ct_flow->ft = ft;
- handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- ct_dbg("Failed to allocate post action handle");
- goto err_post_act_handle;
- }
- ct_flow->post_act_handle = handle;
-
/* Base flow attributes of both rules on original rule attribute */
ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
if (!ct_flow->pre_ct_attr) {
@@ -1795,6 +1843,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
pre_ct_attr = ct_flow->pre_ct_attr;
memcpy(pre_ct_attr, attr, attr_sz);
+ pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
/* Modify the original rule's action to fwd and modify, leave decap */
pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
@@ -1813,30 +1862,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
ct_flow->chain_mapping = chain_mapping;
- err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
CHAIN_TO_REG, chain_mapping);
if (err) {
ct_dbg("Failed to set chain register mapping");
goto err_mapping;
}
- err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts);
- if (err) {
- ct_dbg("Failed to set post action handle");
- goto err_mapping;
- }
-
/* If original flow is decap, we do it before going into ct table
* so add a rewrite for the tunnel match_id.
*/
if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
attr->chain == 0) {
- u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
-
- err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
ct_priv->ns_type,
TUNNEL_TO_REG,
- tun_id);
+ attr->tunnel_id);
if (err) {
ct_dbg("Failed to set tunnel register mapping");
goto err_mapping;
@@ -1844,8 +1885,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
- pre_mod_acts.num_actions,
- pre_mod_acts.actions);
+ pre_mod_acts->num_actions,
+ pre_mod_acts->actions);
if (IS_ERR(mod_hdr)) {
err = PTR_ERR(mod_hdr);
ct_dbg("Failed to create pre ct mod hdr");
@@ -1865,20 +1906,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
attr->ct_attr.ct_flow = ct_flow;
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
return ct_flow->pre_ct_rule;
err_insert_orig:
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
- mlx5e_tc_post_act_del(ct_priv->post_act, handle);
-err_post_act_handle:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
kfree(ct_flow);
@@ -1886,87 +1925,19 @@ err_ft:
return ERR_PTR(err);
}
-static struct mlx5_flow_handle *
-__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_flow_spec *orig_spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
- struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_modify_hdr *mod_hdr;
- struct mlx5_flow_handle *rule;
- struct mlx5_ct_flow *ct_flow;
- int err;
-
- ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!ct_flow)
- return ERR_PTR(-ENOMEM);
-
- /* Base esw attributes on original rule attribute */
- pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
- if (!pre_ct_attr) {
- err = -ENOMEM;
- goto err_attr;
- }
-
- memcpy(pre_ct_attr, attr, attr_sz);
-
- mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
- mod_acts->num_actions,
- mod_acts->actions);
- if (IS_ERR(mod_hdr)) {
- err = PTR_ERR(mod_hdr);
- ct_dbg("Failed to add create ct clear mod hdr");
- goto err_mod_hdr;
- }
-
- pre_ct_attr->modify_hdr = mod_hdr;
-
- rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- ct_dbg("Failed to add ct clear rule");
- goto err_insert;
- }
-
- attr->ct_attr.ct_flow = ct_flow;
- ct_flow->pre_ct_attr = pre_ct_attr;
- ct_flow->pre_ct_rule = rule;
- return rule;
-
-err_insert:
- mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
-err_mod_hdr:
- netdev_warn(priv->netdev,
- "Failed to offload ct clear flow, err %d\n", err);
- kfree(pre_ct_attr);
-err_attr:
- kfree(ct_flow);
-
- return ERR_PTR(err);
-}
-
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
- bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
struct mlx5_flow_handle *rule;
if (!priv)
return ERR_PTR(-EOPNOTSUPP);
mutex_lock(&priv->control_lock);
-
- if (clear_action)
- rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
- else
- rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
+ rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
mutex_unlock(&priv->control_lock);
return rule;
@@ -1974,21 +1945,17 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5e_tc_flow *flow,
- struct mlx5_ct_flow *ct_flow)
+ struct mlx5_ct_flow *ct_flow,
+ struct mlx5_flow_attr *attr)
{
struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
- pre_ct_attr);
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
- if (ct_flow->post_act_handle) {
- mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
- mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle);
- mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
- }
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+ mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
kfree(ct_flow->pre_ct_attr);
kfree(ct_flow);
@@ -1996,7 +1963,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr)
{
struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
@@ -2008,11 +1974,43 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
return;
mutex_lock(&priv->control_lock);
- __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
+ __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
mutex_unlock(&priv->control_lock);
}
static int
+mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
+ int err;
+
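+ /* Default to the DMFS steering provider; switch to SMFS when
+  * software-managed steering is enabled on the FDB namespace.
+  */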
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
+ ct_dbg("Using SMFS ct flow steering provider");
+ fs_ops = mlx5_ct_fs_smfs_ops_get();
+ }
+
+ ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
+ if (!ct_priv->fs)
+ return -ENOMEM;
+
+ ct_priv->fs->netdev = ct_priv->netdev;
+ ct_priv->fs->dev = ct_priv->dev;
+ ct_priv->fs_ops = fs_ops;
+
+ err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
+ if (err)
+ goto err_init;
+
+ return 0;
+
+err_init:
+ kfree(ct_priv->fs);
+ return err;
+}
+
+static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
const char **err_msg)
{
@@ -2064,7 +2062,7 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
/* Ignore_flow_level support isn't supported by default for VFs and so post_act
* won't be supported. Skip showing error msg.
*/
- if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
err_msg = "post action is missing";
err = -EOPNOTSUPP;
goto out_err;
@@ -2079,6 +2077,29 @@ out_err:
return err;
}
+static void
+mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
+ struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+ char dirname[16] = {};
+
+ if (snprintf(dirname, sizeof(dirname), "ct_%s", is_fdb ? "fdb" : "nic") >= sizeof(dirname))
+ return;
+
+ ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
+ debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.offloaded);
+ debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.rx_dropped);
+}
+
+static void
+mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ debugfs_remove_recursive(ct_priv->debugfs.root);
+}
+
#define INIT_ERR_PREFIX "tc ct offload init failed"
struct mlx5_tc_ct_priv *
@@ -2150,8 +2171,23 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
goto err_ct_tuples_nat_ht;
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ err = mlx5_tc_ct_fs_init(ct_priv);
+ if (err)
+ goto err_init_fs;
+
+ mlx5_ct_tc_create_dbgfs(ct_priv);
return ct_priv;
+err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
rhashtable_destroy(&ct_priv->ct_tuples_ht);
err_ct_tuples_ht:
@@ -2180,8 +2216,13 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
if (!ct_priv)
return;
+ destroy_workqueue(ct_priv->wq);
+ mlx5_ct_tc_remove_dbgfs(ct_priv);
chains = ct_priv->chains;
+ ct_priv->fs_ops->destroy(ct_priv->fs);
+ kfree(ct_priv->fs);
+
mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
mapping_destroy(ct_priv->zone_mapping);
@@ -2206,22 +2247,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
return true;
if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
- return false;
+ goto out_inc_drop;
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
- return false;
+ goto out_inc_drop;
spin_lock(&ct_priv->ht_lock);
entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
if (!entry) {
spin_unlock(&ct_priv->ht_lock);
- return false;
+ goto out_inc_drop;
}
if (IS_ERR(entry)) {
spin_unlock(&ct_priv->ht_lock);
- return false;
+ goto out_inc_drop;
}
spin_unlock(&ct_priv->ht_lock);
@@ -2229,4 +2270,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
__mlx5_tc_ct_entry_put(entry);
return true;
+
+out_inc_drop:
+ atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
+ return false;
}
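The hunks above add per-namespace "ct_fdb"/"ct_nic" debugfs directories and bump rx_dropped whenever tuple restore fails. A minimal out-of-tree sketch of the same debugfs counter pattern, using only the generic kernel API (the module, directory, and counter names here are illustrative, not the driver's):

	#include <linux/module.h>
	#include <linux/debugfs.h>
	#include <linux/atomic.h>

	static struct dentry *root;
	static atomic_t offloaded = ATOMIC_INIT(0);
	static atomic_t rx_dropped = ATOMIC_INIT(0);

	static int __init ct_dbgfs_demo_init(void)
	{
		/* read-only atomic_t counters under /sys/kernel/debug/ct_demo */
		root = debugfs_create_dir("ct_demo", NULL);
		debugfs_create_atomic_t("offloaded", 0400, root, &offloaded);
		debugfs_create_atomic_t("rx_dropped", 0400, root, &rx_dropped);
		atomic_inc(&rx_dropped);	/* what the out_inc_drop label does */
		return 0;
	}

	static void __exit ct_dbgfs_demo_exit(void)
	{
		debugfs_remove_recursive(root);
	}

	module_init(ct_dbgfs_demo_init);
	module_exit(ct_dbgfs_demo_exit);
	MODULE_LICENSE("GPL");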
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 99662af1e41a..5bbd6b92840f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -62,10 +62,11 @@ struct mlx5_ct_attr {
misc_parameters_2.metadata_reg_c_4),\
}
+/* 8 LSB of metadata C5 are reserved for packet color */
#define fteid_to_reg_ct {\
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\
- .moffset = 0,\
- .mlen = 32,\
+ .moffset = 8,\
+ .mlen = 24,\
.soffset = MLX5_BYTE_OFF(fte_match_param,\
misc_parameters_2.metadata_reg_c_5),\
}
@@ -84,8 +85,8 @@ struct mlx5_ct_attr {
.mlen = ESW_ZONE_ID_BITS,\
}
-#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen)
-#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset)
+#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG)
+#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG)
#if IS_ENABLED(CONFIG_MLX5_TC_CT)
@@ -116,19 +117,21 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr);
bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id);
+int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts);
+
#else /* CONFIG_MLX5_TC_CT */
static inline struct mlx5_tc_ct_priv *
@@ -171,6 +174,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
}
static inline int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_acts,
@@ -183,7 +193,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
static inline struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
@@ -193,7 +202,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
static inline void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr)
{
}
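Since fteid_to_reg_ct above now maps the fte id at moffset 8 with mlen 24, the id and the packet color share metadata register C5 without overlapping. A standalone sketch of that packing, with illustrative names only (not driver code):

	#include <stdint.h>
	#include <stdio.h>

	#define FTEID_OFFSET	8			/* moffset: skip the color byte */
	#define FTEID_BITS	24			/* mlen */
	#define FTEID_MASK	((1u << FTEID_BITS) - 1)

	static uint32_t pack_reg_c5(uint32_t fte_id, uint8_t color)
	{
		/* fte id occupies bits 31..8, packet color the 8 LSB */
		return ((fte_id & FTEID_MASK) << FTEID_OFFSET) | color;
	}

	int main(void)
	{
		printf("reg_c5 = 0x%08x\n", pack_reg_c5(0x12345, 0x7f));
		return 0;	/* prints reg_c5 = 0x0123457f */
	}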
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index b689701ac7d8..2e42d7c5451e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -5,11 +5,13 @@
#define __MLX5_EN_TC_PRIV_H__
#include "en_tc.h"
+#include "en/tc/act/act.h"
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
#define MLX5E_TC_MAX_SPLITS 1
+
enum {
MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
@@ -32,13 +34,17 @@ enum {
struct mlx5e_tc_flow_parse_attr {
const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS];
struct net_device *filter_dev;
struct mlx5_flow_spec spec;
+ struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
- struct ethhdr eth;
+ struct mlx5e_tc_act_parse_state parse_state;
};
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
+
/* Helper struct for accessing a struct containing list_head array.
* Containing struct
* |- Helper array
@@ -90,6 +96,7 @@ struct mlx5e_tc_flow {
struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5e_tc_flow *peer_flow;
struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
+ struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */
struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
struct list_head hairpin; /* flows sharing the same hairpin */
struct list_head peer; /* flows with peer flow */
@@ -103,10 +110,21 @@ struct mlx5e_tc_flow {
struct rcu_head rcu_head;
struct completion init_done;
struct completion del_hw_done;
- int tunnel_id; /* the mapped tunnel id of this flow */
struct mlx5_flow_attr *attr;
+ struct list_head attrs;
+ u32 chain_mapping;
};
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
struct mlx5_flow_handle *
@@ -115,7 +133,17 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr);
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow);
+
+void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow);
+int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow);
+
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow);
bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow);
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow);
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
@@ -165,6 +193,7 @@ struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec);
+
void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr);
@@ -176,4 +205,14 @@ struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow);
struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv);
+
+struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev);
+
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec);
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec);
+
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index a5e450973225..e6f64d890fb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
+#include <net/inet_ecn.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
@@ -103,7 +104,7 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
}
static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
+ struct net_device *dev,
struct mlx5e_tc_tun_route_attr *attr)
{
struct net_device *route_dev;
@@ -122,13 +123,13 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
} else {
- struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
if (tunnel && tunnel->get_remote_ifindex)
- attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(mirred_dev);
+ attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
}
- rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4);
+ rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -235,7 +236,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
@@ -350,7 +351,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
@@ -440,10 +441,10 @@ release_neigh:
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
+ struct net_device *dev,
struct mlx5e_tc_tun_route_attr *attr)
{
- struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
struct net_device *route_dev;
struct net_device *out_dev;
struct dst_entry *dst;
@@ -451,8 +452,8 @@ static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
int ret;
if (tunnel && tunnel->get_remote_ifindex)
- attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(mirred_dev);
- dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6,
+ attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
@@ -505,7 +506,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
int err;
attr.ttl = tun_key->ttl;
- attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
attr.fl.fl6.saddr = tun_key->u.ipv6.src;
@@ -619,7 +620,7 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
attr.ttl = tun_key->ttl;
- attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
attr.fl.fl6.saddr = tun_key->u.ipv6.src;
@@ -708,9 +709,11 @@ release_neigh:
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *flow_attr)
+ struct mlx5_flow_attr *flow_attr,
+ struct net_device *filter_dev)
{
struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_int_port *int_port;
TC_TUN_ROUTE_ATTR_INIT(attr);
u16 vport_num;
@@ -720,14 +723,14 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
/* Addresses are swapped for decap */
attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
- err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr);
+ err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (flow_attr->tun_ip_version == 6) {
/* Addresses are swapped for decap */
attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
- err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr);
+ err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
}
#endif
else
@@ -745,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
misc_parameters.vxlan_vni);
esw_attr->rx_tun_attr->decap_vport = vport_num;
- } else if (netif_is_ovs_master(attr.route_dev)) {
+ } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
attr.route_dev->ifindex,
MLX5E_TC_INT_PORT_INGRESS);
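Both IPv4 header builders above now clear the ECN bits before the route lookup, since ECN marking is per-packet state rather than part of the flow key. A standalone sketch of the masking (INET_ECN_MASK has the same value as in <net/inet_ecn.h>; the rest is illustrative):

	#include <stdint.h>
	#include <stdio.h>

	#define INET_ECN_MASK 3u	/* bits 0-1 of the TOS/dsfield byte */

	int main(void)
	{
		uint8_t tos = 0x2b;				/* DSCP plus ECN bits */
		uint8_t key = tos & (uint8_t)~INET_ECN_MASK;

		printf("tos 0x%02x -> route key 0x%02x\n", tos, key);	/* 0x2b -> 0x28 */
		return 0;
	}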
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index aa092eaeaec3..b38f693bbb52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -94,7 +94,8 @@ mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
#endif
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr);
+ struct mlx5_flow_attr *attr,
+ struct net_device *filter_dev);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 042b1abe1437..5aff97914367 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -173,19 +173,29 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
list_for_each_entry(flow, flow_list, tmp_list) {
if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
/* Do not offload flows with unresolved neighbors */
if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
continue;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ continue;
+ }
+
/* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
err);
@@ -214,12 +224,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
list_for_each_entry(flow, flow_list, tmp_list) {
if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
+ spec = &flow->attr->parse_attr->spec;
/* update from encap rule to slow path rule */
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
	/* mark the flow's encap dest as invalid */
esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
@@ -230,7 +241,8 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
continue;
}
- mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5e_tc_unoffload_flow_post_acts(flow);
flow->rule[0] = rule;
/* was unset when fast path rule removed */
flow_flag_set(flow, OFFLOADED);
@@ -488,12 +500,17 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
int out_index);
void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index)
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index)
{
struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- if (flow->attr->esw_attr->dests[out_index].flags &
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ if (attr->esw_attr->dests[out_index].flags &
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
mlx5e_detach_encap_route(priv, flow, out_index);
@@ -733,6 +750,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
unsigned long tbl_time_before,
@@ -740,6 +758,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
@@ -748,8 +767,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
+ const struct mlx5e_mpls_info *mpls_info;
unsigned long tbl_time_before = 0;
struct mlx5e_encap_entry *e;
struct mlx5e_encap_key key;
@@ -760,6 +779,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
tun_info = parse_attr->tun_info[out_index];
+ mpls_info = &parse_attr->mpls_info[out_index];
family = ip_tunnel_info_af(tun_info);
key.ip_tun_key = &tun_info->key;
key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
@@ -810,6 +830,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
goto out_err_init;
}
e->tun_info = tun_info;
+ memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
if (err)
goto out_err_init;
@@ -834,8 +855,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
e->compl_result = 1;
attach_flow:
- err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
- out_index);
+ err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
+ tbl_time_before, out_index);
if (err)
goto out_err;
@@ -885,20 +906,18 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
struct mlx5_pkt_reformat_params reformat_params;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_decap_entry *d;
struct mlx5e_decap_key key;
uintptr_t hash_key;
int err = 0;
- parse_attr = flow->attr->parse_attr;
- if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
NL_SET_ERR_MSG_MOD(extack,
"encap header larger than max supported");
return -EOPNOTSUPP;
}
- key.key = parse_attr->eth;
+ key.key = attr->eth;
hash_key = hash_decap_info(&key);
mutex_lock(&esw->offloads.decap_tbl_lock);
d = mlx5e_decap_get(priv, &key, hash_key);
@@ -928,8 +947,8 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
memset(&reformat_params, 0, sizeof(reformat_params));
reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
- reformat_params.size = sizeof(parse_attr->eth);
- reformat_params.data = &parse_attr->eth;
+ reformat_params.size = sizeof(attr->eth);
+ reformat_params.data = &attr->eth;
d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
&reformat_params,
MLX5_FLOW_NAMESPACE_FDB);
@@ -1159,7 +1178,7 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
tbl_time_after = tbl_time_before;
- err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
if (err || !esw_attr->rx_tun_attr->decap_vport)
goto out;
@@ -1198,6 +1217,7 @@ out:
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
unsigned long tbl_time_before,
@@ -1206,7 +1226,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
unsigned long tbl_time_after = tbl_time_before;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5e_route_entry *r;
@@ -1357,17 +1376,19 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
list_for_each_entry(flow, encap_flows, tmp_list) {
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
struct mlx5_flow_spec *spec;
if (flow_flag_test(flow, FAILED))
continue;
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
parse_attr = attr->parse_attr;
- spec = &parse_attr->spec;
err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
e->out_dev, e->route_dev_ifindex,
@@ -1377,7 +1398,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
continue;
}
- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
if (err) {
mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
err);
@@ -1389,9 +1410,18 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
goto offload_to_slow_path;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ goto offload_to_slow_path;
+ }
+
/* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
err);
@@ -1480,7 +1510,7 @@ static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
spec = &parse_attr->spec;
- err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
+ err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
if (err) {
mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
err);
@@ -1579,6 +1609,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
struct net_device *fib_dev;
fen_info = container_of(info, struct fib_entry_notifier_info, info);
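+	/* Routes backed by nexthop objects have no legacy fib_nh for
+	 * fib_info_nh() below to return, so they are not offloaded.
+	 */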
+ if (fen_info->fi->nh)
+ return NULL;
fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
fen_info->dst_len != 32)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
index 3391504d9a08..d542b8476491 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -7,15 +7,19 @@
#include "tc_priv.h"
void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index);
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index);
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
struct net_device **encap_dev,
bool *encap_valid);
+
int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
index 60952b33b568..c5b1617d556f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -30,16 +30,15 @@ static int generate_ip_tun_hdr(char buf[],
struct mlx5e_encap_entry *r)
{
const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ const struct mlx5e_mpls_info *mpls_info = &r->mpls_info;
struct udphdr *udp = (struct udphdr *)(buf);
struct mpls_shim_hdr *mpls;
- u32 tun_id;
- tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id));
mpls = (struct mpls_shim_hdr *)(udp + 1);
*ip_proto = IPPROTO_UDP;
udp->dest = tun_key->tp_dst;
- *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true);
+ *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos);
return 0;
}
@@ -60,37 +59,31 @@ static int parse_tunnel(struct mlx5e_priv *priv,
void *headers_v)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
- struct flow_match_enc_keyid enc_keyid;
struct flow_match_mpls match;
void *misc2_c;
void *misc2_v;
- misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters_2);
- misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters_2);
-
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
- return 0;
-
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
- return 0;
-
- flow_rule_match_enc_keyid(rule, &enc_keyid);
-
- if (!enc_keyid.mask->keyid)
- return 0;
-
if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
return -EOPNOTSUPP;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return -EOPNOTSUPP;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
flow_rule_match_mpls(rule, &match);
/* Only support matching the first LSE */
if (match.mask->used_lses != 1)
return -EOPNOTSUPP;
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
MLX5_SET(fte_match_set_misc2, misc2_c,
outer_first_mpls_over_udp.mpls_label,
match.mask->ls[0].mpls_label);
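generate_ip_tun_hdr() above now takes the label stack entry fields from mlx5e_mpls_info instead of reusing the tunnel id. A standalone sketch of the LSE layout that mpls_entry_encode() produces (label 20 bits, TC 3 bits, bottom-of-stack 1 bit, TTL 8 bits; the shift values match the uapi MPLS_LS_* constants, the helper name is illustrative):

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t mpls_lse(uint32_t label, uint8_t tc, int bos, uint8_t ttl)
	{
		return ((label & 0xfffffu) << 12) |	/* MPLS_LS_LABEL_SHIFT */
		       ((uint32_t)(tc & 0x7) << 9) |	/* MPLS_LS_TC_SHIFT */
		       ((uint32_t)(bos ? 1 : 0) << 8) |	/* MPLS_LS_S_SHIFT */
		       ttl;				/* MPLS_LS_TTL_SHIFT */
	}

	int main(void)
	{
		/* host-order value; the driver stores it big-endian on the wire */
		printf("lse = 0x%08x\n", mpls_lse(16, 0, 1, 64));	/* 0x00010140 */
		return 0;
	}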
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
index da169b816665..d4239e3b3c88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -88,9 +88,6 @@ void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
(MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
break;
- case MLX5E_PACKET_MERGE_SHAMPO:
- MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO);
- break;
default:
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index a55b066746cb..201ac7dd338f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
bool busy = false;
int work_done = 0;
+ rcu_read_lock();
+
ch_stats->poll++;
work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
busy |= work_done == budget;
busy |= rq->post_wqes(rq);
- if (busy)
- return budget;
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
if (unlikely(!napi_complete_done(napi, work_done)))
- return work_done;
+ goto out;
mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
return work_done;
}
@@ -140,7 +147,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
t->stats = &priv->trap_stats.ch;
- netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
+ netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll);
err = mlx5e_open_trap_rq(priv, t);
if (unlikely(err))
@@ -172,6 +179,7 @@ static void mlx5e_activate_trap(struct mlx5e_trap *trap)
{
napi_enable(&trap->napi);
mlx5e_activate_rq(&trap->rq);
+ mlx5e_trigger_napi_sched(&trap->napi);
}
void mlx5e_deactivate_trap(struct mlx5e_priv *priv)
@@ -222,12 +230,12 @@ static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id)
switch (trap_id) {
case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
- err = mlx5e_add_vlan_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
if (err)
goto err_out;
break;
case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
- err = mlx5e_add_mac_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
if (err)
goto err_out;
break;
@@ -248,10 +256,10 @@ static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id)
{
switch (trap_id) {
case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
- mlx5e_remove_vlan_trap(priv);
+ mlx5e_remove_vlan_trap(priv->fs);
break;
case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
- mlx5e_remove_mac_trap(priv);
+ mlx5e_remove_mac_trap(priv->fs);
break;
default:
netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 4cdf8e5b24c2..853f312cd757 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -9,20 +9,28 @@
#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+/* IPSEC inline data includes:
+ * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for
+ * next header.
+ * 2. ESP authentication data: 16 bytes for ICV.
*/
-#if L1_CACHE_BYTES < 128
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
+#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \
+ 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS)
-#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+/* 366 should be big enough to cover all L2, L3 and L4 headers with possible
+ * encapsulations.
+ */
+#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \
+ MLX5_SEND_WQE_DS)
-#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */
+#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \
+ MLX5E_MAX_TX_INLINE_DS + \
+ MLX5E_MAX_TX_IPSEC_DS + \
+ MAX_SKB_FRAGS + 1, \
+ MLX5_SEND_WQEBB_NUM_DS)
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
@@ -57,10 +65,8 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
/* RX */
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info,
- bool recycle);
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
@@ -68,13 +74,17 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
/* TX */
-u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
static inline bool
+mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
+{
+ return (*fifo->pc - *fifo->cc) < fifo->mask;
+}
+
+static inline bool
mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
{
return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
@@ -167,6 +177,11 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
return pi;
}
+static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
+}
+
struct mlx5e_shampo_umr {
u16 len;
};
@@ -303,9 +318,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
-static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
- return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
+ return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
}
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
@@ -426,9 +441,11 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
}
}
-static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
+#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)
+
+static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size)
{
- BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
+ WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev));
/* A WQE must not cross the page boundary, hence two conditions:
* 1. Its size must not exceed the page size.
@@ -438,19 +455,36 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
* stop room of X-1 + X.
* WQE size is also limited by the hardware limit.
*/
+ WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev),
+ "wqe_size %u is greater than max SQ WQEBBs %u",
+ wqe_size, mlx5e_get_max_sq_wqebbs(mdev));
- if (__builtin_constant_p(wqe_size))
- BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
- else
- WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+ return MLX5E_STOP_ROOM(wqe_size);
+}
- return wqe_size * 2 - 1;
+static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev)
+{
+ return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev));
+}
+
+static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev)
+{
+ u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+
+ return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs);
}
static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
{
- u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size);
+ u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size);
return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
}
+
+static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
+{
+ size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+
+ return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
+}
#endif
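A standalone sketch of the stop-room bound introduced above: a WQE of X WQEBBs may need up to X-1 WQEBBs of NOP padding so that it does not cross a page boundary, plus the X WQEBBs of the WQE itself, hence MLX5E_STOP_ROOM(X) = 2*X - 1 (the macro is copied from the hunk; the loop is illustrative):

	#include <stdio.h>

	#define MLX5E_STOP_ROOM(wqebbs)	((wqebbs) * 2 - 1)

	int main(void)
	{
		unsigned int x;

		for (x = 1; x <= 8; x++)
			printf("WQE of %u WQEBBs -> reserve %u WQEBBs\n",
			       x, MLX5E_STOP_ROOM(x));
		return 0;
	}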
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 2f0df5cc1a2d..20507ef2f956 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -57,12 +57,14 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
- struct mlx5e_dma_info *di, struct xdp_buff *xdp)
+ struct page *page, struct xdp_buff *xdp)
{
+ struct skb_shared_info *sinfo = NULL;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
+ int i;
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
@@ -96,46 +98,77 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
xdptxd.dma_addr = dma_addr;
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = dma_addr;
- } else {
- /* Driver assumes that xdp_convert_buff_to_frame returns
- * an xdp_frame that points to the same memory region as
- * the original xdp_buff. It allows to map the memory only
- * once and to use the DMA_BIDIRECTIONAL mode.
- */
- xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+ return false;
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ return true;
+ }
+
+	/* The driver assumes that xdp_convert_buff_to_frame returns an
+	 * xdp_frame that points to the same memory region as the original
+	 * xdp_buff, which allows mapping the memory only once and using the
+	 * DMA_BIDIRECTIONAL mode.
+	 */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ xdpi.page.rq = rq;
+
+ dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
- dma_addr = di->addr + (xdpf->data - (void *)xdpf);
- dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
- DMA_TO_DEVICE);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+ u32 len;
- xdptxd.dma_addr = dma_addr;
- xdpi.page.rq = rq;
- xdpi.page.di = *di;
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+ len = skb_frag_size(frag);
+ dma_sync_single_for_device(sq->pdev, addr, len,
+ DMA_BIDIRECTIONAL);
+ }
}
- return INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
+ xdptxd.dma_addr = dma_addr;
+
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+ return false;
+
+ xdpi.page.page = page;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ xdpi.page.page = skb_frag_page(frag);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ }
+ }
+
+ return true;
}
/* returns true if packet was consumed by xdp */
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- u32 *len, struct xdp_buff *xdp)
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp)
{
- struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
u32 act;
int err;
- if (!prog)
- return false;
-
act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
- *len = xdp->data_end - xdp->data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
@@ -147,11 +180,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
- mlx5e_page_dma_unmap(rq, di);
+ mlx5e_page_dma_unmap(rq, page);
rq->stats->xdp_redirect++;
return true;
default:
- bpf_warn_invalid_xdp_action(act);
+ bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
fallthrough;
case XDP_ABORTED:
xdp_abort:
@@ -199,7 +232,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
struct mlx5e_tx_wqe *wqe;
u16 pi;
- pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
@@ -245,10 +278,8 @@ enum {
INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
- const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
-
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- stop_room))) {
+ sq->stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -262,12 +293,26 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
}
INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result);
+
+INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi, int check_result)
+ struct skb_shared_info *sinfo, int check_result)
{
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
+ if (unlikely(sinfo)) {
+ /* MPWQE is enabled, but a multi-buffer packet is queued for
+ * transmission. MPWQE can't send fragmented packets, so close
+ * the current session and fall back to a regular WQE.
+ */
+ if (unlikely(sq->mpwqe.wqe))
+ mlx5e_xdp_mpwqe_complete(sq);
+ return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+ }
+
if (unlikely(xdptxd->len > sq->hw_mtu)) {
stats->err++;
return false;
@@ -288,17 +333,16 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
+ if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
-INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
{
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -308,43 +352,76 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
return MLX5E_XDP_CHECK_OK;
}
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
+}
+
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi, int check_result)
+ struct skb_shared_info *sinfo, int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg = wqe->data;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5e_tx_wqe *wqe;
dma_addr_t dma_addr = xdptxd->dma_addr;
u32 dma_len = xdptxd->len;
+ u16 ds_cnt, inline_hdr_sz;
+ u8 num_wqebbs = 1;
+ int num_frags = 0;
+ u16 pi;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- net_prefetchw(wqe);
-
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
stats->err++;
return false;
}
- if (!check_result)
- check_result = mlx5e_xmit_xdp_frame_check(sq);
+ ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
+ ds_cnt++;
+
+ /* check_result must be 0 if sinfo is passed. */
+ if (!check_result) {
+ int stop_room = 1;
+
+ if (unlikely(sinfo)) {
+ ds_cnt += sinfo->nr_frags;
+ num_frags = sinfo->nr_frags;
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
+ * enough to hold all fragments.
+ */
+ stop_room = MLX5E_STOP_ROOM(num_wqebbs);
+ }
+
+ check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
+ }
if (unlikely(check_result < 0))
return false;
- cseg->fm_ce_se = 0;
+ pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ net_prefetchw(wqe);
+
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ inline_hdr_sz = 0;
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
- eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
+ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
dseg++;
}
@@ -354,11 +431,45 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- sq->pc++;
+ if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
+ u8 num_pkts = 1 + num_frags;
+ int i;
+
+ memset(&cseg->trailer, 0, sizeof(cseg->trailer));
+ memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
+
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ dseg->lkey = sq->mkey_be;
+
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+
+ dseg++;
+ dseg->addr = cpu_to_be64(addr);
+ dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+ dseg->lkey = sq->mkey_be;
+ }
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_pkts = num_pkts,
+ };
+
+ sq->pc += num_wqebbs;
+ } else {
+ cseg->fm_ce_se = 0;
+
+ sq->pc++;
+ }
sq->doorbell_cseg = cseg;
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
@@ -384,7 +495,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
break;
case MLX5E_XDP_XMIT_MODE_PAGE:
/* XDP_TX from the regular RQ */
- mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+ mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
break;
case MLX5E_XDP_XMIT_MODE_XSK:
/* AF_XDP send */
@@ -537,12 +648,13 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdpi.frame.dma_addr = xdptxd.dma_addr;
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
if (unlikely(!ret)) {
dma_unmap_single(sq->pdev, xdptxd.dma_addr,
xdptxd.len, DMA_TO_DEVICE);
break;
}
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
nxmit++;
}
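A standalone sketch of the descriptor accounting mlx5e_xmit_xdp_frame() does above for multi-buffer XDP_TX: the empty WQE DS count plus one DS for the linear part, one per fragment, and one more when the header is inlined, rounded up to whole WQEBBs. Both constants are assumptions for the sketch, not values read from the driver:

	#include <stdbool.h>
	#include <stdio.h>

	#define TX_WQE_EMPTY_DS_COUNT	2	/* assumed: ctrl + eth segments */
	#define SEND_WQEBB_NUM_DS	4	/* assumed: 64-byte WQEBB / 16-byte DS */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define STOP_ROOM(wqebbs)	((wqebbs) * 2 - 1)

	int main(void)
	{
		bool inline_hdr = true;	/* min_inline_mode != NONE adds one DS */
		int nr_frags;

		for (nr_frags = 0; nr_frags <= 3; nr_frags++) {
			int ds_cnt = TX_WQE_EMPTY_DS_COUNT + 1 + inline_hdr + nr_frags;
			int num_wqebbs = DIV_ROUND_UP(ds_cnt, SEND_WQEBB_NUM_DS);

			printf("%d frags -> %d DS, %d WQEBBs, stop room %d\n",
			       nr_frags, ds_cnt, num_wqebbs, STOP_ROOM(num_wqebbs));
		}
		return 0;
	}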
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 8d991c3b7a50..bc2d9034af5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -38,7 +38,6 @@
#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */)
#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
@@ -47,8 +46,8 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- u32 *len, struct xdp_buff *xdp);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
@@ -59,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi,
+ struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi,
+ struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
@@ -123,12 +122,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
return cur;
}
-static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
if (session->inline_on)
return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
- MLX5E_TX_MPW_MAX_NUM_DS;
- return mlx5e_tx_mpwqe_is_full(session);
+ max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+
+ return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
}
struct mlx5e_xdp_wqe_info {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
index 7b562d2c8a19..ebada0c5af3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -11,13 +11,13 @@ static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
{
struct device *dev = mlx5_core_dma_dev(priv->mdev);
- return xsk_pool_dma_map(pool, dev, 0);
+ return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
}
static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
struct xsk_buff_pool *pool)
{
- return xsk_pool_dma_unmap(pool, 0);
+ return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
}
static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
@@ -72,6 +72,7 @@ void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *x
{
xsk->headroom = xsk_pool_get_headroom(pool);
xsk->chunk_size = xsk_pool_get_chunk_size(pool);
+ xsk->unaligned = pool->unaligned;
}
static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
@@ -98,6 +99,15 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
mlx5e_build_xsk_param(pool, &xsk);
+ if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
+ const char *recommendation = is_power_of_2(xsk.chunk_size) ?
+ "Upgrade firmware" : "Disable striding RQ";
+
+ mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n",
+ xsk.chunk_size, recommendation);
+ }
+
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
/* XSK objects will be created on open. */
goto validate_closed;
@@ -117,20 +127,18 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
goto err_remove_pool;
mlx5e_activate_xsk(c);
+ mlx5e_trigger_napi_icosq(c);
/* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
* any Fill Ring entries at the setup stage.
*/
- err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
- if (unlikely(err))
- goto err_deactivate;
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true);
- return 0;
+ mlx5e_deactivate_rq(&c->rq);
+ mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY);
-err_deactivate:
- mlx5e_deactivate_xsk(c);
- mlx5e_close_xsk(c);
+ return 0;
err_remove_pool:
mlx5e_xsk_remove_pool(&priv->xsk, ix);
@@ -169,7 +177,13 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
goto remove_pool;
c = priv->channels.c[ix];
- mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
+
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_icosq(c);
+ mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT);
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);
+
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
@@ -207,11 +221,10 @@ int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
- u16 ix;
- if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ if (unlikely(qid >= params->num_channels))
return -EINVAL;
- return pool ? mlx5e_xsk_enable_pool(priv, pool, ix) :
- mlx5e_xsk_disable_pool(priv, ix);
+ return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) :
+ mlx5e_xsk_disable_pool(priv, qid);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 8e7b877d8a12..c91b54d9ff27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -4,21 +4,225 @@
#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
/* RX data path */
-static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
- u32 cqe_bcnt)
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5_wq_cyc *wq = &icosq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int batch, i;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
+
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
+ goto err;
+
+ BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
+ batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
+ rq->mpwqe.pages_per_wqe);
+
+ /* If batch < pages_per_wqe, either:
+ * 1. Some (or all) descriptors were invalid.
+ * 2. dma_need_sync is true, and it fell back to allocating one frame.
+ * In either case, try to continue allocating frames one by one, until
+ * the first error, which will mean there are no more valid descriptors.
+ */
+ for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
+ wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!wi->alloc_units[batch].xsk))
+ goto err_reuse_batch;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
+
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
+ }
+ } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ }
+ } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
+ u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size * 2),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ };
+ }
+ } else {
+ __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
+ rq->xsk_pool->chunk_size);
+ __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ .bcount = frame_size,
+ };
+ umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ .bcount = pad_size,
+ };
+ }
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ wi->consumed_strides = 0;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
+
+ /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
+ offset = ix * rq->mpwqe.mtts_per_wqe;
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
+ offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
+ offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
+
+ icosq->pc += rq->mpwqe.umr_wqebbs;
+
+ icosq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_reuse_batch:
+ while (--batch >= 0)
+ xsk_buff_free(wi->alloc_units[batch].xsk);
+
+err:
+ rq->stats->buff_alloc_err++;
+ return -ENOMEM;
+}
+
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct xdp_buff **buffs;
+ u32 contig, alloc;
+ int i;
+
+ /* mlx5e_init_frags_partition creates a 1:1 mapping between
+ * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
+ * allocate XDP buffers straight into alloc_units.
+ */
+ BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
+ sizeof(rq->wqe.alloc_units[0].xsk));
+ buffs = (struct xdp_buff **)rq->wqe.alloc_units;
+ contig = mlx5_wq_cyc_get_size(wq) - ix;
+ if (wqe_bulk <= contig) {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
+ } else {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
+ if (likely(alloc == contig))
+ alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
+ }
+
+ for (i = 0; i < alloc; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return alloc;
+}
+
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!frag->au->xsk))
+ return i;
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return wqe_bulk;
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
+{
+ u32 totallen = xdp->data_end - xdp->data_meta;
+ u32 metalen = xdp->data - xdp->data_meta;
struct sk_buff *skb;
- skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ skb = napi_alloc_skb(rq->cq.napi, totallen);
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
}
- skb_put_data(skb, data, cqe_bcnt);
+ skb_put_data(skb, xdp->data_meta, totallen);
+
+ if (metalen) {
+ skb_metadata_set(skb, metalen);
+ __skb_pull(skb, metalen);
+ }
return skb;
}
@@ -29,8 +233,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
u32 head_offset,
u32 page_idx)
{
- struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
- u32 cqe_bcnt32 = cqe_bcnt;
+ struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk;
+ struct bpf_prog *prog;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -45,8 +249,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(head_offset);
- xdp->data_end = xdp->data + cqe_bcnt32;
- xdp_set_data_meta_invalid(xdp);
+ xsk_buff_set_size(xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
@@ -65,7 +268,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
* allocated first from the Reuse Ring, so it has enough space.
*/
- if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
@@ -74,15 +278,15 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
* frame. On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32);
+ return mlx5e_xsk_construct_skb(rq, xdp);
}
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
- struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt)
{
- struct xdp_buff *xdp = wi->di->xsk;
+ struct xdp_buff *xdp = wi->au->xsk;
+ struct bpf_prog *prog;
/* wi->offset is not used in this function, because xdp->data and the
* DMA address point directly to the necessary place. Furthermore, the
@@ -91,22 +295,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(wi->offset);
- xdp->data_end = xdp->data + cqe_bcnt;
- xdp_set_data_meta_invalid(xdp);
+ xsk_buff_set_size(xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
- if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
- rq->stats->wqe_err++;
- return NULL;
- }
-
- if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
return NULL; /* page/packet was consumed by XDP */
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
- * will be handled by mlx5e_put_rx_frag.
+ * will be handled by mlx5e_free_rx_wqe.
* On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt);
+ return mlx5e_xsk_construct_skb(rq, xdp);
}
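[Reviewer note] The new mlx5e_xsk_alloc_rx_wqes_batched above has one subtlety worth isolating: the WQ is a cyclic ring, so a bulk of wqe_bulk entries starting at ix may wrap, while the batch allocator only fills contiguous slots. The driver therefore splits the request into at most two contiguous runs and issues the second only if the first filled completely. A self-contained model of that split, with a mock allocator standing in for xsk_buff_alloc_batch:

	/* Model of splitting a bulk allocation across a cyclic ring of
	 * power-of-two size, as in mlx5e_xsk_alloc_rx_wqes_batched. */
	#include <stdio.h>

	#define RING_SIZE 8 /* must be a power of two */

	static int buffs[RING_SIZE];

	/* Fill up to n contiguous slots starting at dst; may fill fewer. */
	static unsigned int alloc_batch(int *dst, unsigned int n)
	{
		unsigned int i;

		for (i = 0; i < n; i++)
			dst[i] = 1;
		return n; /* pretend every descriptor was valid */
	}

	static unsigned int alloc_bulk(unsigned int ix, unsigned int wqe_bulk)
	{
		unsigned int contig = RING_SIZE - ix; /* room before the wrap */
		unsigned int alloc;

		if (wqe_bulk <= contig)
			return alloc_batch(buffs + ix, wqe_bulk);

		/* First run up to the end of the ring... */
		alloc = alloc_batch(buffs + ix, contig);
		/* ...then, only if it filled completely, a second run from
		 * slot 0. A short first run means no more valid descriptors. */
		if (alloc == contig)
			alloc += alloc_batch(buffs, wqe_bulk - contig);
		return alloc;
	}

	int main(void)
	{
		unsigned int ix = 6, bulk = 5;
		unsigned int got = alloc_bulk(ix, bulk);

		/* Fills slots 6,7 then 0,1,2: the bulk wrapped once. */
		printf("requested %u at ix=%u, got %u\n", bulk, ix, got);
		return 0;
	}

Each counter is then mapped back into the ring by mlx5_wq_cyc_ctr2ix, which for a power-of-two ring is just (ix + i) & (size - 1).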
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index 7f88ccf67fdd..087c943bd8e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,48 +5,19 @@
#define __MLX5_EN_XSK_RX_H__
#include "en.h"
-#include <net/xdp_sock_drv.h>
/* RX data path */
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
struct mlx5e_mpw_info *wi,
u16 cqe_bcnt,
u32 head_offset,
u32 page_idx);
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
- struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
-static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
- if (!dma_info->xsk)
- return -ENOMEM;
-
- /* Store the DMA address without headroom. In striding RQ case, we just
- * provide pages for UMR, and headroom is counted at the setup stage
- * when creating a WQE. In non-striding RQ case, headroom is accounted
- * in mlx5e_alloc_rx_wqe.
- */
- dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
-
- return 0;
-}
-
-static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
-{
- if (!xsk_uses_need_wakeup(rq->xsk_pool))
- return alloc_err;
-
- if (unlikely(alloc_err))
- xsk_set_rx_need_wakeup(rq->xsk_pool);
- else
- xsk_clear_rx_need_wakeup(rq->xsk_pool);
-
- return false;
-}
-
#endif /* __MLX5_EN_XSK_RX_H__ */
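[Reviewer note] The inline helpers dropped from this header implemented AF_XDP's need_wakeup protocol: on an allocation failure the driver sets the RX wakeup flag so userspace knows it must kick the kernel after refilling the fill ring, and clears it otherwise. A compact model of that state machine, with made-up names (the real flag lives in the pool and is toggled via xsk_set_rx_need_wakeup/xsk_clear_rx_need_wakeup):

	/* Model of the AF_XDP RX need_wakeup handshake removed from this
	 * header. The struct and helpers are illustrative stand-ins. */
	#include <stdbool.h>
	#include <stdio.h>

	struct pool {
		bool uses_need_wakeup; /* negotiated at bind time */
		bool rx_need_wakeup;   /* visible to userspace via the ring */
	};

	/* Returns true when the caller should stop and wait for a kick. */
	static bool update_rx_wakeup(struct pool *p, bool alloc_err)
	{
		if (!p->uses_need_wakeup)
			return alloc_err; /* legacy mode: report the failure */

		if (alloc_err)
			p->rx_need_wakeup = true;  /* userspace must kick us */
		else
			p->rx_need_wakeup = false; /* making progress */

		return false; /* with need_wakeup, userspace drives recovery */
	}

	int main(void)
	{
		struct pool p = { .uses_need_wakeup = true };

		update_rx_wakeup(&p, true);
		printf("after failed alloc: need_wakeup=%d\n", p.rx_need_wakeup);
		update_rx_wakeup(&p, false);
		printf("after good alloc:   need_wakeup=%d\n", p.rx_need_wakeup);
		return 0;
	}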
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 538bc2419bd8..ff03c43833bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -4,24 +4,20 @@
#include "setup.h"
#include "en/params.h"
#include "en/txrx.h"
+#include "en/health.h"
+#include <net/xdp_sock_drv.h>
-/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may
- * change unexpectedly, and mlx5e has a minimum valid stride size for striding
- * RQ, keep this check in the driver.
+/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal
+ * stride size of striding RQ.
*/
-#define MLX5E_MIN_XSK_CHUNK_SIZE 2048
+#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE)
bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev)
{
/* AF_XDP doesn't support frames larger than PAGE_SIZE. */
- if (xsk->chunk_size > PAGE_SIZE ||
- xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
- return false;
-
- /* Current MTU and XSK headroom don't allow packets to fit the frames. */
- if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size)
+ if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
return false;
/* frag_sz is different for regular and XSK RQs, so ensure that linear
@@ -29,9 +25,9 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
*/
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk);
default: /* MLX5_WQ_TYPE_CYCLIC */
- return mlx5e_rx_is_linear_skb(params, xsk);
+ return mlx5e_rx_is_linear_skb(mdev, params, xsk);
}
}
@@ -42,7 +38,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
struct mlx5e_channel_param *cparam)
{
mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
- mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
+ mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq);
}
static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
@@ -63,13 +59,14 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
rq->clock = &mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
+ rq->channel = c;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
rq->xdpsq = &c->rq_xdpsq;
rq->xsk_pool = pool;
- rq->stats = &c->priv->channel_stats[c->ix].xskrq;
+ rq->stats = &c->priv->channel_stats[c->ix]->xskrq;
rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
- rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK;
+ rq_xdp_ix = c->ix;
err = mlx5e_rq_set_handlers(rq, params, xsk);
if (err)
return err;
@@ -157,7 +154,7 @@ err_free_cparam:
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
- synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */
+ synchronize_net(); /* Sync with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
@@ -170,16 +167,25 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
void mlx5e_activate_xsk(struct mlx5e_channel *c)
{
+ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
+ * activating XSKRQ in the middle of recovery.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
- /* TX queue is created active. */
+ mlx5e_reporter_icosq_resume_recovery(c);
- spin_lock_bh(&c->async_icosq_lock);
- mlx5e_trigger_irq(&c->async_icosq);
- spin_unlock_bh(&c->async_icosq_lock);
+ /* TX queue is created active. */
}
void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
{
- mlx5e_deactivate_rq(&c->xskrq);
+ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
+ * middle of recovery. Suspend the recovery to avoid it.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+
/* TX queue is disabled on close. */
}
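[Reviewer note] The reworked chunk-size check above boils down to a closed interval: chunk_size must be at least max(2048, XDP_UMEM_MIN_CHUNK_SIZE) to satisfy the minimum stride of striding RQ, and at most PAGE_SIZE because AF_XDP frames cannot span pages. A standalone sketch of that validation (the constants are hard-coded here for illustration, not taken from the UAPI headers):

	/* Sketch of the XSK chunk-size bounds check. Values mirror the
	 * driver's reasoning; 2048 matches XDP_UMEM_MIN_CHUNK_SIZE in
	 * current kernels but is assumed here. */
	#include <stdbool.h>
	#include <stdio.h>

	#define PAGE_SIZE		4096
	#define XDP_UMEM_MIN_CHUNK_SIZE	2048

	#define MIN_XSK_CHUNK_SIZE \
		(2048 > XDP_UMEM_MIN_CHUNK_SIZE ? 2048 : XDP_UMEM_MIN_CHUNK_SIZE)

	static bool validate_chunk_size(unsigned int chunk_size)
	{
		return chunk_size >= MIN_XSK_CHUNK_SIZE &&
		       chunk_size <= PAGE_SIZE;
	}

	int main(void)
	{
		unsigned int sizes[] = { 1024, 2048, 4096, 8192 };
		unsigned int i;

		for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
			printf("chunk %u -> %s\n", sizes[i],
			       validate_chunk_size(sizes[i]) ? "ok" : "rejected");
		return 0;
	}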
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 8e96260fce1d..367a9505ca4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -12,18 +12,14 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
struct mlx5e_channel *c;
- u16 ix;
if (unlikely(!mlx5e_xdp_is_active(priv)))
return -ENETDOWN;
- if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ if (unlikely(qid >= params->num_channels))
return -EINVAL;
- c = priv->channels.c[ix];
-
- if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
- return -ENXIO;
+ c = priv->channels.c[qid];
if (!napi_if_scheduled_mark_missed(&c->napi)) {
/* To avoid WQE overrun, don't post a NOP if async_icosq is not
@@ -36,9 +32,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
return 0;
- spin_lock_bh(&c->async_icosq_lock);
- mlx5e_trigger_irq(&c->async_icosq);
- spin_unlock_bh(&c->async_icosq_lock);
+ mlx5e_trigger_napi_icosq(c);
}
return 0;
@@ -103,12 +97,15 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result);
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+ check_result);
if (unlikely(!ret)) {
if (sq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xsk_tx_post_err(sq, &xdpi);
+ } else {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
}
flush = true;
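[Reviewer note] The tx.c change above moves the xdpi FIFO push out of the xmit helper and into the caller, so completion metadata is recorded only for frames the WQE builder actually accepted; on failure the partially built MPWQE is closed and the frame is completed as an error instead. The ordering matters: pushing before a failed xmit would leave a stale FIFO entry for the completion path to misread. A minimal model of the push-on-success pattern:

	/* Model of recording per-frame completion info only after the
	 * transmit step succeeds, as in mlx5e_xsk_tx above. */
	#include <stdbool.h>
	#include <stdio.h>

	#define FIFO_SIZE 16 /* power of two */

	struct xdpi { int id; };

	static struct xdpi fifo[FIFO_SIZE];
	static unsigned int fifo_pc; /* producer counter */

	static void fifo_push(struct xdpi xi)
	{
		fifo[fifo_pc++ & (FIFO_SIZE - 1)] = xi;
	}

	static bool xmit_frame(int id)
	{
		return id % 3 != 0; /* pretend every third frame fails */
	}

	static void complete_err(struct xdpi xi)
	{
		printf("frame %d completed as error\n", xi.id);
	}

	int main(void)
	{
		int id;

		for (id = 1; id <= 5; id++) {
			struct xdpi xi = { .id = id };

			if (!xmit_frame(id)) {
				complete_err(xi); /* no FIFO entry on failure */
				continue;
			}
			fifo_push(xi); /* completion path pops this later */
		}
		printf("frames queued for completion: %u\n", fifo_pc);
		return 0;
	}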
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index a05085035f23..9c505158b975 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,7 +5,6 @@
#define __MLX5_EN_XSK_TX_H__
#include "en.h"
-#include <net/xdp_sock_drv.h>
/* TX data path */
@@ -13,15 +12,4 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
-static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
-{
- if (!xsk_uses_need_wakeup(sq->xsk_pool))
- return;
-
- if (sq->pc != sq->cc)
- xsk_clear_tx_need_wakeup(sq->xsk_pool);
- else
- xsk_set_tx_need_wakeup(sq->xsk_pool);
-}
-
#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index d964665eaa63..07187028f0d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -37,8 +37,9 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/tls.h"
-#include "en_accel/tls_rxtx.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
+#include "en_accel/macsec.h"
#include "en.h"
#include "en/txrx.h"
@@ -124,8 +125,9 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
#ifdef CONFIG_MLX5_EN_TLS
/* May send SKBs and WQEs. */
- if (mlx5e_tls_skb_offloaded(skb))
- if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls)))
+ if (mlx5e_ktls_skb_offloaded(skb))
+ if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb,
+ &state->tls)))
return false;
#endif
@@ -136,16 +138,16 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
}
#endif
- return true;
-}
+#ifdef CONFIG_MLX5_EN_MACSEC
+ if (unlikely(mlx5e_macsec_skb_is_offload(skb))) {
+ struct mlx5e_priv *priv = netdev_priv(dev);
-static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state)
-{
-#ifdef CONFIG_MLX5_EN_IPSEC
- return mlx5e_ipsec_is_tx_flow(&state->ipsec);
-#else
- return false;
+ if (unlikely(!mlx5e_macsec_handle_tx_skb(priv->macsec, skb)))
+ return false;
+ }
#endif
+
+ return true;
}
static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
@@ -171,6 +173,11 @@ static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
mlx5e_ipsec_tx_build_eseg(priv, skb, eseg);
#endif
+#ifdef CONFIG_MLX5_EN_MACSEC
+ if (unlikely(mlx5e_macsec_skb_is_offload(skb)))
+ mlx5e_macsec_tx_build_eseg(priv->macsec, skb, eseg);
+#endif
+
#if IS_ENABLED(CONFIG_GENEVE)
if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
mlx5e_tx_tunnel_accel(skb, eseg, ihs);
@@ -183,7 +190,7 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
struct mlx5_wqe_inline_seg *inlseg)
{
#ifdef CONFIG_MLX5_EN_TLS
- mlx5e_tls_handle_tx_wqe(&wqe->ctrl, &state->tls);
+ mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls);
#endif
#ifdef CONFIG_MLX5_EN_IPSEC
@@ -202,4 +209,14 @@ static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv)
{
mlx5e_ktls_cleanup_rx(priv);
}
+
+static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv)
+{
+ return mlx5e_ktls_init_tx(priv);
+}
+
+static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv)
+{
+ mlx5e_ktls_cleanup_tx(priv);
+}
#endif /* __MLX5E_EN_ACCEL_H__ */
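[Reviewer note] After this patch mlx5e_accel_tx_begin runs the TLS, IPsec and MACsec handlers in sequence, and any of them may consume or reject the skb by returning failure, which aborts transmission of that packet. The shape is a short-circuiting chain of optional handlers, sketched below with stand-in names:

	/* Sketch of the short-circuiting TX offload chain in
	 * mlx5e_accel_tx_begin. Handler names are illustrative. */
	#include <stdbool.h>
	#include <stdio.h>

	struct skb { bool tls, macsec; };

	static bool tls_handle(struct skb *s)    { (void)s; return true; }
	static bool macsec_handle(struct skb *s) { (void)s; return true; }

	static bool accel_tx_begin(struct skb *s)
	{
		/* Each offload may veto or take over the packet. */
		if (s->tls && !tls_handle(s))
			return false;
		if (s->macsec && !macsec_handle(s))
			return false;
		return true; /* proceed with the regular TX path */
	}

	int main(void)
	{
		struct skb s = { .tls = true };

		printf("transmit: %s\n", accel_tx_begin(&s) ? "yes" : "dropped");
		return 0;
	}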
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
index 4c4ee524176c..285d32d2fd08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
-#include <linux/netdevice.h>
+#include <mlx5_core.h>
#include "en_accel/fs_tcp.h"
#include "fs_core.h"
@@ -71,13 +71,13 @@ void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
-struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
struct sock *sk, u32 tirn,
uint32_t flow_tag)
{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
struct mlx5_flow_destination dest = {};
struct mlx5e_flow_table *ft = NULL;
- struct mlx5e_accel_fs_tcp *fs_tcp;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *flow;
struct mlx5_flow_spec *spec;
@@ -86,23 +86,21 @@ struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_tcp = priv->fs.accel_tcp;
-
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
switch (sk->sk_family) {
case AF_INET:
accel_fs_tcp_set_ipv4_flow(spec, sk);
ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP];
- mlx5e_dbg(HW, priv, "%s flow is %pI4:%d -> %pI4:%d\n", __func__,
- &inet_sk(sk)->inet_rcv_saddr,
- inet_sk(sk)->inet_sport,
- &inet_sk(sk)->inet_daddr,
- inet_sk(sk)->inet_dport);
+ fs_dbg(fs, "%s flow is %pI4:%d -> %pI4:%d\n", __func__,
+ &inet_sk(sk)->inet_rcv_saddr,
+ inet_sk(sk)->inet_sport,
+ &inet_sk(sk)->inet_daddr,
+ inet_sk(sk)->inet_dport);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- if (!sk->sk_ipv6only &&
+ if (!ipv6_only_sock(sk) &&
ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
accel_fs_tcp_set_ipv4_flow(spec, sk);
ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP];
@@ -140,34 +138,32 @@ struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1);
if (IS_ERR(flow))
- netdev_err(priv->netdev, "mlx5_add_flow_rules() failed, flow is %ld\n",
- PTR_ERR(flow));
+ fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow));
out:
kvfree(spec);
return flow;
}
-static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
+static int accel_fs_tcp_add_default_rule(struct mlx5e_flow_steering *fs,
enum accel_fs_tcp_type type)
{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
struct mlx5e_flow_table *accel_fs_t;
struct mlx5_flow_destination dest;
- struct mlx5e_accel_fs_tcp *fs_tcp;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
int err = 0;
- fs_tcp = priv->fs.accel_tcp;
accel_fs_t = &fs_tcp->tables[type];
- dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type));
+ dest = mlx5_ttc_get_default_dest(ttc, fs_accel2tt(type));
rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, accel_fs type=%d, err %d\n",
- __func__, type, err);
+ fs_err(fs, "%s: add default rule failed, accel_fs type=%d, err %d\n",
+ __func__, type, err);
return err;
}
@@ -265,9 +261,11 @@ out:
return err;
}
-static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_type type)
+static int accel_fs_tcp_create_table(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type)
{
- struct mlx5e_flow_table *ft = &priv->fs.accel_tcp->tables[type];
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_flow_table *ft = &accel_tcp->tables[type];
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -277,21 +275,21 @@ static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_
ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs accel table id %u level %u\n",
- ft->t->id, ft->t->level);
+ fs_dbg(fs, "Created fs accel table id %u level %u\n",
+ ft->t->id, ft->t->level);
err = accel_fs_tcp_create_groups(ft, type);
if (err)
goto err;
- err = accel_fs_tcp_add_default_rule(priv, type);
+ err = accel_fs_tcp_add_default_rule(fs, type);
if (err)
goto err;
@@ -301,17 +299,18 @@ err:
return err;
}
-static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
+static int accel_fs_tcp_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err, i;
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i));
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_accel2tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, fs_accel2tt(i), err);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
return err;
}
}
@@ -319,32 +318,32 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
return 0;
}
-static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
+static int accel_fs_tcp_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
struct mlx5_flow_destination dest = {};
int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
- dest.ft = priv->fs.accel_tcp->tables[i].t;
+ dest.ft = accel_tcp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(ttc, fs_accel2tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, fs_accel2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
return err;
}
}
return 0;
}
-static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i)
+static void accel_fs_tcp_destroy_table(struct mlx5e_flow_steering *fs, int i)
{
- struct mlx5e_accel_fs_tcp *fs_tcp;
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
- fs_tcp = priv->fs.accel_tcp;
if (IS_ERR_OR_NULL(fs_tcp->tables[i].t))
return;
@@ -353,40 +352,43 @@ static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i)
fs_tcp->tables[i].t = NULL;
}
-void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv)
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
int i;
- if (!priv->fs.accel_tcp)
+ if (!accel_tcp)
return;
- accel_fs_tcp_disable(priv);
+ accel_fs_tcp_disable(fs);
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++)
- accel_fs_tcp_destroy_table(priv, i);
+ accel_fs_tcp_destroy_table(fs, i);
- kfree(priv->fs.accel_tcp);
- priv->fs.accel_tcp = NULL;
+ kfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
}
-int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv)
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_accel_fs_tcp *accel_tcp;
int i, err;
- if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version))
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version))
return -EOPNOTSUPP;
- priv->fs.accel_tcp = kzalloc(sizeof(*priv->fs.accel_tcp), GFP_KERNEL);
- if (!priv->fs.accel_tcp)
+ accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL);
+ if (!accel_tcp)
return -ENOMEM;
+ mlx5e_fs_set_accel_tcp(fs, accel_tcp);
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
- err = accel_fs_tcp_create_table(priv, i);
+ err = accel_fs_tcp_create_table(fs, i);
if (err)
goto err_destroy_tables;
}
- err = accel_fs_tcp_enable(priv);
+ err = accel_fs_tcp_enable(fs);
if (err)
goto err_destroy_tables;
@@ -394,9 +396,8 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv)
err_destroy_tables:
while (--i >= 0)
- accel_fs_tcp_destroy_table(priv, i);
-
- kfree(priv->fs.accel_tcp);
- priv->fs.accel_tcp = NULL;
+ accel_fs_tcp_destroy_table(fs, i);
+ kfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
return err;
}
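[Reviewer note] The create path above ends with the canonical partial-initialization unwind: on a mid-loop failure, while (--i >= 0) tears down exactly the tables created so far, in reverse order, and nothing else. Isolated as a pattern, with mock create/destroy steps:

	/* The reverse-order unwind used in mlx5e_accel_fs_tcp_create's
	 * error path, shown standalone. */
	#include <stdio.h>

	#define NUM_TABLES 4

	static int create_table(int i)
	{
		if (i == 2)
			return -1; /* simulate a failure on the third table */
		printf("created table %d\n", i);
		return 0;
	}

	static void destroy_table(int i)
	{
		printf("destroyed table %d\n", i);
	}

	int main(void)
	{
		int i, err = 0;

		for (i = 0; i < NUM_TABLES; i++) {
			err = create_table(i);
			if (err)
				goto err_destroy_tables;
		}
		return 0;

	err_destroy_tables:
		/* i is the index that failed; unwind 0..i-1, newest first. */
		while (--i >= 0)
			destroy_table(i);
		return err;
	}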
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
index 589235824543..a032bff482a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
@@ -4,19 +4,19 @@
#ifndef __MLX5E_ACCEL_FS_TCP_H__
#define __MLX5E_ACCEL_FS_TCP_H__
-#include "en.h"
+#include "en/fs.h"
#ifdef CONFIG_MLX5_EN_TLS
-int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv);
-void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv);
-struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs);
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
struct sock *sk, u32 tirn,
uint32_t flow_tag);
void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule);
#else
-static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv) {}
-static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
+static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; }
+static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {}
+static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
struct sock *sk, u32 tirn,
uint32_t flow_tag)
{ return ERR_PTR(-EOPNOTSUPP); }
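[Reviewer note] The header keeps the usual Kconfig stub pattern: with CONFIG_MLX5_EN_TLS off, every entry point collapses to a static inline no-op or an ERR_PTR(-EOPNOTSUPP), so callers compile unchanged and the compiler drops the dead branches. A generic sketch of the pattern:

	/* Generic sketch of the Kconfig stub pattern used by fs_tcp.h:
	 * a real prototype under the config option, an inline stub
	 * without it. */
	#include <stdio.h>

	#define FEATURE_ENABLED 0 /* flip to 1 (and supply the real
				   * definition) to model CONFIG_FOO=y */

	#if FEATURE_ENABLED
	int feature_create(void); /* real implementation in a .c file */
	#else
	static inline int feature_create(void)
	{
		return 0; /* harmless no-op: callers need no #ifdefs */
	}
	#endif

	int main(void)
	{
		printf("feature_create() = %d\n", feature_create());
		return 0;
	}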
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 7cab08a2f715..a715601865d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -35,26 +35,14 @@
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
-#include <linux/module.h>
#include "en.h"
-#include "en_accel/ipsec.h"
-#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/ipsec_fs.h"
+#include "ipsec.h"
+#include "ipsec_rxtx.h"
static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
- struct mlx5e_ipsec_sa_entry *sa;
-
- if (!x)
- return NULL;
-
- sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
- if (!sa)
- return NULL;
-
- WARN_ON(sa->x != x);
- return sa;
+ return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
}
struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
@@ -75,9 +63,9 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
return ret;
}
-static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry,
- unsigned int handle)
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
{
+ unsigned int handle = sa_entry->ipsec_obj_id;
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
struct mlx5e_ipsec_sa_entry *_sa_entry;
unsigned long flags;
@@ -113,7 +101,6 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
struct xfrm_replay_state_esn *replay_esn;
u32 seq_bottom = 0;
u8 overlap;
- u32 *esn;
if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
sa_entry->esn_state.trigger = 0;
@@ -128,11 +115,9 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
htonl(seq_bottom));
- esn = &sa_entry->esn_state.esn;
sa_entry->esn_state.trigger = 1;
if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
- ++(*esn);
sa_entry->esn_state.overlap = 0;
return true;
} else if (unlikely(!overlap &&
@@ -149,7 +134,7 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct xfrm_state *x = sa_entry->x;
- struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
+ struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
struct aead_geniv_ctx *geniv_ctx;
struct crypto_aead *aead;
unsigned int crypto_data_len, key_len;
@@ -183,23 +168,17 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
}
- /* rx handle */
- attrs->sa_handle = sa_entry->handle;
-
- /* algo type */
- attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
/* action */
- attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
- MLX5_ACCEL_ESP_ACTION_ENCRYPT :
- MLX5_ACCEL_ESP_ACTION_DECRYPT;
+ attrs->action = (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) ?
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+ MLX5_ACCEL_ESP_ACTION_DECRYPT;
/* flags */
attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
MLX5_ACCEL_ESP_FLAGS_TUNNEL;
/* spi */
- attrs->spi = x->id.spi;
+ attrs->spi = be32_to_cpu(x->id.spi);
/* source , destination ips */
memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr));
@@ -227,8 +206,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
return -EINVAL;
}
if (x->props.flags & XFRM_STATE_ESN &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_ESN)) {
+ !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_ESN)) {
netdev_info(netdev, "Cannot offload ESN xfrm states\n");
return -EINVAL;
}
@@ -275,46 +253,29 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
return -EINVAL;
}
- if (x->props.family == AF_INET6 &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_IPV6)) {
- netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
- return -EINVAL;
- }
return 0;
}
-static int mlx5e_xfrm_fs_add_rule(struct mlx5e_priv *priv,
- struct mlx5e_ipsec_sa_entry *sa_entry)
-{
- if (!mlx5_is_ipsec_device(priv->mdev))
- return 0;
-
- return mlx5e_accel_ipsec_fs_add_rule(priv, &sa_entry->xfrm->attrs,
- sa_entry->ipsec_obj_id,
- &sa_entry->ipsec_rule);
-}
-
-static void mlx5e_xfrm_fs_del_rule(struct mlx5e_priv *priv,
- struct mlx5e_ipsec_sa_entry *sa_entry)
+static void _update_xfrm_state(struct work_struct *work)
{
- if (!mlx5_is_ipsec_device(priv->mdev))
- return;
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = container_of(
+ modify_work, struct mlx5e_ipsec_sa_entry, modify_work);
- mlx5e_accel_ipsec_fs_del_rule(priv, &sa_entry->xfrm->attrs,
- &sa_entry->ipsec_rule);
+ mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs);
}
static int mlx5e_xfrm_add_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
struct net_device *netdev = x->xso.real_dev;
- struct mlx5_accel_esp_xfrm_attrs attrs;
struct mlx5e_priv *priv;
- unsigned int sa_handle;
int err;
priv = netdev_priv(netdev);
+ if (!priv->ipsec)
+ return -EOPNOTSUPP;
err = mlx5e_xfrm_validate_state(x);
if (err)
@@ -332,33 +293,18 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
/* check esn */
mlx5e_ipsec_update_esn_state(sa_entry);
- /* create xfrm */
- mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
- sa_entry->xfrm =
- mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
- MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
- if (IS_ERR(sa_entry->xfrm)) {
- err = PTR_ERR(sa_entry->xfrm);
- goto err_sa_entry;
- }
-
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
/* create hw context */
- sa_entry->hw_context =
- mlx5_accel_esp_create_hw_context(priv->mdev,
- sa_entry->xfrm,
- &sa_handle);
- if (IS_ERR(sa_entry->hw_context)) {
- err = PTR_ERR(sa_entry->hw_context);
+ err = mlx5_ipsec_create_sa_ctx(sa_entry);
+ if (err)
goto err_xfrm;
- }
- sa_entry->ipsec_obj_id = sa_handle;
- err = mlx5e_xfrm_fs_add_rule(priv, sa_entry);
+ err = mlx5e_accel_ipsec_fs_add_rule(priv, sa_entry);
if (err)
goto err_hw_ctx;
- if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
- err = mlx5e_ipsec_sadb_rx_add(sa_entry, sa_handle);
+ if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+ err = mlx5e_ipsec_sadb_rx_add(sa_entry);
if (err)
goto err_add_rule;
} else {
@@ -366,18 +312,16 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
}
+ INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state);
x->xso.offload_handle = (unsigned long)sa_entry;
goto out;
err_add_rule:
- mlx5e_xfrm_fs_del_rule(priv, sa_entry);
+ mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
err_hw_ctx:
- mlx5_accel_esp_free_hw_context(priv->mdev, sa_entry->hw_context);
+ mlx5_ipsec_free_sa_ctx(sa_entry);
err_xfrm:
- mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
-err_sa_entry:
kfree(sa_entry);
-
out:
return err;
}
@@ -386,10 +330,7 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- if (!sa_entry)
- return;
-
- if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ if (x->xso.dir == XFRM_DEV_OFFLOAD_IN)
mlx5e_ipsec_sadb_rx_del(sa_entry);
}
@@ -398,24 +339,18 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
struct mlx5e_priv *priv = netdev_priv(x->xso.dev);
- if (!sa_entry)
- return;
-
- if (sa_entry->hw_context) {
- flush_workqueue(sa_entry->ipsec->wq);
- mlx5e_xfrm_fs_del_rule(priv, sa_entry);
- mlx5_accel_esp_free_hw_context(sa_entry->xfrm->mdev, sa_entry->hw_context);
- mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
- }
-
+ cancel_work_sync(&sa_entry->modify_work.work);
+ mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
+ mlx5_ipsec_free_sa_ctx(sa_entry);
kfree(sa_entry);
}
int mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
- struct mlx5e_ipsec *ipsec = NULL;
+ struct mlx5e_ipsec *ipsec;
+ int ret;
- if (!MLX5_IPSEC_DEV(priv->mdev)) {
+ if (!mlx5_ipsec_device_caps(priv->mdev)) {
netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
return 0;
}
@@ -426,21 +361,27 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
hash_init(ipsec->sadb_rx);
spin_lock_init(&ipsec->sadb_rx_lock);
- ida_init(&ipsec->halloc);
- ipsec->en_priv = priv;
- ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+ ipsec->mdev = priv->mdev;
ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
priv->netdev->name);
if (!ipsec->wq) {
- kfree(ipsec);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_wq;
}
+ ret = mlx5e_accel_ipsec_fs_init(ipsec);
+ if (ret)
+ goto err_fs_init;
+
priv->ipsec = ipsec;
- mlx5e_accel_ipsec_fs_init(priv);
netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
return 0;
+
+err_fs_init:
+ destroy_workqueue(ipsec->wq);
+err_wq:
+ kfree(ipsec);
+ return (ret != -EOPNOTSUPP) ? ret : 0;
}
void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
@@ -450,10 +391,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
if (!ipsec)
return;
- mlx5e_accel_ipsec_fs_cleanup(priv);
+ mlx5e_accel_ipsec_fs_cleanup(ipsec);
destroy_workqueue(ipsec->wq);
-
- ida_destroy(&ipsec->halloc);
kfree(ipsec);
priv->ipsec = NULL;
}
@@ -473,50 +412,19 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
return true;
}
-struct mlx5e_ipsec_modify_state_work {
- struct work_struct work;
- struct mlx5_accel_esp_xfrm_attrs attrs;
- struct mlx5e_ipsec_sa_entry *sa_entry;
-};
-
-static void _update_xfrm_state(struct work_struct *work)
-{
- int ret;
- struct mlx5e_ipsec_modify_state_work *modify_work =
- container_of(work, struct mlx5e_ipsec_modify_state_work, work);
- struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
-
- ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
- &modify_work->attrs);
- if (ret)
- netdev_warn(sa_entry->ipsec->en_priv->netdev,
- "Not an IPSec offload device\n");
-
- kfree(modify_work);
-}
-
static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- struct mlx5e_ipsec_modify_state_work *modify_work;
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ &sa_entry->modify_work;
bool need_update;
- if (!sa_entry)
- return;
-
need_update = mlx5e_ipsec_update_esn_state(sa_entry);
if (!need_update)
return;
- modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC);
- if (!modify_work)
- return;
-
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
- modify_work->sa_entry = sa_entry;
-
- INIT_WORK(&modify_work->work, _update_xfrm_state);
- WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work));
+ queue_work(sa_entry->ipsec->wq, &modify_work->work);
}
static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
@@ -532,11 +440,8 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
struct net_device *netdev = priv->netdev;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
- !MLX5_CAP_ETH(mdev, swp)) {
- mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+ if (!mlx5_ipsec_device_caps(mdev))
return;
- }
mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
@@ -551,15 +456,12 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
- !MLX5_CAP_ETH(mdev, swp_lso)) {
+ if (!MLX5_CAP_ETH(mdev, swp_lso)) {
mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
return;
}
- if (mlx5_is_ipsec_device(mdev))
- netdev->gso_partial_features |= NETIF_F_GSO_ESP;
-
+ netdev->gso_partial_features |= NETIF_F_GSO_ESP;
mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
netdev->features |= NETIF_F_GSO_ESP;
netdev->hw_features |= NETIF_F_GSO_ESP;
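[Reviewer note] The ESN bookkeeping touched above tracks which half of the 32-bit sequence space the bottom of the replay window sits in: while overlap is set and the window has moved into the lower half, seq_hi has advanced and the flag can be cleared; conversely, a window at or above the halfway point with overlap clear sets the flag so the coming wrap is caught. MLX5E_IPSEC_ESN_SCOPE_MID (0x80000000) is that halfway point. A standalone model of the decision, with xfrm_replay_seqhi and the SADB plumbing mocked away:

	/* Model of the overlap tracking in mlx5e_ipsec_update_esn_state. */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define ESN_SCOPE_MID 0x80000000u /* midpoint of the 32-bit space */

	struct esn_state {
		bool overlap;
	};

	/* Returns true when the hardware context needs an ESN update. */
	static bool update_esn_state(struct esn_state *st, uint32_t seq_bottom)
	{
		if (st->overlap && seq_bottom < ESN_SCOPE_MID) {
			/* Window crossed into the lower half: seq_hi moved. */
			st->overlap = false;
			return true;
		}
		if (!st->overlap && seq_bottom >= ESN_SCOPE_MID) {
			/* Window in the upper half: arm for the wrap. */
			st->overlap = true;
			return true;
		}
		return false; /* same half as before, nothing to push to HW */
	}

	int main(void)
	{
		struct esn_state st = { .overlap = false };

		printf("upper half: update=%d overlap=%d\n",
		       update_esn_state(&st, 0x90000000u), st.overlap);
		printf("after wrap: update=%d overlap=%d\n",
		       update_esn_state(&st, 0x00001000u), st.overlap);
		return 0;
	}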
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 6164c7f59efb..16bcceec16c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -40,11 +40,56 @@
#include <net/xfrm.h>
#include <linux/idr.h>
-#include "accel/ipsec.h"
-
#define MLX5E_IPSEC_SADB_RX_BITS 10
#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+enum mlx5_accel_esp_flags {
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0,
+ MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1,
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2,
+};
+
+enum mlx5_accel_esp_action {
+ MLX5_ACCEL_ESP_ACTION_DECRYPT,
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT,
+};
+
+struct aes_gcm_keymat {
+ u64 seq_iv;
+
+ u32 salt;
+ u32 icv_len;
+
+ u32 key_len;
+ u32 aes_key[256 / 32];
+};
+
+struct mlx5_accel_esp_xfrm_attrs {
+ enum mlx5_accel_esp_action action;
+ u32 esn;
+ u32 spi;
+ u32 flags;
+ struct aes_gcm_keymat aes_gcm;
+
+ union {
+ __be32 a4;
+ __be32 a6[4];
+ } saddr;
+
+ union {
+ __be32 a4;
+ __be32 a6[4];
+ } daddr;
+
+ u8 is_ipv6;
+};
+
+enum mlx5_ipsec_cap {
+ MLX5_IPSEC_CAP_CRYPTO = 1 << 0,
+ MLX5_IPSEC_CAP_ESN = 1 << 1,
+};
+
struct mlx5e_priv;
struct mlx5e_ipsec_sw_stats {
@@ -55,37 +100,16 @@ struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_tx_drop_no_state;
atomic64_t ipsec_tx_drop_not_ip;
atomic64_t ipsec_tx_drop_trailer;
- atomic64_t ipsec_tx_drop_metadata;
-};
-
-struct mlx5e_ipsec_stats {
- u64 ipsec_dec_in_packets;
- u64 ipsec_dec_out_packets;
- u64 ipsec_dec_bypass_packets;
- u64 ipsec_enc_in_packets;
- u64 ipsec_enc_out_packets;
- u64 ipsec_enc_bypass_packets;
- u64 ipsec_dec_drop_packets;
- u64 ipsec_dec_auth_fail_packets;
- u64 ipsec_enc_drop_packets;
- u64 ipsec_add_sa_success;
- u64 ipsec_add_sa_fail;
- u64 ipsec_del_sa_success;
- u64 ipsec_del_sa_fail;
- u64 ipsec_cmd_drop;
};
struct mlx5e_accel_fs_esp;
struct mlx5e_ipsec_tx;
struct mlx5e_ipsec {
- struct mlx5e_priv *en_priv;
+ struct mlx5_core_dev *mdev;
DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
- bool no_trailer;
- spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
- struct ida halloc;
+ spinlock_t sadb_rx_lock; /* Protects sadb_rx */
struct mlx5e_ipsec_sw_stats sw_stats;
- struct mlx5e_ipsec_stats stats;
struct workqueue_struct *wq;
struct mlx5e_accel_fs_esp *rx_fs;
struct mlx5e_ipsec_tx *tx_fs;
@@ -102,21 +126,26 @@ struct mlx5e_ipsec_rule {
struct mlx5_modify_hdr *set_modify_hdr;
};
+struct mlx5e_ipsec_modify_state_work {
+ struct work_struct work;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+};
+
struct mlx5e_ipsec_sa_entry {
struct hlist_node hlist; /* Item in SADB_RX hashtable */
struct mlx5e_ipsec_esn_state esn_state;
unsigned int handle; /* Handle in SADB_RX */
struct xfrm_state *x;
struct mlx5e_ipsec *ipsec;
- struct mlx5_accel_esp_xfrm *xfrm;
- void *hw_context;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
u32 ipsec_obj_id;
+ u32 enc_key_id;
struct mlx5e_ipsec_rule ipsec_rule;
+ struct mlx5e_ipsec_modify_state_work modify_work;
};
-void mlx5e_ipsec_build_inverse_table(void);
int mlx5e_ipsec_init(struct mlx5e_priv *priv);
void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
@@ -124,12 +153,27 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
unsigned int handle);
-#else
+void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec);
+int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec);
+int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry);
+void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry);
+
+int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
+void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
-static inline void mlx5e_ipsec_build_inverse_table(void)
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+static inline struct mlx5_core_dev *
+mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry)
{
+ return sa_entry->ipsec->mdev;
}
-
+#else
static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
return 0;
@@ -143,6 +187,10 @@ static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
{
}
+static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
#endif
#endif /* __MLX5E_IPSEC_H__ */
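[Reviewer note] The modify_work member added to mlx5e_ipsec_sa_entry replaces a per-event kzalloc: the work item is embedded in the SA entry, so queueing it from the atomic ESN-advance path can never fail, and the handler recovers the owning entry with two container_of steps instead of a stashed back-pointer. The pointer arithmetic is worth seeing once in isolation:

	/* Userspace demonstration of recovering the outer object from an
	 * embedded member, as _update_xfrm_state does with container_of. */
	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct work_struct { int pending; };

	struct modify_state_work {
		struct work_struct work;
		int attrs; /* stands in for mlx5_accel_esp_xfrm_attrs */
	};

	struct sa_entry {
		int id;
		struct modify_state_work modify_work; /* embedded: no alloc */
	};

	static void work_handler(struct work_struct *work)
	{
		struct modify_state_work *mw =
			container_of(work, struct modify_state_work, work);
		struct sa_entry *sa =
			container_of(mw, struct sa_entry, modify_work);

		printf("handler reached sa_entry %d, attrs=%d\n",
		       sa->id, mw->attrs);
	}

	int main(void)
	{
		struct sa_entry sa = { .id = 7, .modify_work.attrs = 42 };

		/* A real workqueue would call this later; call it directly. */
		work_handler(&sa.modify_work.work);
		return 0;
	}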
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 17da23dff0ed..b859e4a4c744 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -2,8 +2,9 @@
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#include <linux/netdevice.h>
-#include "accel/ipsec_offload.h"
-#include "ipsec_fs.h"
+#include "en.h"
+#include "en/fs.h"
+#include "ipsec.h"
#include "fs_core.h"
#define NUM_IPSEC_FTE BIT(15)
@@ -35,6 +36,7 @@ struct mlx5e_accel_fs_esp {
};
struct mlx5e_ipsec_tx {
+ struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *ft;
struct mutex mutex; /* Protect IPsec TX steering */
u32 refcnt;
@@ -58,7 +60,7 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_flow_handle *fte;
struct mlx5_flow_spec *spec;
- int err = 0;
+ int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
@@ -94,101 +96,27 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
goto out;
}
+ kvfree(spec);
rx_err->rule = fte;
rx_err->copy_modify_hdr = modify_hdr;
+ return 0;
out:
- if (err)
- mlx5_modify_header_dealloc(mdev, modify_hdr);
+ mlx5_modify_header_dealloc(mdev, modify_hdr);
out_spec:
kvfree(spec);
return err;
}
-static void rx_err_del_rule(struct mlx5e_priv *priv,
- struct mlx5e_ipsec_rx_err *rx_err)
-{
- if (rx_err->rule) {
- mlx5_del_flow_rules(rx_err->rule);
- rx_err->rule = NULL;
- }
-
- if (rx_err->copy_modify_hdr) {
- mlx5_modify_header_dealloc(priv->mdev, rx_err->copy_modify_hdr);
- rx_err->copy_modify_hdr = NULL;
- }
-}
-
-static void rx_err_destroy_ft(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx_err *rx_err)
-{
- rx_err_del_rule(priv, rx_err);
-
- if (rx_err->ft) {
- mlx5_destroy_flow_table(rx_err->ft);
- rx_err->ft = NULL;
- }
-}
-
-static int rx_err_create_ft(struct mlx5e_priv *priv,
- struct mlx5e_accel_fs_esp_prot *fs_prot,
- struct mlx5e_ipsec_rx_err *rx_err)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_flow_table *ft;
- int err;
-
- ft_attr.max_fte = 1;
- ft_attr.autogroup.max_num_groups = 1;
- ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
- ft_attr.prio = MLX5E_NIC_PRIO;
- ft = mlx5_create_auto_grouped_flow_table(priv->fs.ns, &ft_attr);
- if (IS_ERR(ft)) {
- err = PTR_ERR(ft);
- netdev_err(priv->netdev, "fail to create ipsec rx inline ft err=%d\n", err);
- return err;
- }
-
- rx_err->ft = ft;
- err = rx_err_add_rule(priv, fs_prot, rx_err);
- if (err)
- goto out_err;
-
- return 0;
-
-out_err:
- mlx5_destroy_flow_table(ft);
- rx_err->ft = NULL;
- return err;
-}
-
-static void rx_fs_destroy(struct mlx5e_accel_fs_esp_prot *fs_prot)
-{
- if (fs_prot->miss_rule) {
- mlx5_del_flow_rules(fs_prot->miss_rule);
- fs_prot->miss_rule = NULL;
- }
-
- if (fs_prot->miss_group) {
- mlx5_destroy_flow_group(fs_prot->miss_group);
- fs_prot->miss_group = NULL;
- }
-
- if (fs_prot->ft) {
- mlx5_destroy_flow_table(fs_prot->ft);
- fs_prot->ft = NULL;
- }
-}
-
static int rx_fs_create(struct mlx5e_priv *priv,
struct mlx5e_accel_fs_esp_prot *fs_prot)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft = fs_prot->ft;
struct mlx5_flow_group *miss_group;
struct mlx5_flow_handle *miss_rule;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_spec *spec;
- struct mlx5_flow_table *ft;
u32 *flow_group_in;
int err = 0;
@@ -199,20 +127,6 @@ static int rx_fs_create(struct mlx5e_priv *priv,
goto out;
}
- /* Create FT */
- ft_attr.max_fte = NUM_IPSEC_FTE;
- ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL;
- ft_attr.prio = MLX5E_NIC_PRIO;
- ft_attr.autogroup.num_reserved_entries = 1;
- ft_attr.autogroup.max_num_groups = 1;
- ft = mlx5_create_auto_grouped_flow_table(priv->fs.ns, &ft_attr);
- if (IS_ERR(ft)) {
- err = PTR_ERR(ft);
- netdev_err(priv->netdev, "fail to create ipsec rx ft err=%d\n", err);
- goto out;
- }
- fs_prot->ft = ft;
-
/* Create miss_group */
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
@@ -227,19 +141,19 @@ static int rx_fs_create(struct mlx5e_priv *priv,
/* Create miss rule */
miss_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &fs_prot->default_dest, 1);
if (IS_ERR(miss_rule)) {
+ mlx5_destroy_flow_group(fs_prot->miss_group);
err = PTR_ERR(miss_rule);
netdev_err(priv->netdev, "fail to create ipsec rx miss_rule err=%d\n", err);
goto out;
}
fs_prot->miss_rule = miss_rule;
-
out:
kvfree(flow_group_in);
kvfree(spec);
return err;
}
-static int rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+static void rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
{
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5e_accel_fs_esp *accel_esp;
@@ -249,38 +163,75 @@ static int rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
/* The netdev unreg already happened, so all offloaded rule are already removed */
fs_prot = &accel_esp->fs_prot[type];
- rx_fs_destroy(fs_prot);
-
- rx_err_destroy_ft(priv, &fs_prot->rx_err);
+ mlx5_del_flow_rules(fs_prot->miss_rule);
+ mlx5_destroy_flow_group(fs_prot->miss_group);
+ mlx5_destroy_flow_table(fs_prot->ft);
- return 0;
+ mlx5_del_flow_rules(fs_prot->rx_err.rule);
+ mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
+ mlx5_destroy_flow_table(fs_prot->rx_err.ft);
}
static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(priv->fs, false);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_table *ft;
int err;
accel_esp = priv->ipsec->rx_fs;
fs_prot = &accel_esp->fs_prot[type];
-
fs_prot->default_dest =
- mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type));
+ mlx5_ttc_get_default_dest(ttc, fs_esp2tt(type));
+
+ ft_attr.max_fte = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
- err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err);
+ fs_prot->rx_err.ft = ft;
+ err = rx_err_add_rule(priv, fs_prot, &fs_prot->rx_err);
if (err)
- return err;
+ goto err_add;
+
+ /* Create FT */
+ ft_attr.max_fte = NUM_IPSEC_FTE;
+ ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+ ft_attr.autogroup.num_reserved_entries = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_fs_ft;
+ }
+ fs_prot->ft = ft;
err = rx_fs_create(priv, fs_prot);
if (err)
- rx_destroy(priv, type);
+ goto err_fs;
+
+ return 0;
+err_fs:
+ mlx5_destroy_flow_table(fs_prot->ft);
+err_fs_ft:
+ mlx5_del_flow_rules(fs_prot->rx_err.rule);
+ mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
+err_add:
+ mlx5_destroy_flow_table(fs_prot->rx_err.ft);
return err;
}
static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5_flow_destination dest = {};
struct mlx5e_accel_fs_esp *accel_esp;
@@ -289,21 +240,21 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
accel_esp = priv->ipsec->rx_fs;
fs_prot = &accel_esp->fs_prot[type];
mutex_lock(&fs_prot->prot_mutex);
- if (fs_prot->refcnt++)
- goto out;
+ if (fs_prot->refcnt)
+ goto skip;
/* create FT */
err = rx_create(priv, type);
- if (err) {
- fs_prot->refcnt--;
+ if (err)
goto out;
- }
/* connect */
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = fs_prot->ft;
- mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest);
+ mlx5_ttc_fwd_dest(ttc, fs_esp2tt(type), &dest);
+skip:
+ fs_prot->refcnt++;
out:
mutex_unlock(&fs_prot->prot_mutex);
return err;
@@ -311,17 +262,19 @@ out:
static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5e_accel_fs_esp *accel_esp;
accel_esp = priv->ipsec->rx_fs;
fs_prot = &accel_esp->fs_prot[type];
mutex_lock(&fs_prot->prot_mutex);
- if (--fs_prot->refcnt)
+ fs_prot->refcnt--;
+ if (fs_prot->refcnt)
goto out;
/* disconnect */
- mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type));
+ mlx5_ttc_fwd_default_dest(ttc, fs_esp2tt(type));
/* remove FT */
rx_destroy(priv, type);
@@ -338,15 +291,9 @@ static int tx_create(struct mlx5e_priv *priv)
struct mlx5_flow_table *ft;
int err;
- priv->fs.egress_ns =
- mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_EGRESS_KERNEL);
- if (!priv->fs.egress_ns)
- return -EOPNOTSUPP;
-
ft_attr.max_fte = NUM_IPSEC_FTE;
ft_attr.autogroup.max_num_groups = 1;
- ft = mlx5_create_auto_grouped_flow_table(priv->fs.egress_ns, &ft_attr);
+ ft = mlx5_create_auto_grouped_flow_table(ipsec->tx_fs->ns, &ft_attr);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err);
@@ -356,32 +303,20 @@ static int tx_create(struct mlx5e_priv *priv)
return 0;
}
-static void tx_destroy(struct mlx5e_priv *priv)
-{
- struct mlx5e_ipsec *ipsec = priv->ipsec;
-
- if (IS_ERR_OR_NULL(ipsec->tx_fs->ft))
- return;
-
- mlx5_destroy_flow_table(ipsec->tx_fs->ft);
- ipsec->tx_fs->ft = NULL;
-}
-
static int tx_ft_get(struct mlx5e_priv *priv)
{
struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
int err = 0;
mutex_lock(&tx_fs->mutex);
- if (tx_fs->refcnt++)
- goto out;
+ if (tx_fs->refcnt)
+ goto skip;
err = tx_create(priv);
- if (err) {
- tx_fs->refcnt--;
+ if (err)
goto out;
- }
-
+skip:
+ tx_fs->refcnt++;
out:
mutex_unlock(&tx_fs->mutex);
return err;
@@ -392,11 +327,11 @@ static void tx_ft_put(struct mlx5e_priv *priv)
struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
mutex_lock(&tx_fs->mutex);
- if (--tx_fs->refcnt)
+ tx_fs->refcnt--;
+ if (tx_fs->refcnt)
goto out;
- tx_destroy(priv);
-
+ mlx5_destroy_flow_table(tx_fs->ft);
out:
mutex_unlock(&tx_fs->mutex);
}
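[Reviewer note] Both the rx and tx ft_get/ft_put pairs in this file are reworked to bump the refcount only after a successful create, rather than pre-incrementing and rolling back on error; put now decrements and tears down on reaching zero. A distilled version of that get/put discipline, in the corrected order:

	/* Refcounted create-on-first-get / destroy-on-last-put, mirroring
	 * the ordering used by tx_ft_get/tx_ft_put above. */
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static unsigned int refcnt;

	static int create_table(void)   { printf("table created\n"); return 0; }
	static void destroy_table(void) { printf("table destroyed\n"); }

	static int ft_get(void)
	{
		int err = 0;

		pthread_mutex_lock(&lock);
		if (refcnt)
			goto skip;	/* created by an earlier user */
		err = create_table();
		if (err)
			goto out;	/* refcnt untouched: no rollback */
	skip:
		refcnt++;
	out:
		pthread_mutex_unlock(&lock);
		return err;
	}

	static void ft_put(void)
	{
		pthread_mutex_lock(&lock);
		if (--refcnt == 0)
			destroy_table(); /* last user tears it down */
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		ft_get();
		ft_get();
		ft_put();
		ft_put(); /* second put destroys */
		return 0;
	}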
@@ -424,8 +359,8 @@ static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
/* SPI number */
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi);
- MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi,
- be32_to_cpu(attrs->spi));
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.outer_esp_spi, attrs->spi);
if (ip_version == 4) {
memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -453,16 +388,18 @@ static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
0xff, 16);
}
- flow_act->ipsec_obj_id = ipsec_obj_id;
+ flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
+ flow_act->crypto.obj_id = ipsec_obj_id;
flow_act->flags |= FLOW_ACT_NO_APPEND;
}
static int rx_add_rule(struct mlx5e_priv *priv,
- struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 ipsec_obj_id,
- struct mlx5e_ipsec_rule *ipsec_rule)
+ struct mlx5e_ipsec_sa_entry *sa_entry)
{
u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ u32 ipsec_obj_id = sa_entry->ipsec_obj_id;
struct mlx5_modify_hdr *modify_hdr = NULL;
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5_flow_destination dest = {};
@@ -508,7 +445,7 @@ static int rx_add_rule(struct mlx5e_priv *priv,
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
flow_act.modify_hdr = modify_hdr;
@@ -536,9 +473,7 @@ out:
}
static int tx_add_rule(struct mlx5e_priv *priv,
- struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 ipsec_obj_id,
- struct mlx5e_ipsec_rule *ipsec_rule)
+ struct mlx5e_ipsec_sa_entry *sa_entry)
{
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *rule;
@@ -555,7 +490,8 @@ static int tx_add_rule(struct mlx5e_priv *priv,
goto out;
}
- setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
+ setup_fte_common(&sa_entry->attrs, sa_entry->ipsec_obj_id, spec,
+ &flow_act);
/* Add IPsec indicator in metadata_reg_a */
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
@@ -565,16 +501,16 @@ static int tx_add_rule(struct mlx5e_priv *priv,
MLX5_ETH_WQE_FT_META_IPSEC);
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
- MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT;
rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n",
- attrs->action, err);
+ sa_entry->attrs.action, err);
goto out;
}
- ipsec_rule->rule = rule;
+ sa_entry->ipsec_rule.rule = rule;
out:
kvfree(spec);
@@ -583,133 +519,88 @@ out:
return err;
}
-static void rx_del_rule(struct mlx5e_priv *priv,
- struct mlx5_accel_esp_xfrm_attrs *attrs,
- struct mlx5e_ipsec_rule *ipsec_rule)
-{
- mlx5_del_flow_rules(ipsec_rule->rule);
- ipsec_rule->rule = NULL;
-
- mlx5_modify_header_dealloc(priv->mdev, ipsec_rule->set_modify_hdr);
- ipsec_rule->set_modify_hdr = NULL;
-
- rx_ft_put(priv, attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4);
-}
-
-static void tx_del_rule(struct mlx5e_priv *priv,
- struct mlx5e_ipsec_rule *ipsec_rule)
-{
- mlx5_del_flow_rules(ipsec_rule->rule);
- ipsec_rule->rule = NULL;
-
- tx_ft_put(priv);
-}
-
int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
- struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 ipsec_obj_id,
- struct mlx5e_ipsec_rule *ipsec_rule)
+ struct mlx5e_ipsec_sa_entry *sa_entry)
{
- if (!priv->ipsec->rx_fs)
- return -EOPNOTSUPP;
+ if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+ return tx_add_rule(priv, sa_entry);
- if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
- return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
- else
- return tx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
+ return rx_add_rule(priv, sa_entry);
}
void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
- struct mlx5_accel_esp_xfrm_attrs *attrs,
- struct mlx5e_ipsec_rule *ipsec_rule)
+ struct mlx5e_ipsec_sa_entry *sa_entry)
{
- if (!priv->ipsec->rx_fs)
- return;
+ struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
- if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
- rx_del_rule(priv, attrs, ipsec_rule);
- else
- tx_del_rule(priv, ipsec_rule);
-}
+ mlx5_del_flow_rules(ipsec_rule->rule);
-static void fs_cleanup_tx(struct mlx5e_priv *priv)
-{
- mutex_destroy(&priv->ipsec->tx_fs->mutex);
- WARN_ON(priv->ipsec->tx_fs->refcnt);
- kfree(priv->ipsec->tx_fs);
- priv->ipsec->tx_fs = NULL;
+ if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT) {
+ tx_ft_put(priv);
+ return;
+ }
+
+ mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr);
+ rx_ft_put(priv,
+ sa_entry->attrs.is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4);
}
-static void fs_cleanup_rx(struct mlx5e_priv *priv)
+void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
{
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5e_accel_fs_esp *accel_esp;
enum accel_fs_esp_type i;
- accel_esp = priv->ipsec->rx_fs;
+ if (!ipsec->rx_fs)
+ return;
+
+ mutex_destroy(&ipsec->tx_fs->mutex);
+ WARN_ON(ipsec->tx_fs->refcnt);
+ kfree(ipsec->tx_fs);
+
+ accel_esp = ipsec->rx_fs;
for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
fs_prot = &accel_esp->fs_prot[i];
mutex_destroy(&fs_prot->prot_mutex);
WARN_ON(fs_prot->refcnt);
}
- kfree(priv->ipsec->rx_fs);
- priv->ipsec->rx_fs = NULL;
-}
-
-static int fs_init_tx(struct mlx5e_priv *priv)
-{
- priv->ipsec->tx_fs =
- kzalloc(sizeof(struct mlx5e_ipsec_tx), GFP_KERNEL);
- if (!priv->ipsec->tx_fs)
- return -ENOMEM;
-
- mutex_init(&priv->ipsec->tx_fs->mutex);
- return 0;
+ kfree(ipsec->rx_fs);
}
-static int fs_init_rx(struct mlx5e_priv *priv)
+int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
{
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_namespace *ns;
enum accel_fs_esp_type i;
+ int err = -ENOMEM;
+
+ ns = mlx5_get_flow_namespace(ipsec->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS_IPSEC);
+ if (!ns)
+ return -EOPNOTSUPP;
- priv->ipsec->rx_fs =
- kzalloc(sizeof(struct mlx5e_accel_fs_esp), GFP_KERNEL);
- if (!priv->ipsec->rx_fs)
+ ipsec->tx_fs = kzalloc(sizeof(*ipsec->tx_fs), GFP_KERNEL);
+ if (!ipsec->tx_fs)
return -ENOMEM;
- accel_esp = priv->ipsec->rx_fs;
+ ipsec->rx_fs = kzalloc(sizeof(*ipsec->rx_fs), GFP_KERNEL);
+ if (!ipsec->rx_fs)
+ goto err_rx;
+
+ mutex_init(&ipsec->tx_fs->mutex);
+ ipsec->tx_fs->ns = ns;
+
+ accel_esp = ipsec->rx_fs;
for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
fs_prot = &accel_esp->fs_prot[i];
mutex_init(&fs_prot->prot_mutex);
}
return 0;
-}
-
-void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv)
-{
- if (!priv->ipsec->rx_fs)
- return;
-
- fs_cleanup_tx(priv);
- fs_cleanup_rx(priv);
-}
-
-int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv)
-{
- int err;
-
- if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec)
- return -EOPNOTSUPP;
-
- err = fs_init_tx(priv);
- if (err)
- return err;
-
- err = fs_init_rx(priv);
- if (err)
- fs_cleanup_tx(priv);
+err_rx:
+ kfree(ipsec->tx_fs);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h
deleted file mode 100644
index 3389b3bb3ef8..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
-
-#ifndef __MLX5_IPSEC_STEERING_H__
-#define __MLX5_IPSEC_STEERING_H__
-
-#include "en.h"
-#include "ipsec.h"
-#include "accel/ipsec_offload.h"
-#include "en/fs.h"
-
-#ifdef CONFIG_MLX5_EN_IPSEC
-void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv);
-int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv);
-int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
- struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 ipsec_obj_id,
- struct mlx5e_ipsec_rule *ipsec_rule);
-void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
- struct mlx5_accel_esp_xfrm_attrs *attrs,
- struct mlx5e_ipsec_rule *ipsec_rule);
-#else
-static inline void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) {}
-static inline int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { return 0; }
-#endif
-#endif /* __MLX5_IPSEC_STEERING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
new file mode 100644
index 000000000000..792724ce7336
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "ipsec.h"
+#include "lib/mlx5.h"
+
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ u32 caps = 0;
+
+ if (!MLX5_CAP_GEN(mdev, ipsec_offload))
+ return 0;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return 0;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
+ return 0;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt))
+ return 0;
+
+ if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) ||
+ !MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt))
+ return 0;
+
+ if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) &&
+ MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp))
+ caps |= MLX5_IPSEC_CAP_CRYPTO;
+
+ if (!caps)
+ return 0;
+
+ if (MLX5_CAP_IPSEC(mdev, ipsec_esn))
+ caps |= MLX5_IPSEC_CAP_ESN;
+
+ /* We can accommodate up to 2^24 different IPsec objects
+ * because we use up to 24 bits of flow table metadata
+ * to hold the IPsec object's unique handle.
+ */
+ WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24);
+ return caps;
+}
+EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps);
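
mlx5_ipsec_device_caps() collapses the HCA capability checks into a single bitmask, so callers gate features on individual MLX5_IPSEC_CAP_* bits. A hedged usage sketch (the surrounding init path and enable_esn_support() are illustrative, not driver API):

	u32 caps = mlx5_ipsec_device_caps(mdev);

	if (!(caps & MLX5_IPSEC_CAP_CRYPTO))
		return -EOPNOTSUPP;	/* no IPsec crypto offload at all */

	if (caps & MLX5_IPSEC_CAP_ESN)
		enable_esn_support();	/* hypothetical helper */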
+
+static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {};
+ void *obj, *salt_p, *salt_iv_p;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object);
+
+ /* salt and seq_iv */
+ salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt);
+ memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt));
+
+ MLX5_SET(ipsec_obj, obj, icv_length, MLX5_IPSEC_OBJECT_ICV_LEN_16B);
+ salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv);
+ memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv));
+ /* esn */
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+ MLX5_SET(ipsec_obj, obj, esn_en, 1);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
+ MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
+ }
+
+ MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id);
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ sa_entry->ipsec_obj_id =
+ MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void mlx5_destroy_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct aes_gcm_keymat *aes_gcm = &sa_entry->attrs.aes_gcm;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ int err;
+
+ /* key */
+ err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key,
+ aes_gcm->key_len / BITS_PER_BYTE,
+ MLX5_ACCEL_OBJ_IPSEC_KEY,
+ &sa_entry->enc_key_id);
+ if (err) {
+ mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err);
+ return err;
+ }
+
+ err = mlx5_create_ipsec_obj(sa_entry);
+ if (err) {
+ mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err);
+ goto err_enc_key;
+ }
+
+ return 0;
+
+err_enc_key:
+ mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id);
+ return err;
+}
+
+void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+
+ mlx5_destroy_ipsec_obj(sa_entry);
+ mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id);
+}
+
+static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)];
+ u64 modify_field_select = 0;
+ u64 general_obj_types;
+ void *obj;
+ int err;
+
+ if (!(attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED))
+ return 0;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
+ return -EINVAL;
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n",
+ sa_entry->ipsec_obj_id, err);
+ return err;
+ }
+
+ obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object);
+ modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select);
+
+ /* esn */
+ if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) ||
+ !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB))
+ return -EOPNOTSUPP;
+
+ obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object);
+ MLX5_SET64(ipsec_obj, obj, modify_field_select,
+ MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP |
+ MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
+ MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ int err;
+
+ err = mlx5_modify_ipsec_obj(sa_entry, attrs);
+ if (err)
+ return;
+
+ memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs));
+}
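
Taken together with the flow-steering half of this patch, the per-SA offload lifecycle is: create the hardware state, steer traffic to it, optionally modify it on ESN rollover, then tear down in reverse. A sketch of the expected call order (new_attrs is illustrative; error unwinding elided):

	/* add SA: allocate HW state (DEK + IPsec object), then steer to it */
	err = mlx5_ipsec_create_sa_ctx(sa_entry);
	if (!err)
		err = mlx5e_accel_ipsec_fs_add_rule(priv, sa_entry);

	/* ESN rollover: update only the mutable object fields */
	mlx5_accel_esp_modify_xfrm(sa_entry, &new_attrs);

	/* del SA: reverse order of creation */
	mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
	mlx5_ipsec_free_sa_ctx(sa_entry);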
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 2db9573a3fe6..6859f1c1a831 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -34,78 +34,15 @@
#include <crypto/aead.h>
#include <net/xfrm.h>
#include <net/esp.h>
-#include "accel/ipsec_offload.h"
-#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/ipsec.h"
-#include "accel/accel.h"
+#include "ipsec.h"
+#include "ipsec_rxtx.h"
#include "en.h"
enum {
- MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
- MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
- MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
-};
-
-struct mlx5e_ipsec_rx_metadata {
- unsigned char nexthdr;
- __be32 sa_handle;
-} __packed;
-
-enum {
MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
};
-struct mlx5e_ipsec_tx_metadata {
- __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */
- __be16 seq; /* LSBs of the first TCP seq, only for LSO */
- u8 esp_next_proto; /* Next protocol of ESP */
-} __packed;
-
-struct mlx5e_ipsec_metadata {
- unsigned char syndrome;
- union {
- unsigned char raw[5];
- /* from FPGA to host, on successful decrypt */
- struct mlx5e_ipsec_rx_metadata rx;
- /* from host to FPGA */
- struct mlx5e_ipsec_tx_metadata tx;
- } __packed content;
- /* packet type ID field */
- __be16 ethertype;
-} __packed;
-
-#define MAX_LSO_MSS 2048
-
-/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
-static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
-
-static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
-{
- return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
-}
-
-static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
-{
- struct mlx5e_ipsec_metadata *mdata;
- struct ethhdr *eth;
-
- if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
- return ERR_PTR(-ENOMEM);
-
- eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
- skb->mac_header -= sizeof(*mdata);
- mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
-
- memmove(skb->data, skb->data + sizeof(*mdata),
- 2 * ETH_ALEN);
-
- eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
-
- memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
- return mdata;
-}
-
static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
{
unsigned int alen = crypto_aead_authsize(x->data);
@@ -157,11 +94,20 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
/* Tunnel mode */
if (mode == XFRM_MODE_TUNNEL) {
eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
if (xo->proto == IPPROTO_IPV6)
eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
- if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
+
+ switch (xo->inner_ipproto) {
+ case IPPROTO_UDP:
eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ /* IP | ESP | IP | [TCP | UDP] */
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ default:
+ break;
+ }
return;
}
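
The software-parser offsets are programmed in 2-byte units, hence the division by two. A worked example with an illustrative frame layout: a 14-byte Ethernet header, 20-byte outer IPv4 header, 8-byte ESP header and 8-byte IV put the inner IP header at byte offset 50, so swp_inner_l3_offset = 50 / 2 = 25; per the switch above, the inner L4 offset is only programmed when the inner protocol is TCP or UDP.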
@@ -235,40 +181,6 @@ void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
skb_store_bits(skb, iv_offset, &seqno, 8);
}
-static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
- struct mlx5e_ipsec_metadata *mdata,
- struct xfrm_offload *xo)
-{
- struct ip_esp_hdr *esph;
- struct tcphdr *tcph;
-
- if (skb_is_gso(skb)) {
- /* Add LSO metadata indication */
- esph = ip_esp_hdr(skb);
- tcph = inner_tcp_hdr(skb);
- netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
- skb->network_header,
- skb->transport_header,
- skb->inner_network_header,
- skb->inner_transport_header);
- netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
- skb->len, skb_shinfo(skb)->gso_size,
- ntohs(tcph->source), ntohs(tcph->dest),
- ntohl(tcph->seq), ntohl(esph->seq_no));
- mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
- mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
- mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
- } else {
- mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
- }
- mdata->content.tx.esp_next_proto = xo->proto;
-
- netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
- mdata->syndrome, mdata->content.tx.esp_next_proto,
- ntohs(mdata->content.tx.mss_inv),
- ntohs(mdata->content.tx.seq));
-}
-
void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
struct mlx5e_accel_tx_ipsec_state *ipsec_st,
struct mlx5_wqe_inline_seg *inlseg)
@@ -289,16 +201,14 @@ static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv,
ipsec_st->x = x;
ipsec_st->xo = xo;
- if (mlx5_is_ipsec_device(priv->mdev)) {
- aead = x->data;
- alen = crypto_aead_authsize(aead);
- blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(skb->len + 2, blksize);
- plen = max_t(u32, clen - skb->len, 4);
- tailen = plen + alen;
- ipsec_st->plen = plen;
- ipsec_st->tailen = tailen;
- }
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ clen = ALIGN(skb->len + 2, blksize);
+ plen = max_t(u32, clen - skb->len, 4);
+ tailen = plen + alen;
+ ipsec_st->plen = plen;
+ ipsec_st->tailen = tailen;
return 0;
}
@@ -331,19 +241,17 @@ void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
((struct iphdr *)skb_network_header(skb))->protocol :
((struct ipv6hdr *)skb_network_header(skb))->nexthdr;
- if (mlx5_is_ipsec_device(priv->mdev)) {
- eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
- eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
- encap = x->encap;
- if (!encap) {
- eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
- } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
- eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
- }
+ eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+ eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
+ encap = x->encap;
+ if (!encap) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
+ } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
}
}
@@ -354,7 +262,6 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo = xfrm_offload(skb);
struct mlx5e_ipsec_sa_entry *sa_entry;
- struct mlx5e_ipsec_metadata *mdata;
struct xfrm_state *x;
struct sec_path *sp;
@@ -383,19 +290,8 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
goto drop;
}
- if (MLX5_CAP_GEN(priv->mdev, fpga)) {
- mdata = mlx5e_ipsec_add_metadata(skb);
- if (IS_ERR(mdata)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
- goto drop;
- }
- }
-
sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
sa_entry->set_iv_op(skb, x, xo);
- if (MLX5_CAP_GEN(priv->mdev, fpga))
- mlx5e_ipsec_set_metadata(skb, mdata, xo);
-
mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st);
return true;
@@ -405,79 +301,6 @@ drop:
return false;
}
-static inline struct xfrm_state *
-mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
- struct mlx5e_ipsec_metadata *mdata)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct xfrm_offload *xo;
- struct xfrm_state *xs;
- struct sec_path *sp;
- u32 sa_handle;
-
- sp = secpath_set(skb);
- if (unlikely(!sp)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
- return NULL;
- }
-
- sa_handle = be32_to_cpu(mdata->content.rx.sa_handle);
- xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
- if (unlikely(!xs)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
- return NULL;
- }
-
- sp = skb_sec_path(skb);
- sp->xvec[sp->len++] = xs;
- sp->olen++;
-
- xo = xfrm_offload(skb);
- xo->flags = CRYPTO_DONE;
- switch (mdata->syndrome) {
- case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
- xo->status = CRYPTO_SUCCESS;
- if (likely(priv->ipsec->no_trailer)) {
- xo->flags |= XFRM_ESP_NO_TRAILER;
- xo->proto = mdata->content.rx.nexthdr;
- }
- break;
- case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
- xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
- break;
- case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
- xo->status = CRYPTO_INVALID_PROTOCOL;
- break;
- default:
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
- return NULL;
- }
- return xs;
-}
-
-struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
- struct sk_buff *skb, u32 *cqe_bcnt)
-{
- struct mlx5e_ipsec_metadata *mdata;
- struct xfrm_state *xs;
-
- if (!is_metadata_hdr_valid(skb))
- return skb;
-
- /* Use the metadata */
- mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN);
- xs = mlx5e_ipsec_build_sp(netdev, skb, mdata);
- if (unlikely(!xs)) {
- kfree_skb(skb);
- return NULL;
- }
-
- remove_metadata_hdr(skb);
- *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
-
- return skb;
-}
-
enum {
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED,
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED,
@@ -509,7 +332,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
return;
}
- sp = skb_sec_path(skb);
sp->xvec[sp->len++] = xs;
sp->olen++;
@@ -519,8 +341,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
xo->status = CRYPTO_SUCCESS;
- if (WARN_ON_ONCE(priv->ipsec->no_trailer))
- xo->flags |= XFRM_ESP_NO_TRAILER;
break;
case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED:
xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
@@ -532,21 +352,3 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
}
}
-
-void mlx5e_ipsec_build_inverse_table(void)
-{
- u16 mss_inv;
- u32 mss;
-
- /* Calculate 1/x inverse table for use in GSO data path.
- * Using this table, we provide the IPSec accelerator with the value of
- * 1/gso_size so that it can infer the position of each segment inside
- * the GSO, and increment the ESP sequence number, and generate the IV.
- * The HW needs this value in Q0.16 fixed-point number format
- */
- mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
- for (mss = 2; mss < MAX_LSO_MSS; mss++) {
- mss_inv = div_u64(1ULL << 32, mss) >> 16;
- mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
- }
-}
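
For reference, the deleted table encoded 1/MSS as (2^32 / mss) >> 16, i.e. 1/mss in Q0.16 fixed point. Worked example: mss = 1460 gives 2^32 / 1460 = 2941758, and 2941758 >> 16 = 44, so the stored entry is htons(44); 44 / 2^16 ≈ 1/1489, the nearest Q0.16 under-approximation of 1/1460. With the FPGA metadata path gone, nothing consumes these values any more.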
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index b98db50c3418..1878a70b9031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -39,9 +39,9 @@
#include "en.h"
#include "en/txrx.h"
-/* Bit31: IPsec marker, Bit30-24: IPsec syndrome, Bit23-0: IPsec obj id */
+/* Bit31: IPsec marker, Bit30: reserved, Bit29-24: IPsec syndrome, Bit23-0: IPsec obj id */
#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1)
-#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(6, 0))
+#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0))
#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0))
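
A quick decode against these macros, with an arbitrary sample value chosen for illustration:

	u32 md = 0x81000005;	/* illustration only */

	MLX5_IPSEC_METADATA_MARKER(md);		/* 1: IPsec-offloaded packet */
	MLX5_IPSEC_METADATA_SYNDROM(md);	/* 0x01, from bits 29-24 */
	MLX5_IPSEC_METADATA_HANDLE(md);		/* 0x000005, the object id */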
struct mlx5e_accel_tx_ipsec_state {
@@ -53,9 +53,6 @@ struct mlx5e_accel_tx_ipsec_state {
#ifdef CONFIG_MLX5_EN_IPSEC
-struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
- struct sk_buff *skb, u32 *cqe_bcnt);
-
void mlx5e_ipsec_build_inverse_table(void);
void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
@@ -80,11 +77,6 @@ static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
}
-static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
-{
- return ipsec_st->x;
-}
-
static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
{
return eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
@@ -131,14 +123,17 @@ static inline bool
mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg)
{
- struct xfrm_offload *xo = xfrm_offload(skb);
+ u8 inner_ipproto;
if (!mlx5e_ipsec_eseg_meta(eseg))
return false;
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
- if (xo->inner_ipproto) {
- eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM;
+ inner_ipproto = xfrm_offload(skb)->inner_ipproto;
+ if (inner_ipproto) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+ if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP)
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
sq->stats->csum_partial_inner++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
index 5cb936541b9e..9de84821dafb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -35,27 +35,7 @@
#include <net/sock.h>
#include "en.h"
-#include "accel/ipsec.h"
-#include "fpga/sdk.h"
-#include "en_accel/ipsec.h"
-#include "fpga/ipsec.h"
-
-static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
-};
+#include "ipsec.h"
static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
@@ -65,13 +45,11 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
};
#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
-#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw)
@@ -103,45 +81,4 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw)
return idx;
}
-static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw)
-{
- return (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0;
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw)
-{
- int ret = 0;
-
- if (priv->ipsec)
- ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
- NUM_IPSEC_HW_COUNTERS);
- if (ret)
- memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw)
-{
- unsigned int i;
-
- if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev))
- for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
- strcpy(data + (idx++) * ETH_GSTRING_LEN,
- mlx5e_ipsec_hw_stats_desc[i].format);
-
- return idx;
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw)
-{
- int i;
-
- if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev))
- for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
- mlx5e_ipsec_hw_stats_desc,
- i);
- return idx;
-}
-
MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0);
-MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index d93aadbf10da..da2184c94203 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -2,11 +2,49 @@
// Copyright (c) 2019 Mellanox Technologies.
#include "en.h"
-#include "en_accel/tls.h"
+#include "lib/mlx5.h"
#include "en_accel/ktls.h"
#include "en_accel/ktls_utils.h"
#include "en_accel/fs_tcp.h"
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id)
+{
+ u32 sz_bytes;
+ void *key;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ return mlx5_create_encryption_key(mdev, key, sz_bytes,
+ MLX5_ACCEL_OBJ_TLS_KEY,
+ p_key_id);
+}
+
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ mlx5_destroy_encryption_key(mdev, key_id);
+}
+
static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
enum tls_offload_ctx_dir direction,
struct tls_crypto_info *crypto_info,
@@ -16,7 +54,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
struct mlx5_core_dev *mdev = priv->mdev;
int err;
- if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info)))
+ if (!mlx5e_ktls_type_check(mdev, crypto_info))
return -EOPNOTSUPP;
if (direction == TLS_OFFLOAD_CTX_DIR_TX)
@@ -54,20 +92,38 @@ static const struct tlsdev_ops mlx5e_ktls_ops = {
.tls_dev_resync = mlx5e_ktls_resync,
};
+bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+ u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+
+ if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx))
+ return false;
+
+ /* Check the possibility to post the required ICOSQ WQEs. */
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS))
+ return false;
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS))
+ return false;
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_KTLS_GET_PROGRESS_WQEBBS))
+ return false;
+
+ return true;
+}
+
void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- if (!mlx5e_accel_is_ktls_tx(mdev) && !mlx5e_accel_is_ktls_rx(mdev))
+ if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev))
return;
- if (mlx5e_accel_is_ktls_tx(mdev)) {
+ if (mlx5e_is_ktls_tx(mdev)) {
netdev->hw_features |= NETIF_F_HW_TLS_TX;
netdev->features |= NETIF_F_HW_TLS_TX;
}
- if (mlx5e_accel_is_ktls_rx(mdev))
+ if (mlx5e_is_ktls_rx(mdev))
netdev->hw_features |= NETIF_F_HW_TLS_RX;
netdev->tlsdev_ops = &mlx5e_ktls_ops;
@@ -80,9 +136,9 @@ int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
if (enable)
- err = mlx5e_accel_fs_tcp_create(priv);
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
else
- mlx5e_accel_fs_tcp_destroy(priv);
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
mutex_unlock(&priv->state_lock);
return err;
@@ -92,7 +148,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
{
int err;
- if (!mlx5e_accel_is_ktls_rx(priv->mdev))
+ if (!mlx5e_is_ktls_rx(priv->mdev))
return 0;
priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx");
@@ -100,7 +156,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
return -ENOMEM;
if (priv->netdev->features & NETIF_F_HW_TLS_RX) {
- err = mlx5e_accel_fs_tcp_create(priv);
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
if (err) {
destroy_workqueue(priv->tls->rx_wq);
return err;
@@ -112,11 +168,32 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
{
- if (!mlx5e_accel_is_ktls_rx(priv->mdev))
+ if (!mlx5e_is_ktls_rx(priv->mdev))
return;
if (priv->netdev->features & NETIF_F_HW_TLS_RX)
- mlx5e_accel_fs_tcp_destroy(priv);
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
destroy_workqueue(priv->tls->rx_wq);
}
+
+int mlx5e_ktls_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls;
+
+ if (!mlx5e_is_ktls_device(priv->mdev))
+ return 0;
+
+ tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+ if (!tls)
+ return -ENOMEM;
+
+ priv->tls = tls;
+ return 0;
+}
+
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv)
+{
+ kfree(priv->tls);
+ priv->tls = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 5833deb2354c..1c35045e41fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -4,11 +4,51 @@
#ifndef __MLX5E_KTLS_H__
#define __MLX5E_KTLS_H__
+#include <linux/tls.h>
+#include <net/tls.h>
#include "en.h"
#ifdef CONFIG_MLX5_EN_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
+
+static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+ if (is_kdump_kernel())
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ return (MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128) ||
+ MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256));
+}
+
+static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info)
+{
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256);
+ break;
+ }
+
+ return false;
+}
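+
+/* A hedged sketch of how the two helpers above compose on the
+ * add-connection path (the caller and error handling are illustrative):
+ *
+ *	u32 key_id;
+ *	int err;
+ *
+ *	if (!mlx5e_ktls_type_check(mdev, crypto_info))
+ *		return -EOPNOTSUPP;	// cipher/version not offloadable
+ *
+ *	err = mlx5_ktls_create_key(mdev, crypto_info, &key_id);
+ *	if (err)
+ *		return err;
+ *	// ... program the offload context; on teardown:
+ *	mlx5_ktls_destroy_key(mdev, key_id);
+ */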
void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv);
int mlx5e_ktls_init_rx(struct mlx5e_priv *priv);
void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv);
int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable);
@@ -16,27 +56,46 @@ struct mlx5e_ktls_resync_resp *
mlx5e_ktls_rx_resync_create_resp_list(void);
void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list);
-static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev)
+static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev)
{
- return !is_kdump_kernel() &&
- mlx5_accel_is_ktls_tx(mdev);
+ return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx);
}
-static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev)
+bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev);
+
+struct mlx5e_tls_sw_stats {
+ atomic64_t tx_tls_ctx;
+ atomic64_t tx_tls_del;
+ atomic64_t tx_tls_pool_alloc;
+ atomic64_t tx_tls_pool_free;
+ atomic64_t rx_tls_ctx;
+ atomic64_t rx_tls_del;
+};
+
+struct mlx5e_tls {
+ struct mlx5e_tls_sw_stats sw_stats;
+ struct workqueue_struct *rx_wq;
+ struct mlx5e_tls_tx_pool *tx_pool;
+};
+
+int mlx5e_ktls_init(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv);
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
{
- return !is_kdump_kernel() &&
- mlx5_accel_is_ktls_rx(mdev);
}
-static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev)
+static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
{
- return !is_kdump_kernel() &&
- mlx5_accel_is_ktls_device(mdev);
+ return 0;
}
-#else
-
-static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
{
}
@@ -64,10 +123,23 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
static inline void
mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {}
-static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev) { return false; }
-static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev) { return false; }
-static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+ return false;
+}
+
+static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ return 0;
+}
+static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ return 0;
+}
#endif
#endif /* __MLX5E_KTLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 15711814d2d2..3e54834747ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -3,7 +3,7 @@
#include <net/inet6_hashtables.h>
#include "en_accel/en_accel.h"
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
#include "en_accel/ktls_txrx.h"
#include "en_accel/ktls_utils.h"
#include "en_accel/fs_tcp.h"
@@ -43,7 +43,7 @@ struct mlx5e_ktls_rx_resync_ctx {
};
struct mlx5e_ktls_offload_context_rx {
- struct tls12_crypto_info_aes_gcm_128 crypto_info;
+ union mlx5e_crypto_info crypto_info;
struct accel_rule rule;
struct sock *sk;
struct mlx5e_rq_stats *rq_stats;
@@ -111,7 +111,7 @@ static void accel_rule_handle_work(struct work_struct *work)
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
goto out;
- rule = mlx5e_accel_fs_add_sk(accel_rule->priv, priv_rx->sk,
+ rule = mlx5e_accel_fs_add_sk(accel_rule->priv->fs, priv_rx->sk,
mlx5e_tir_get_tirn(&priv_rx->tir),
MLX5_FS_DEFAULT_FLOW_TAG);
if (!IS_ERR_OR_NULL(rule))
@@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx,
struct mlx5e_ktls_offload_context_rx **ctx =
__tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) >
- TLS_OFFLOAD_CONTEXT_SIZE_RX);
+ BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX);
*ctx = priv_rx;
}
@@ -363,7 +362,6 @@ static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx,
struct mlx5e_channel *c)
{
- struct tls12_crypto_info_aes_gcm_128 *info = &priv_rx->crypto_info;
struct mlx5e_ktls_resync_resp *ktls_resync;
struct mlx5e_icosq *sq;
bool trigger_poll;
@@ -374,7 +372,31 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r
spin_lock_bh(&ktls_resync->lock);
spin_lock_bh(&priv_rx->lock);
- memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
+ switch (priv_rx->crypto_info.crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ &priv_rx->crypto_info.crypto_info_128;
+
+ memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be,
+ sizeof(info->rec_seq));
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ &priv_rx->crypto_info.crypto_info_256;
+
+ memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be,
+ sizeof(info->rec_seq));
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ priv_rx->crypto_info.crypto_info.cipher_type);
+ spin_unlock_bh(&priv_rx->lock);
+ spin_unlock_bh(&ktls_resync->lock);
+ return;
+ }
+
if (list_empty(&priv_rx->list)) {
list_add_tail(&priv_rx->list, &ktls_resync->list);
trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
@@ -462,6 +484,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
{
struct ethhdr *eth = (struct ethhdr *)(skb->data);
struct net_device *netdev = rq->netdev;
+ struct net *net = dev_net(netdev);
struct sock *sk = NULL;
unsigned int datalen;
struct iphdr *iph;
@@ -476,7 +499,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
depth += sizeof(struct iphdr);
th = (void *)iph + sizeof(struct iphdr);
- sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
iph->saddr, th->source, iph->daddr,
th->dest, netdev->ifindex);
#if IS_ENABLED(CONFIG_IPV6)
@@ -486,7 +509,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
depth += sizeof(struct ipv6hdr);
th = (void *)ipv6h + sizeof(struct ipv6hdr);
- sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
&ipv6h->saddr, th->source,
&ipv6h->daddr, ntohs(th->dest),
netdev->ifindex, 0);
@@ -604,14 +627,26 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
INIT_LIST_HEAD(&priv_rx->list);
spin_lock_init(&priv_rx->lock);
- priv_rx->crypto_info =
- *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ priv_rx->crypto_info.crypto_info_128 =
+ *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ priv_rx->crypto_info.crypto_info_256 =
+ *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->cipher_type);
+ return -EOPNOTSUPP;
+ }
rxq = mlx5e_ktls_sk_get_rxq(sk);
priv_rx->rxq = rxq;
priv_rx->sk = sk;
- priv_rx->rq_stats = &priv->channel_stats[rxq].rq;
+ priv_rx->rq_stats = &priv->channel_stats[rxq]->rq;
priv_rx->sw_stats = &priv->tls->sw_stats;
mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
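
union mlx5e_crypto_info itself is not part of this diff; from the accesses above (.crypto_info for the common header, .crypto_info_128/.crypto_info_256 for the cipher-specific variants) its assumed shape is roughly the following, likely declared elsewhere in the series (e.g. ktls_utils.h):

	union mlx5e_crypto_info {
		struct tls_crypto_info crypto_info;
		struct tls12_crypto_info_aes_gcm_128 crypto_info_128;
		struct tls12_crypto_info_aes_gcm_256 crypto_info_256;
	};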
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
index 56e7b2aee85f..7c1c0eb16787 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
@@ -36,18 +36,13 @@
#include "en.h"
#include "fpga/sdk.h"
-#include "en_accel/tls.h"
-
-static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) },
-};
+#include "en_accel/ktls.h"
static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) },
{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) },
{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) },
{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) },
};
@@ -55,51 +50,43 @@ static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
-static const struct counter_desc *get_tls_atomic_stats(struct mlx5e_priv *priv)
-{
- if (!priv->tls)
- return NULL;
- if (mlx5e_accel_is_ktls_device(priv->mdev))
- return mlx5e_ktls_sw_stats_desc;
- return mlx5e_tls_sw_stats_desc;
-}
-
-int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv)
{
if (!priv->tls)
return 0;
- if (mlx5e_accel_is_ktls_device(priv->mdev))
- return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc);
- return ARRAY_SIZE(mlx5e_tls_sw_stats_desc);
+
+ return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc);
}
-int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
{
- const struct counter_desc *stats_desc;
unsigned int i, n, idx = 0;
- stats_desc = get_tls_atomic_stats(priv);
- n = mlx5e_tls_get_count(priv);
+ if (!priv->tls)
+ return 0;
+
+ n = mlx5e_ktls_get_count(priv);
for (i = 0; i < n; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
- stats_desc[i].format);
+ mlx5e_ktls_sw_stats_desc[i].format);
return n;
}
-int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
{
- const struct counter_desc *stats_desc;
unsigned int i, n, idx = 0;
- stats_desc = get_tls_atomic_stats(priv);
- n = mlx5e_tls_get_count(priv);
+ if (!priv->tls)
+ return 0;
+
+ n = mlx5e_ktls_get_count(priv);
for (i = 0; i < n; i++)
- data[idx++] =
- MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
- stats_desc, i);
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+ mlx5e_ktls_sw_stats_desc,
+ i);
return n;
}
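
The MLX5E_READ_CTR_ATOMIC64() macro used above reads each counter through a byte offset recorded in its descriptor; a minimal self-contained sketch of the idea (simplified types, not driver code):

	struct my_stats {
		atomic64_t rx_ok;
		atomic64_t rx_err;
	};

	struct my_desc {
		const char *format;
		size_t offset;
	};

	static const struct my_desc descs[] = {
		{ "rx_ok",  offsetof(struct my_stats, rx_ok)  },
		{ "rx_err", offsetof(struct my_stats, rx_err) },
	};

	static u64 read_ctr(struct my_stats *s, int i)
	{
		/* base pointer plus recorded member offset, then atomic read */
		return atomic64_read((atomic64_t *)((char *)s + descs[i].offset));
	}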
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 9ad3459fb63a..2e0335246967 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2019 Mellanox Technologies.
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
#include "en_accel/ktls_txrx.h"
#include "en_accel/ktls_utils.h"
@@ -27,38 +27,78 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
{
u16 num_dumps, stop_room = 0;
- if (!mlx5e_accel_is_ktls_tx(mdev))
+ if (!mlx5e_is_ktls_tx(mdev))
return 0;
num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
- stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
- stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
- stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS);
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
+ stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS);
+ stop_room += 1; /* fence nop */
return stop_room;
}
+static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc)
+{
+ MLX5_SET(tisc, tisc, tls_en, 1);
+ MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
+}
+
static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
- void *tisc;
- tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+ mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx));
- MLX5_SET(tisc, tisc, tls_en, 1);
+ return mlx5_core_create_tis(mdev, in, tisn);
+}
+
+static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev,
+ struct mlx5_async_ctx *async_ctx,
+ u32 *out, int outlen,
+ mlx5_async_cbk_t callback,
+ struct mlx5_async_work *context)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
- return mlx5e_create_tis(mdev, in, tisn);
+ mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx));
+ MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
+
+ return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in),
+ out, outlen, callback, context);
+}
+
+static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn,
+ struct mlx5_async_ctx *async_ctx,
+ u32 *out, int outlen,
+ mlx5_async_cbk_t callback,
+ struct mlx5_async_work *context)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+
+ return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in),
+ out, outlen, callback, context);
}
struct mlx5e_ktls_offload_context_tx {
- struct tls_offload_context_tx *tx_ctx;
- struct tls12_crypto_info_aes_gcm_128 crypto_info;
- struct mlx5e_tls_sw_stats *sw_stats;
+ /* fast path */
u32 expected_seq;
u32 tisn;
- u32 key_id;
bool ctx_post_pending;
+ /* control / resync */
+ struct list_head list_node; /* member of the pool */
+ union mlx5e_crypto_info crypto_info;
+ struct tls_offload_context_tx *tx_ctx;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ u32 key_id;
+ u8 create_err : 1;
};
static void
@@ -68,8 +108,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
struct mlx5e_ktls_offload_context_tx **ctx =
__tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) >
- TLS_OFFLOAD_CONTEXT_SIZE_TX);
+ BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX);
*ctx = priv_tx;
}
@@ -83,65 +122,410 @@ mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
return *ctx;
}
+/* struct for callback API management */
+struct mlx5e_async_ctx {
+ struct mlx5_async_work context;
+ struct mlx5_async_ctx async_ctx;
+ struct work_struct work;
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct completion complete;
+ int err;
+ union {
+ u32 out_create[MLX5_ST_SZ_DW(create_tis_out)];
+ u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)];
+ };
+};
+
+static struct mlx5e_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n)
+{
+ struct mlx5e_async_ctx *bulk_async;
+ int i;
+
+ bulk_async = kvcalloc(n, sizeof(struct mlx5e_async_ctx), GFP_KERNEL);
+ if (!bulk_async)
+ return NULL;
+
+ for (i = 0; i < n; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ mlx5_cmd_init_async_ctx(mdev, &async->async_ctx);
+ init_completion(&async->complete);
+ }
+
+ return bulk_async;
+}
+
+static void mlx5e_bulk_async_cleanup(struct mlx5e_async_ctx *bulk_async, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ mlx5_cmd_cleanup_async_ctx(&async->async_ctx);
+ }
+ kvfree(bulk_async);
+}
+
+static void create_tis_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5e_async_ctx *async =
+ container_of(context, struct mlx5e_async_ctx, context);
+ struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx;
+
+ if (status) {
+ async->err = status;
+ priv_tx->create_err = 1;
+ goto out;
+ }
+
+ priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn);
+out:
+ complete(&async->complete);
+}
+
+static void destroy_tis_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5e_async_ctx *async =
+ container_of(context, struct mlx5e_async_ctx, context);
+ struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx;
+
+ complete(&async->complete);
+ kfree(priv_tx);
+}
+
+static struct mlx5e_ktls_offload_context_tx *
+mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats,
+ struct mlx5e_async_ctx *async)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ int err;
+
+ priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL);
+ if (!priv_tx)
+ return ERR_PTR(-ENOMEM);
+
+ priv_tx->mdev = mdev;
+ priv_tx->sw_stats = sw_stats;
+
+ if (!async) {
+ err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn);
+ if (err)
+ goto err_out;
+ } else {
+ async->priv_tx = priv_tx;
+ err = mlx5e_ktls_create_tis_cb(mdev, &async->async_ctx,
+ async->out_create, sizeof(async->out_create),
+ create_tis_callback, &async->context);
+ if (err)
+ goto err_out;
+ }
+
+ return priv_tx;
+
+err_out:
+ kfree(priv_tx);
+ return ERR_PTR(err);
+}
+
+static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ struct mlx5e_async_ctx *async)
+{
+ if (priv_tx->create_err) {
+ complete(&async->complete);
+ kfree(priv_tx);
+ return;
+ }
+ async->priv_tx = priv_tx;
+ mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn,
+ &async->async_ctx,
+ async->out_destroy, sizeof(async->out_destroy),
+ destroy_tis_callback, &async->context);
+}
+
+static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev,
+ struct list_head *list, int size)
+{
+ struct mlx5e_ktls_offload_context_tx *obj, *n;
+ struct mlx5e_async_ctx *bulk_async;
+ int i;
+
+ bulk_async = mlx5e_bulk_async_init(mdev, size);
+ if (!bulk_async)
+ return;
+
+ i = 0;
+ list_for_each_entry_safe(obj, n, list, list_node) {
+ mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]);
+ i++;
+ }
+
+ for (i = 0; i < size; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ wait_for_completion(&async->complete);
+ }
+ mlx5e_bulk_async_cleanup(bulk_async, size);
+}
+
+/* Recycling pool API */
+
+#define MLX5E_TLS_TX_POOL_BULK (16)
+#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024)
+#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4)
+
+struct mlx5e_tls_tx_pool {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ struct mutex lock; /* Protects access to the pool */
+ struct list_head list;
+ size_t size;
+
+ struct workqueue_struct *wq;
+ struct work_struct create_work;
+ struct work_struct destroy_work;
+};
+
+static void create_work(struct work_struct *work)
+{
+ struct mlx5e_tls_tx_pool *pool =
+ container_of(work, struct mlx5e_tls_tx_pool, create_work);
+ struct mlx5e_ktls_offload_context_tx *obj;
+ struct mlx5e_async_ctx *bulk_async;
+ LIST_HEAD(local_list);
+ int i, j, err = 0;
+
+ bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK);
+ if (!bulk_async)
+ return;
+
+ for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) {
+ obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async[i]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ break;
+ }
+ list_add(&obj->list_node, &local_list);
+ }
+
+ for (j = 0; j < i; j++) {
+ struct mlx5e_async_ctx *async = &bulk_async[j];
+
+ wait_for_completion(&async->complete);
+ if (!err && async->err)
+ err = async->err;
+ }
+ atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc);
+ mlx5e_bulk_async_cleanup(bulk_async, MLX5E_TLS_TX_POOL_BULK);
+ if (err)
+ goto err_out;
+
+ mutex_lock(&pool->lock);
+ if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) {
+ mutex_unlock(&pool->lock);
+ goto err_out;
+ }
+ list_splice(&local_list, &pool->list);
+ pool->size += MLX5E_TLS_TX_POOL_BULK;
+ if (pool->size <= MLX5E_TLS_TX_POOL_LOW)
+ queue_work(pool->wq, work);
+ mutex_unlock(&pool->lock);
+ return;
+
+err_out:
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i);
+ atomic64_add(i, &pool->sw_stats->tx_tls_pool_free);
+}
+
+static void destroy_work(struct work_struct *work)
+{
+ struct mlx5e_tls_tx_pool *pool =
+ container_of(work, struct mlx5e_tls_tx_pool, destroy_work);
+ struct mlx5e_ktls_offload_context_tx *obj;
+ LIST_HEAD(local_list);
+ int i = 0;
+
+ mutex_lock(&pool->lock);
+ if (pool->size < MLX5E_TLS_TX_POOL_HIGH) {
+ mutex_unlock(&pool->lock);
+ return;
+ }
+
+ list_for_each_entry(obj, &pool->list, list_node)
+ if (++i == MLX5E_TLS_TX_POOL_BULK)
+ break;
+
+ list_cut_position(&local_list, &pool->list, &obj->list_node);
+ pool->size -= MLX5E_TLS_TX_POOL_BULK;
+ if (pool->size >= MLX5E_TLS_TX_POOL_HIGH)
+ queue_work(pool->wq, work);
+ mutex_unlock(&pool->lock);
+
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK);
+ atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free);
+}
+
+static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev,
+ struct mlx5e_tls_sw_stats *sw_stats)
+{
+ struct mlx5e_tls_tx_pool *pool;
+
+ BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH);
+
+ pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
+
+ pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool");
+ if (!pool->wq)
+ goto err_free;
+
+ INIT_LIST_HEAD(&pool->list);
+ mutex_init(&pool->lock);
+
+ INIT_WORK(&pool->create_work, create_work);
+ INIT_WORK(&pool->destroy_work, destroy_work);
+
+ pool->mdev = mdev;
+ pool->sw_stats = sw_stats;
+
+ return pool;
+
+err_free:
+ kvfree(pool);
+ return NULL;
+}
+
+static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool)
+{
+ while (pool->size > MLX5E_TLS_TX_POOL_BULK) {
+ struct mlx5e_ktls_offload_context_tx *obj;
+ LIST_HEAD(local_list);
+ int i = 0;
+
+ list_for_each_entry(obj, &pool->list, list_node)
+ if (++i == MLX5E_TLS_TX_POOL_BULK)
+ break;
+
+ list_cut_position(&local_list, &pool->list, &obj->list_node);
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK);
+ atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free);
+ pool->size -= MLX5E_TLS_TX_POOL_BULK;
+ }
+ if (pool->size) {
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &pool->list, pool->size);
+ atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free);
+ }
+}
+
+static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool)
+{
+ mlx5e_tls_tx_pool_list_cleanup(pool);
+ destroy_workqueue(pool->wq);
+ kvfree(pool);
+}
+
+static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj)
+{
+ mutex_lock(&pool->lock);
+ list_add(&obj->list_node, &pool->list);
+ if (++pool->size == MLX5E_TLS_TX_POOL_HIGH)
+ queue_work(pool->wq, &pool->destroy_work);
+ mutex_unlock(&pool->lock);
+}
+
+static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool)
+{
+ struct mlx5e_ktls_offload_context_tx *obj;
+
+ mutex_lock(&pool->lock);
+ if (unlikely(pool->size == 0)) {
+ /* pool is empty:
+ * - trigger the populating work, and
+ * - serve the current context via the regular blocking api.
+ */
+ queue_work(pool->wq, &pool->create_work);
+ mutex_unlock(&pool->lock);
+ obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL);
+ if (!IS_ERR(obj))
+ atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc);
+ return obj;
+ }
+
+ obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx,
+ list_node);
+ list_del(&obj->list_node);
+ if (--pool->size == MLX5E_TLS_TX_POOL_LOW)
+ queue_work(pool->wq, &pool->create_work);
+ mutex_unlock(&pool->lock);
+ return obj;
+}
+
+/* End of pool API */
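+
+/* Sketch of the intended lifecycle, mirroring the add/del flows below:
+ * mlx5e_ktls_add_tx() pops a ready context whose TIS already exists, and
+ * mlx5e_ktls_del_tx() pushes it back instead of freeing it, so only the
+ * crypto key is created and destroyed per connection.
+ */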
+
int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn)
{
struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_tls_tx_pool *pool;
struct tls_context *tls_ctx;
- struct mlx5_core_dev *mdev;
struct mlx5e_priv *priv;
int err;
tls_ctx = tls_get_ctx(sk);
priv = netdev_priv(netdev);
- mdev = priv->mdev;
+ pool = priv->tls->tx_pool;
- priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL);
- if (!priv_tx)
- return -ENOMEM;
+ priv_tx = pool_pop(pool);
+ if (IS_ERR(priv_tx))
+ return PTR_ERR(priv_tx);
- err = mlx5_ktls_create_key(mdev, crypto_info, &priv_tx->key_id);
+ err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id);
if (err)
goto err_create_key;
- priv_tx->sw_stats = &priv->tls->sw_stats;
priv_tx->expected_seq = start_offload_tcp_sn;
- priv_tx->crypto_info =
- *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ priv_tx->crypto_info.crypto_info_128 =
+ *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ priv_tx->crypto_info.crypto_info_256 =
+ *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->cipher_type);
+ /* Undo the key creation and recycle the context on failure */
+ mlx5_ktls_destroy_key(pool->mdev, priv_tx->key_id);
+ pool_push(pool, priv_tx);
+ return -EOPNOTSUPP;
+ }
priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx);
mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx);
- err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn);
- if (err)
- goto err_create_tis;
-
priv_tx->ctx_post_pending = true;
atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx);
return 0;
-err_create_tis:
- mlx5_ktls_destroy_key(mdev, priv_tx->key_id);
err_create_key:
- kfree(priv_tx);
+ pool_push(pool, priv_tx);
return err;
}
void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx)
{
struct mlx5e_ktls_offload_context_tx *priv_tx;
- struct mlx5_core_dev *mdev;
+ struct mlx5e_tls_tx_pool *pool;
struct mlx5e_priv *priv;
priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
priv = netdev_priv(netdev);
- mdev = priv->mdev;
+ pool = priv->tls->tx_pool;
atomic64_inc(&priv_tx->sw_stats->tx_tls_del);
- mlx5e_destroy_tis(mdev, priv_tx->tisn);
- mlx5_ktls_destroy_key(mdev, priv_tx->key_id);
- kfree(priv_tx);
+ mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id);
+ pool_push(pool, priv_tx);
}
static void tx_fill_wi(struct mlx5e_txqsq *sq,
@@ -202,6 +586,16 @@ post_progress_params(struct mlx5e_txqsq *sq,
sq->pc += num_wqebbs;
}
+static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ tx_fill_wi(sq, pi, 1, 0, NULL);
+
+ mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
+}
+
static void
mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
struct mlx5e_ktls_offload_context_tx *priv_tx,
@@ -213,6 +607,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
post_static_params(sq, priv_tx, fence_first_post);
post_progress_params(sq, priv_tx, progress_fence);
+ tx_post_fence_nop(sq);
}
struct tx_sync_info {
@@ -288,14 +683,31 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
struct mlx5e_ktls_offload_context_tx *priv_tx,
u64 rcd_sn)
{
- struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
__be64 rn_be = cpu_to_be64(rcd_sn);
bool skip_static_post;
u16 rec_seq_sz;
char *rec_seq;
- rec_seq = info->rec_seq;
- rec_seq_sz = sizeof(info->rec_seq);
+ switch (priv_tx->crypto_info.crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info.crypto_info_128;
+
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info = &priv_tx->crypto_info.crypto_info_256;
+
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ priv_tx->crypto_info.crypto_info.cipher_type);
+ return;
+ }
skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
if (!skip_static_post)
@@ -305,7 +717,7 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
}
static int
-tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first)
+tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn)
{
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_data_seg *dseg;
@@ -327,7 +739,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
cseg->tis_tir_num = cpu_to_be32(tisn << 8);
- cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
fsz = skb_frag_size(frag);
dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
@@ -362,67 +773,39 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
stats->tls_dump_bytes += wi->num_bytes;
}
-static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-
- tx_fill_wi(sq, pi, 1, 0, NULL);
-
- mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
-}
-
static enum mlx5e_ktls_sync_retval
mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
struct mlx5e_txqsq *sq,
int datalen,
u32 seq)
{
- struct mlx5e_sq_stats *stats = sq->stats;
enum mlx5e_ktls_sync_retval ret;
struct tx_sync_info info = {};
- int i = 0;
+ int i;
ret = tx_sync_info_get(priv_tx, seq, datalen, &info);
- if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) {
- if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) {
- stats->tls_skip_no_sync_data++;
- return MLX5E_KTLS_SYNC_SKIP_NO_DATA;
- }
- /* We might get here if a retransmission reaches the driver
- * after the relevant record is acked.
+ if (unlikely(ret != MLX5E_KTLS_SYNC_DONE))
+ /* We might get here with ret == FAIL if a retransmission
+ * reaches the driver after the relevant record is acked.
* It should be safe to drop the packet in this case
*/
- stats->tls_drop_no_sync_data++;
- goto err_out;
- }
-
- stats->tls_ooo++;
+ return ret;
tx_post_resync_params(sq, priv_tx, info.rcd_sn);
- /* If no dump WQE was sent, we need to have a fence NOP WQE before the
- * actual data xmit.
- */
- if (!info.nr_frags) {
- tx_post_fence_nop(sq);
- return MLX5E_KTLS_SYNC_DONE;
- }
-
- for (; i < info.nr_frags; i++) {
+ for (i = 0; i < info.nr_frags; i++) {
unsigned int orig_fsz, frag_offset = 0, n = 0;
skb_frag_t *f = &info.frags[i];
orig_fsz = skb_frag_size(f);
do {
- bool fence = !(i || frag_offset);
unsigned int fsz;
n++;
fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset);
skb_frag_size_set(f, fsz);
- if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) {
+ if (tx_post_resync_dump(sq, f, priv_tx->tisn)) {
page_ref_add(skb_frag_page(f), n - 1);
goto err_out;
}
@@ -448,34 +831,55 @@ err_out:
return MLX5E_KTLS_SYNC_FAIL;
}
-bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, int datalen,
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
struct mlx5e_accel_tx_tls_state *state)
{
struct mlx5e_ktls_offload_context_tx *priv_tx;
struct mlx5e_sq_stats *stats = sq->stats;
+ struct net_device *tls_netdev;
+ struct tls_context *tls_ctx;
+ int datalen;
u32 seq;
+ datalen = skb->len - skb_tcp_all_headers(skb);
+ if (!datalen)
+ return true;
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ tls_netdev = rcu_dereference_bh(tls_ctx->netdev);
+ /* Don't WARN on NULL: if tls_device_down is running in parallel,
+ * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was
+ * true. Instead, continue processing this packet.
+ */
+ if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev))
+ goto err_out;
+
priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
- if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
+ if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx)))
mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
- }
seq = ntohl(tcp_hdr(skb)->seq);
if (unlikely(priv_tx->expected_seq != seq)) {
enum mlx5e_ktls_sync_retval ret =
mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq);
+ stats->tls_ooo++;
+
switch (ret) {
case MLX5E_KTLS_SYNC_DONE:
break;
case MLX5E_KTLS_SYNC_SKIP_NO_DATA:
+ stats->tls_skip_no_sync_data++;
if (likely(!skb->decrypted))
goto out;
WARN_ON_ONCE(1);
- fallthrough;
+ goto err_out;
case MLX5E_KTLS_SYNC_FAIL:
+ stats->tls_drop_no_sync_data++;
goto err_out;
}
}
@@ -494,3 +898,24 @@ err_out:
dev_kfree_skb_any(skb);
return false;
}
+
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_tx(priv->mdev))
+ return 0;
+
+ priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats);
+ if (!priv->tls->tx_pool)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_tx(priv->mdev))
+ return;
+
+ mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool);
+ priv->tls->tx_pool = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c
index ac29aeb8af49..570a912dd6fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c
@@ -21,7 +21,7 @@ enum {
static void
fill_static_params(struct mlx5_wqe_tls_static_params_seg *params,
- struct tls12_crypto_info_aes_gcm_128 *info,
+ union mlx5e_crypto_info *crypto_info,
u32 key_id, u32 resync_tcp_sn)
{
char *initial_rn, *gcm_iv;
@@ -32,7 +32,26 @@ fill_static_params(struct mlx5_wqe_tls_static_params_seg *params,
ctx = params->ctx;
- EXTRACT_INFO_FIELDS;
+ switch (crypto_info->crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ &crypto_info->crypto_info_128;
+
+ EXTRACT_INFO_FIELDS;
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ &crypto_info->crypto_info_256;
+
+ EXTRACT_INFO_FIELDS;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->crypto_info.cipher_type);
+ return;
+ }
gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number);
@@ -54,7 +73,7 @@ fill_static_params(struct mlx5_wqe_tls_static_params_seg *params,
void
mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe,
u16 pc, u32 sqn,
- struct tls12_crypto_info_aes_gcm_128 *info,
+ union mlx5e_crypto_info *crypto_info,
u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn,
bool fence, enum tls_offload_ctx_dir direction)
{
@@ -75,7 +94,7 @@ mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe,
ucseg->flags = MLX5_UMR_INLINE;
ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16);
- fill_static_params(&wqe->params, info, key_id, resync_tcp_sn);
+ fill_static_params(&wqe->params, crypto_info, key_id, resync_tcp_sn);
}
static void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index 08c9d5134479..2dd78dd4ad65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -16,8 +16,8 @@ struct mlx5e_accel_tx_tls_state {
u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, int datalen,
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
struct mlx5e_accel_tx_tls_state *state);
void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
struct mlx5_cqe64 *cqe, u32 *cqe_bcnt);
@@ -48,6 +48,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
{
return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state);
}
+
+static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb)
+{
+ return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
+}
+
+static inline void
+mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg,
+ struct mlx5e_accel_tx_tls_state *state)
+{
+ cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
+}
#else
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
@@ -69,6 +81,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
return false;
}
+static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return 0;
+}
+
+static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe,
+ u32 *cqe_bcnt)
+{
+}
#endif /* CONFIG_MLX5_EN_TLS */
#endif /* __MLX5E_TLS_TXRX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
index e5c180f2403b..3d79cd379890 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
@@ -6,7 +6,6 @@
#include <net/tls.h>
#include "en.h"
-#include "accel/tls.h"
enum {
MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
@@ -28,6 +27,12 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx);
void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, u32 seq, u8 *rcd_sn);
+union mlx5e_crypto_info {
+ struct tls_crypto_info crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 crypto_info_128;
+ struct tls12_crypto_info_aes_gcm_256 crypto_info_256;
+};
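+
+/* Both AES-GCM variants start with a struct tls_crypto_info header, so
+ * crypto_info.cipher_type is valid no matter which union member was last
+ * written; callers switch on it to pick the right layout.
+ */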
+
struct mlx5e_set_tls_static_params_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_umr_ctrl_seg uctrl;
@@ -73,7 +78,7 @@ struct mlx5e_get_tls_progress_params_wqe {
void
mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe,
u16 pc, u32 sqn,
- struct tls12_crypto_info_aes_gcm_128 *info,
+ union mlx5e_crypto_info *crypto_info,
u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn,
bool fence, enum tls_offload_ctx_dir direction);
void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
new file mode 100644
index 000000000000..2ef36cb9555a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -0,0 +1,1861 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/xarray.h>
+
+#include "en.h"
+#include "lib/aso.h"
+#include "lib/mlx5.h"
+#include "en_accel/macsec.h"
+#include "en_accel/macsec_fs.h"
+
+#define MLX5_MACSEC_EPN_SCOPE_MID 0x80000000L
+#define MLX5E_MACSEC_ASO_CTX_SZ MLX5_ST_SZ_BYTES(macsec_aso)
+
+enum mlx5_macsec_aso_event_arm {
+ MLX5E_ASO_EPN_ARM = BIT(0),
+};
+
+enum {
+ MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET,
+};
+
+struct mlx5e_macsec_handle {
+ struct mlx5e_macsec *macsec;
+ u32 obj_id;
+ u8 idx;
+};
+
+enum {
+ MLX5_MACSEC_EPN,
+};
+
+struct mlx5e_macsec_aso_out {
+ u8 event_arm;
+ u32 mode_param;
+};
+
+struct mlx5e_macsec_aso_in {
+ u8 mode;
+ u32 obj_id;
+};
+
+struct mlx5e_macsec_epn_state {
+ u32 epn_msb;
+ u8 epn_enabled;
+ u8 overlap;
+};
+
+struct mlx5e_macsec_async_work {
+ struct mlx5e_macsec *macsec;
+ struct mlx5_core_dev *mdev;
+ struct work_struct work;
+ u32 obj_id;
+};
+
+struct mlx5e_macsec_sa {
+ bool active;
+ u8 assoc_num;
+ u32 macsec_obj_id;
+ u32 enc_key_id;
+ u32 next_pn;
+ sci_t sci;
+ salt_t salt;
+
+ struct rhash_head hash;
+ u32 fs_id;
+ union mlx5e_macsec_rule *macsec_rule;
+ struct rcu_head rcu_head;
+ struct mlx5e_macsec_epn_state epn_state;
+};
+
+struct mlx5e_macsec_rx_sc;
+struct mlx5e_macsec_rx_sc_xarray_element {
+ u32 fs_id;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+};
+
+struct mlx5e_macsec_rx_sc {
+ bool active;
+ sci_t sci;
+ struct mlx5e_macsec_sa *rx_sa[MACSEC_NUM_AN];
+ struct list_head rx_sc_list_element;
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ struct metadata_dst *md_dst;
+ struct rcu_head rcu_head;
+};
+
+struct mlx5e_macsec_umr {
+ dma_addr_t dma_addr;
+ u8 ctx[MLX5_ST_SZ_BYTES(macsec_aso)];
+ u32 mkey;
+};
+
+struct mlx5e_macsec_aso {
+ /* ASO */
+ struct mlx5_aso *maso;
+ /* Protects macsec ASO */
+ struct mutex aso_lock;
+ /* UMR */
+ struct mlx5e_macsec_umr *umr;
+
+ u32 pdn;
+};
+
+static const struct rhashtable_params rhash_sci = {
+ .key_len = sizeof_field(struct mlx5e_macsec_sa, sci),
+ .key_offset = offsetof(struct mlx5e_macsec_sa, sci),
+ .head_offset = offsetof(struct mlx5e_macsec_sa, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
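+
+/* The SCI hashtable maps a Tx SCI to its mlx5e_macsec_sa so that the
+ * datapath can resolve the steering fs_id per packet under RCU (see
+ * mlx5e_macsec_get_sa_from_hashtable() below).
+ */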
+
+struct mlx5e_macsec_device {
+ const struct net_device *netdev;
+ struct mlx5e_macsec_sa *tx_sa[MACSEC_NUM_AN];
+ struct list_head macsec_rx_sc_list_head;
+ unsigned char *dev_addr;
+ struct list_head macsec_device_list_element;
+};
+
+struct mlx5e_macsec {
+ struct list_head macsec_device_list_head;
+ int num_of_devices;
+ struct mlx5e_macsec_fs *macsec_fs;
+ struct mutex lock; /* Protects mlx5e_macsec internal contexts */
+
+ /* Tx sci -> fs id mapping handling */
+ struct rhashtable sci_hash; /* sci -> mlx5e_macsec_sa */
+
+ /* Rx fs_id -> rx_sc mapping */
+ struct xarray sc_xarray;
+
+ struct mlx5_core_dev *mdev;
+
+ /* Stats manage */
+ struct mlx5e_macsec_stats stats;
+
+ /* ASO */
+ struct mlx5e_macsec_aso aso;
+
+ struct notifier_block nb;
+ struct workqueue_struct *wq;
+};
+
+struct mlx5_macsec_obj_attrs {
+ u32 aso_pdn;
+ u32 next_pn;
+ __be64 sci;
+ u32 enc_key_id;
+ bool encrypt;
+ struct mlx5e_macsec_epn_state epn_state;
+ salt_t salt;
+ __be32 ssci;
+ bool replay_protect;
+ u32 replay_window;
+};
+
+struct mlx5_aso_ctrl_param {
+ u8 data_mask_mode;
+ u8 condition_0_operand;
+ u8 condition_1_operand;
+ u8 condition_0_offset;
+ u8 condition_1_offset;
+ u8 data_offset;
+ u8 condition_operand;
+ u32 condition_0_data;
+ u32 condition_0_mask;
+ u32 condition_1_data;
+ u32 condition_1_mask;
+ u64 bitwise_data;
+ u64 data_mask;
+};
+
+static int mlx5e_macsec_aso_reg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso)
+{
+ struct mlx5e_macsec_umr *umr;
+ struct device *dma_device;
+ dma_addr_t dma_addr;
+ int err;
+
+ umr = kzalloc(sizeof(*umr), GFP_KERNEL);
+ if (!umr) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ dma_device = &mdev->pdev->dev;
+ dma_addr = dma_map_single(dma_device, umr->ctx, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+ err = dma_mapping_error(dma_device, dma_addr);
+ if (err) {
+ mlx5_core_err(mdev, "Can't map dma device, err=%d\n", err);
+ goto out_dma;
+ }
+
+ err = mlx5e_create_mkey(mdev, aso->pdn, &umr->mkey);
+ if (err) {
+ mlx5_core_err(mdev, "Can't create mkey, err=%d\n", err);
+ goto out_mkey;
+ }
+
+ umr->dma_addr = dma_addr;
+
+ aso->umr = umr;
+
+ return 0;
+
+out_mkey:
+ dma_unmap_single(dma_device, dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+out_dma:
+ kfree(umr);
+ return err;
+}
+
+static void mlx5e_macsec_aso_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso)
+{
+ struct mlx5e_macsec_umr *umr = aso->umr;
+
+ mlx5_core_destroy_mkey(mdev, umr->mkey);
+ dma_unmap_single(&mdev->pdev->dev, umr->dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+ kfree(umr);
+}
+
+static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, void *aso_ctx)
+{
+ u8 window_sz;
+
+ if (!attrs->replay_protect)
+ return 0;
+
+ switch (attrs->replay_window) {
+ case 256:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT;
+ break;
+ case 128:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT;
+ break;
+ case 64:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT;
+ break;
+ case 32:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz);
+ MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION);
+
+ return 0;
+}
+
+static int mlx5e_macsec_create_object(struct mlx5_core_dev *mdev,
+ struct mlx5_macsec_obj_attrs *attrs,
+ bool is_tx,
+ u32 *macsec_obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_macsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ void *aso_ctx;
+ void *obj;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_macsec_obj_in, in, macsec_object);
+ aso_ctx = MLX5_ADDR_OF(macsec_offload_obj, obj, macsec_aso);
+
+ MLX5_SET(macsec_offload_obj, obj, confidentiality_en, attrs->encrypt);
+ MLX5_SET(macsec_offload_obj, obj, dekn, attrs->enc_key_id);
+ MLX5_SET(macsec_offload_obj, obj, aso_return_reg, MLX5_MACSEC_ASO_REG_C_4_5);
+ MLX5_SET(macsec_offload_obj, obj, macsec_aso_access_pd, attrs->aso_pdn);
+ MLX5_SET(macsec_aso, aso_ctx, mode_parameter, attrs->next_pn);
+
+ /* Epn */
+ if (attrs->epn_state.epn_enabled) {
+ void *salt_p;
+ int i;
+
+ MLX5_SET(macsec_aso, aso_ctx, epn_event_arm, 1);
+ MLX5_SET(macsec_offload_obj, obj, epn_en, 1);
+ MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb);
+ MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap);
+ MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)attrs->ssci);
+ salt_p = MLX5_ADDR_OF(macsec_offload_obj, obj, salt);
+ for (i = 0; i < 3; i++)
+ memcpy((u32 *)salt_p + i, &attrs->salt.bytes[4 * (2 - i)], 4);
+ } else {
+ MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)(attrs->sci));
+ }
+
+ MLX5_SET(macsec_aso, aso_ctx, valid, 0x1);
+ if (is_tx) {
+ MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_INC_SN);
+ } else {
+ err = macsec_set_replay_protection(attrs, aso_ctx);
+ if (err)
+ return err;
+ }
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev,
+ "MACsec offload: Failed to create MACsec object (err = %d)\n",
+ err);
+ return err;
+ }
+
+ *macsec_obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_sa *sa,
+ bool is_tx)
+{
+ int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
+
+ if ((is_tx) && sa->fs_id) {
+ /* Make sure ongoing datapath readers see a valid SA */
+ rhashtable_remove_fast(&macsec->sci_hash, &sa->hash, rhash_sci);
+ sa->fs_id = 0;
+ }
+
+ if (!sa->macsec_rule)
+ return;
+
+ mlx5e_macsec_fs_del_rule(macsec->macsec_fs, sa->macsec_rule, action);
+ mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id);
+ sa->macsec_rule = NULL;
+}
+
+static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
+ struct mlx5e_macsec_sa *sa,
+ bool encrypt,
+ bool is_tx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5_macsec_rule_attrs rule_attrs;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_macsec_obj_attrs obj_attrs;
+ union mlx5e_macsec_rule *macsec_rule;
+ struct macsec_key *key;
+ int err;
+
+ obj_attrs.next_pn = sa->next_pn;
+ obj_attrs.sci = cpu_to_be64((__force u64)sa->sci);
+ obj_attrs.enc_key_id = sa->enc_key_id;
+ obj_attrs.encrypt = encrypt;
+ obj_attrs.aso_pdn = macsec->aso.pdn;
+ obj_attrs.epn_state = sa->epn_state;
+
+ if (is_tx) {
+ obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci);
+ key = &ctx->sa.tx_sa->key;
+ } else {
+ obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci);
+ key = &ctx->sa.rx_sa->key;
+ }
+
+ memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt));
+ obj_attrs.replay_window = ctx->secy->replay_window;
+ obj_attrs.replay_protect = ctx->secy->replay_protect;
+
+ err = mlx5e_macsec_create_object(mdev, &obj_attrs, is_tx, &sa->macsec_obj_id);
+ if (err)
+ return err;
+
+ rule_attrs.macsec_obj_id = sa->macsec_obj_id;
+ rule_attrs.sci = sa->sci;
+ rule_attrs.assoc_num = sa->assoc_num;
+ rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
+
+ macsec_rule = mlx5e_macsec_fs_add_rule(macsec->macsec_fs, ctx, &rule_attrs, &sa->fs_id);
+ if (!macsec_rule) {
+ err = -ENOMEM;
+ goto destroy_macsec_object;
+ }
+
+ sa->macsec_rule = macsec_rule;
+
+ if (is_tx) {
+ err = rhashtable_insert_fast(&macsec->sci_hash, &sa->hash, rhash_sci);
+ if (err)
+ goto destroy_macsec_object_and_rule;
+ }
+
+ return 0;
+
+destroy_macsec_object_and_rule:
+ /* mlx5e_macsec_cleanup_sa() also destroys the macsec object */
+ mlx5e_macsec_cleanup_sa(macsec, sa, is_tx);
+ return err;
+destroy_macsec_object:
+ mlx5e_macsec_destroy_object(mdev, sa->macsec_obj_id);
+
+ return err;
+}
+
+static struct mlx5e_macsec_rx_sc *
+mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci)
+{
+ struct mlx5e_macsec_rx_sc *iter;
+
+ list_for_each_entry_rcu(iter, list, rx_sc_list_element) {
+ if (iter->sci == sci)
+ return iter;
+ }
+
+ return NULL;
+}
+
+static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_sa *rx_sa,
+ bool active)
+{
+ struct mlx5_core_dev *mdev = macsec->mdev;
+ struct mlx5_macsec_obj_attrs attrs = {};
+ int err = 0;
+
+ /* Nothing to do if the requested state matches the current one */
+ if (rx_sa->active == active)
+ return 0;
+
+ rx_sa->active = active;
+ if (!active) {
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ return 0;
+ }
+
+ attrs.sci = cpu_to_be64((__force u64)rx_sa->sci);
+ attrs.enc_key_id = rx_sa->enc_key_id;
+ err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx)
+{
+ const struct net_device *netdev = ctx->netdev;
+ const struct macsec_secy *secy = ctx->secy;
+
+ if (secy->validate_frames != MACSEC_VALIDATE_STRICT) {
+ netdev_err(netdev,
+ "MACsec offload is supported only when validate_frame is in strict mode\n");
+ return false;
+ }
+
+ if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) {
+ netdev_err(netdev, "MACsec offload is supported only when icv_len is %d\n",
+ MACSEC_DEFAULT_ICV_LEN);
+ return false;
+ }
+
+ if (!secy->protect_frames) {
+ netdev_err(netdev,
+ "MACsec offload is supported only when protect_frames is set\n");
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5e_macsec_device *
+mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec,
+ const struct macsec_context *ctx)
+{
+ struct mlx5e_macsec_device *iter;
+ const struct list_head *list;
+
+ list = &macsec->macsec_device_list_head;
+ list_for_each_entry_rcu(iter, list, macsec_device_list_element) {
+ if (iter->netdev == ctx->secy->netdev)
+ return iter;
+ }
+
+ return NULL;
+}
+
+static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key,
+ const pn_t *next_pn_halves)
+{
+ struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state;
+
+ sa->salt = key->salt;
+ epn_state->epn_enabled = 1;
+ epn_state->epn_msb = next_pn_halves->upper;
+ epn_state->overlap = next_pn_halves->lower < MLX5_MACSEC_EPN_SCOPE_MID ? 0 : 1;
+}
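+
+/* overlap starts at 1 when the initial lower PN already sits in the upper
+ * half of the 2^32 space (>= MLX5_MACSEC_EPN_SCOPE_MID); the same rule is
+ * applied again on wraparound by macsec_epn_update() below.
+ */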
+
+static int mlx5e_macsec_add_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_secy *secy = ctx->secy;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (macsec_device->tx_sa[assoc_num]) {
+ netdev_err(ctx->netdev, "MACsec offload tx_sa: %d already exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ tx_sa = kzalloc(sizeof(*tx_sa), GFP_KERNEL);
+ if (!tx_sa) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tx_sa->active = ctx_tx_sa->active;
+ tx_sa->next_pn = ctx_tx_sa->next_pn_halves.lower;
+ tx_sa->sci = secy->sci;
+ tx_sa->assoc_num = assoc_num;
+
+ if (secy->xpn)
+ update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves);
+
+ err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len,
+ MLX5_ACCEL_OBJ_MACSEC_KEY,
+ &tx_sa->enc_key_id);
+ if (err)
+ goto destroy_sa;
+
+ macsec_device->tx_sa[assoc_num] = tx_sa;
+ if (!secy->operational ||
+ assoc_num != tx_sc->encoding_sa ||
+ !tx_sa->active)
+ goto out;
+
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto destroy_encryption_key;
+
+ mutex_unlock(&macsec->lock);
+
+ return 0;
+
+destroy_encryption_key:
+ macsec_device->tx_sa[assoc_num] = NULL;
+ mlx5_destroy_encryption_key(mdev, tx_sa->enc_key_id);
+destroy_sa:
+ kfree(tx_sa);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ struct net_device *netdev;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ netdev = ctx->netdev;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ tx_sa = macsec_device->tx_sa[assoc_num];
+ if (!tx_sa) {
+ netdev_err(netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (tx_sa->next_pn != ctx_tx_sa->next_pn_halves.lower) {
+ netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n",
+ assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (tx_sa->active == ctx_tx_sa->active)
+ goto out;
+
+ if (tx_sa->assoc_num != tx_sc->encoding_sa)
+ goto out;
+
+ if (ctx_tx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto out;
+ } else {
+ if (!tx_sa->macsec_rule) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ }
+
+ tx_sa->active = ctx_tx_sa->active;
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_txsa(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ tx_sa = macsec_device->tx_sa[assoc_num];
+ if (!tx_sa) {
+ netdev_err(ctx->netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id);
+ kfree_rcu(tx_sa);
+ macsec_device->tx_sa[assoc_num] = NULL;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t *sci)
+{
+ struct mlx5e_macsec_sa *macsec_sa;
+ u32 fs_id = 0;
+
+ rcu_read_lock();
+ macsec_sa = rhashtable_lookup(sci_hash, sci, rhash_sci);
+ if (macsec_sa)
+ fs_id = macsec_sa->fs_id;
+ rcu_read_unlock();
+
+ return fs_id;
+}
+
+static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct list_head *rx_sc_list;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sc_list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(rx_sc_list, ctx_rx_sc->sci);
+ if (rx_sc) {
+ netdev_err(ctx->netdev, "MACsec offload: rx_sc (sci %lld) already exists\n",
+ ctx_rx_sc->sci);
+ err = -EEXIST;
+ goto out;
+ }
+
+ rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL);
+ if (!rx_sc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sc_xarray_element = kzalloc(sizeof(*sc_xarray_element), GFP_KERNEL);
+ if (!sc_xarray_element) {
+ err = -ENOMEM;
+ goto destroy_rx_sc;
+ }
+
+ sc_xarray_element->rx_sc = rx_sc;
+ err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element,
+ XA_LIMIT(1, USHRT_MAX), GFP_KERNEL);
+ if (err)
+ goto destroy_sc_xarray_element;
+
+ rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL);
+ if (!rx_sc->md_dst) {
+ err = -ENOMEM;
+ goto erase_xa_alloc;
+ }
+
+ rx_sc->sci = ctx_rx_sc->sci;
+ rx_sc->active = ctx_rx_sc->active;
+ rx_sc->sc_xarray_element = sc_xarray_element;
+ rx_sc->md_dst->u.macsec_info.sci = rx_sc->sci;
+
+ /* Publish the fully initialized rx_sc to RCU list readers last */
+ list_add_rcu(&rx_sc->rx_sc_list_element, rx_sc_list);
+ mutex_unlock(&macsec->lock);
+
+ return 0;
+
+erase_xa_alloc:
+ xa_erase(&macsec->sc_xarray, sc_xarray_element->fs_id);
+destroy_sc_xarray_element:
+ kfree(sc_xarray_element);
+destroy_rx_sc:
+ kfree(rx_sc);
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int i;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx_rx_sc->sci);
+ if (!rx_sc) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rx_sc->active == ctx_rx_sc->active)
+ goto out;
+
+ rx_sc->active = ctx_rx_sc->active;
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active);
+ if (err)
+ goto out;
+ }
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+ int i;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx->rx_sc->sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->rx_sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+
+ kfree(rx_sa);
+ rx_sc->rx_sa[i] = NULL;
+ }
+
+ /* At this point the relevant MACsec offload Rx rule was already removed
+ * by mlx5e_macsec_cleanup_sa. Wait for the datapath to finish with any
+ * in-flight Rx data: xa_erase synchronizes via RCU, and once fs_id is
+ * erased this rx_sc is hidden from the datapath.
+ */
+ list_del_rcu(&rx_sc->rx_sc_list_element);
+ xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id);
+ metadata_dst_free(rx_sc->md_dst);
+ kfree(rx_sc->sc_xarray_element);
+
+ kfree_rcu(rx_sc);
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ sci_t sci = ctx_rx_sa->sc->sci;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rx_sc->rx_sa[assoc_num]) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d already exist\n",
+ sci, assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ rx_sa = kzalloc(sizeof(*rx_sa), GFP_KERNEL);
+ if (!rx_sa) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rx_sa->active = ctx_rx_sa->active;
+ rx_sa->next_pn = ctx_rx_sa->next_pn;
+ rx_sa->sci = sci;
+ rx_sa->assoc_num = assoc_num;
+ rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id;
+
+ if (ctx->secy->xpn)
+ update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves);
+
+ err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len,
+ MLX5_ACCEL_OBJ_MACSEC_KEY,
+ &rx_sa->enc_key_id);
+ if (err)
+ goto destroy_sa;
+
+ rx_sc->rx_sa[assoc_num] = rx_sa;
+ if (!rx_sa->active)
+ goto out;
+
+ /* TODO: add support for both authentication and encryption flows */
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
+ if (err)
+ goto destroy_encryption_key;
+
+ goto out;
+
+destroy_encryption_key:
+ rx_sc->rx_sa[assoc_num] = NULL;
+ mlx5_destroy_encryption_key(mdev, rx_sa->enc_key_id);
+destroy_sa:
+ kfree(rx_sa);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ sci_t sci = ctx_rx_sa->sc->sci;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sa = rx_sc->rx_sa[assoc_num];
+ if (!rx_sa) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n",
+ sci, assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rx_sa->next_pn != ctx_rx_sa->next_pn_halves.lower) {
+ netdev_err(ctx->netdev,
+ "MACsec offload update RX sa %d PN isn't supported\n",
+ assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ sci_t sci = ctx->sa.rx_sa->sc->sci;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sa = rx_sc->rx_sa[assoc_num];
+ if (!rx_sa) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n",
+ sci, assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+ kfree(rx_sa);
+ rx_sc->rx_sa[assoc_num] = NULL;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_add_secy(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ const struct net_device *netdev = ctx->netdev;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ if (!mlx5e_macsec_secy_features_validate(ctx))
+ return -EINVAL;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ if (mlx5e_macsec_get_macsec_device_context(macsec, ctx)) {
+ netdev_err(netdev, "MACsec offload: MACsec net_device already exist\n");
+ goto out;
+ }
+
+ if (macsec->num_of_devices >= MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES) {
+ netdev_err(netdev, "Currently, only %d MACsec offload devices can be set\n",
+ MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES);
+ err = -EBUSY;
+ goto out;
+ }
+
+ macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL);
+ if (!macsec_device) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ macsec_device->dev_addr = kmemdup(dev->dev_addr, dev->addr_len, GFP_KERNEL);
+ if (!macsec_device->dev_addr) {
+ kfree(macsec_device);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ macsec_device->netdev = dev;
+
+ INIT_LIST_HEAD_RCU(&macsec_device->macsec_rx_sc_list_head);
+ list_add_rcu(&macsec_device->macsec_device_list_element, &macsec->macsec_device_list_head);
+
+ ++macsec->num_of_devices;
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
+ struct mlx5e_macsec_device *macsec_device)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct list_head *list;
+ int i, err = 0;
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa || !rx_sa->macsec_rule)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ }
+ }
+
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ if (rx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false);
+ if (err)
+ goto out;
+ }
+ }
+ }
+
+ memcpy(macsec_device->dev_addr, dev->dev_addr, dev->addr_len);
+out:
+ return err;
+}
+
+/* This function is called from two macsec ops:
+ * macsec_set_mac_address: the MAC address was changed, so the Tx contexts
+ * (macsec object + steering) must be destroyed and re-created.
+ * macsec_changelink: the Tx SC or SecY may have changed, so both the Tx and
+ * Rx contexts (macsec object + steering) must be destroyed and re-created.
+ */
+static int mlx5e_macsec_upd_secy(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int i, err = 0;
+
+ if (!mlx5e_macsec_secy_features_validate(ctx))
+ return -EINVAL;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* If the dev_addr changed (macsec_set_mac_address), rebuild the Rx
+ * steering for the new address; otherwise the callback came from
+ * macsec_changelink and only the Tx side below needs to be rebuilt.
+ */
+ if (memcmp(macsec_device->dev_addr, dev->dev_addr, dev->addr_len)) {
+ err = macsec_upd_secy_hw_address(ctx, macsec_device);
+ if (err)
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_secy(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+ int i;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id);
+ kfree(tx_sa);
+ macsec_device->tx_sa[i] = NULL;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+ kfree(rx_sa);
+ rx_sc->rx_sa[i] = NULL;
+ }
+
+ list_del_rcu(&rx_sc->rx_sc_list_element);
+
+ kfree_rcu(rx_sc);
+ }
+
+ kfree(macsec_device->dev_addr);
+ macsec_device->dev_addr = NULL;
+
+ list_del_rcu(&macsec_device->macsec_device_list_element);
+ --macsec->num_of_devices;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static void macsec_build_accel_attrs(struct mlx5e_macsec_sa *sa,
+ struct mlx5_macsec_obj_attrs *attrs)
+{
+ attrs->epn_state.epn_msb = sa->epn_state.epn_msb;
+ attrs->epn_state.overlap = sa->epn_state.overlap;
+}
+
+static void macsec_aso_build_wqe_ctrl_seg(struct mlx5e_macsec_aso *macsec_aso,
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl,
+ struct mlx5_aso_ctrl_param *param)
+{
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ if (macsec_aso->umr->dma_addr) {
+ aso_ctrl->va_l = cpu_to_be32(macsec_aso->umr->dma_addr | ASO_CTRL_READ_EN);
+ aso_ctrl->va_h = cpu_to_be32((u64)macsec_aso->umr->dma_addr >> 32);
+ aso_ctrl->l_key = cpu_to_be32(macsec_aso->umr->mkey);
+ }
+
+ if (!param)
+ return;
+
+ aso_ctrl->data_mask_mode = param->data_mask_mode << 6;
+ aso_ctrl->condition_1_0_operand = param->condition_1_operand |
+ param->condition_0_operand << 4;
+ aso_ctrl->condition_1_0_offset = param->condition_1_offset |
+ param->condition_0_offset << 4;
+ aso_ctrl->data_offset_condition_operand = param->data_offset |
+ param->condition_operand << 6;
+ aso_ctrl->condition_0_data = cpu_to_be32(param->condition_0_data);
+ aso_ctrl->condition_0_mask = cpu_to_be32(param->condition_0_mask);
+ aso_ctrl->condition_1_data = cpu_to_be32(param->condition_1_data);
+ aso_ctrl->condition_1_mask = cpu_to_be32(param->condition_1_mask);
+ aso_ctrl->bitwise_data = cpu_to_be64(param->bitwise_data);
+ aso_ctrl->data_mask = cpu_to_be64(param->data_mask);
+}
+
+static int mlx5e_macsec_modify_obj(struct mlx5_core_dev *mdev, struct mlx5_macsec_obj_attrs *attrs,
+ u32 macsec_id)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_macsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(query_macsec_obj_out)];
+ u64 modify_field_select = 0;
+ void *obj;
+ int err;
+
+ /* General object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_id);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev, "Query MACsec object failed (Object id %d), err = %d\n",
+ macsec_id, err);
+ return err;
+ }
+
+ obj = MLX5_ADDR_OF(query_macsec_obj_out, out, macsec_object);
+ modify_field_select = MLX5_GET64(macsec_offload_obj, obj, modify_field_select);
+
+ /* EPN */
+ if (!(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP) ||
+ !(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB)) {
+ mlx5_core_dbg(mdev, "MACsec object field is not modifiable (Object id %d)\n",
+ macsec_id);
+ return -EOPNOTSUPP;
+ }
+
+ obj = MLX5_ADDR_OF(modify_macsec_obj_in, in, macsec_object);
+ MLX5_SET64(macsec_offload_obj, obj, modify_field_select,
+ MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP | MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB);
+ MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb);
+ MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap);
+
+ /* General object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void macsec_aso_build_ctrl(struct mlx5e_macsec_aso *aso,
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl,
+ struct mlx5e_macsec_aso_in *in)
+{
+ struct mlx5_aso_ctrl_param param = {};
+
+ param.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT;
+ param.condition_0_operand = MLX5_ASO_ALWAYS_TRUE;
+ param.condition_1_operand = MLX5_ASO_ALWAYS_TRUE;
+ if (in->mode == MLX5_MACSEC_EPN) {
+ param.data_offset = MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
+ param.bitwise_data = BIT_ULL(54);
+ param.data_mask = param.bitwise_data;
+ }
+ macsec_aso_build_wqe_ctrl_seg(aso, aso_ctrl, &param);
+}
+
+static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_aso_in *in)
+{
+ struct mlx5e_macsec_aso *aso;
+ struct mlx5_aso_wqe *aso_wqe;
+ struct mlx5_aso *maso;
+ int err;
+
+ aso = &macsec->aso;
+ maso = aso->maso;
+
+ mutex_lock(&aso->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(maso);
+ mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC);
+ macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in);
+ mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl);
+ err = mlx5_aso_poll_cq(maso, false);
+ mutex_unlock(&aso->aso_lock);
+
+ return err;
+}
+
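+/* macsec_aso_query() below points the WQE at the pre-registered UMR buffer
+ * with ASO_CTRL_READ_EN set so the ASO context is read back into umr->ctx;
+ * once the CQ is polled, the fields are parsed out with MLX5_GET.
+ */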
+static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_aso_in *in, struct mlx5e_macsec_aso_out *out)
+{
+ struct mlx5e_macsec_aso *aso;
+ struct mlx5_aso_wqe *aso_wqe;
+ struct mlx5_aso *maso;
+ int err;
+
+ aso = &macsec->aso;
+ maso = aso->maso;
+
+ mutex_lock(&aso->aso_lock);
+
+ aso_wqe = mlx5_aso_get_wqe(maso);
+ mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC);
+ macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL);
+
+ mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl);
+ err = mlx5_aso_poll_cq(maso, false);
+ if (err)
+ goto err_out;
+
+ if (MLX5_GET(macsec_aso, aso->umr->ctx, epn_event_arm))
+ out->event_arm |= MLX5E_ASO_EPN_ARM;
+
+ out->mode_param = MLX5_GET(macsec_aso, aso->umr->ctx, mode_parameter);
+
+err_out:
+ mutex_unlock(&aso->aso_lock);
+ return err;
+}
+
+static struct mlx5e_macsec_sa *get_macsec_tx_sa_from_obj_id(const struct mlx5e_macsec *macsec,
+ const u32 obj_id)
+{
+ const struct list_head *device_list;
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec_device *iter;
+ int i;
+
+ device_list = &macsec->macsec_device_list_head;
+
+ list_for_each_entry(iter, device_list, macsec_device_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ macsec_sa = iter->tx_sa[i];
+ if (!macsec_sa || !macsec_sa->active)
+ continue;
+ if (macsec_sa->macsec_obj_id == obj_id)
+ return macsec_sa;
+ }
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_macsec_sa *get_macsec_rx_sa_from_obj_id(const struct mlx5e_macsec *macsec,
+ const u32 obj_id)
+{
+ const struct list_head *device_list, *sc_list;
+ struct mlx5e_macsec_rx_sc *mlx5e_rx_sc;
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec_device *iter;
+ int i;
+
+ device_list = &macsec->macsec_device_list_head;
+
+ list_for_each_entry(iter, device_list, macsec_device_list_element) {
+ sc_list = &iter->macsec_rx_sc_list_head;
+ list_for_each_entry(mlx5e_rx_sc, sc_list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ macsec_sa = mlx5e_rx_sc->rx_sa[i];
+ if (!macsec_sa || !macsec_sa->active)
+ continue;
+ if (macsec_sa->macsec_obj_id == obj_id)
+ return macsec_sa;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void macsec_epn_update(struct mlx5e_macsec *macsec, struct mlx5_core_dev *mdev,
+ struct mlx5e_macsec_sa *sa, u32 obj_id, u32 mode_param)
+{
+ struct mlx5_macsec_obj_attrs attrs = {};
+ struct mlx5e_macsec_aso_in in = {};
+
+ /* When the bottom of the replay protection window (mode_param) crosses 2^31
+ * (half sequence number wraparound), i.e. mode_param > MLX5_MACSEC_EPN_SCOPE_MID,
+ * SW should set the EPN overlap to OLD (1).
+ * When it crosses 2^32 (full sequence number wraparound), mode_param itself wraps,
+ * so mode_param < MLX5_MACSEC_EPN_SCOPE_MID; SW should then set the EPN overlap
+ * back to NEW (0) and increment epn_msb.
+ */
+
+ if (mode_param < MLX5_MACSEC_EPN_SCOPE_MID) {
+ sa->epn_state.epn_msb++;
+ sa->epn_state.overlap = 0;
+ } else {
+ sa->epn_state.overlap = 1;
+ }
+
+ macsec_build_accel_attrs(sa, &attrs);
+ mlx5e_macsec_modify_obj(mdev, &attrs, obj_id);
+
+ /* Re-set EPN arm event */
+ in.obj_id = obj_id;
+ in.mode = MLX5_MACSEC_EPN;
+ macsec_aso_set_arm_event(mdev, macsec, &in);
+}
+
+static void macsec_async_event(struct work_struct *work)
+{
+ struct mlx5e_macsec_async_work *async_work;
+ struct mlx5e_macsec_aso_out out = {};
+ struct mlx5e_macsec_aso_in in = {};
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec *macsec;
+ struct mlx5_core_dev *mdev;
+ u32 obj_id;
+
+ async_work = container_of(work, struct mlx5e_macsec_async_work, work);
+ macsec = async_work->macsec;
+ mdev = async_work->mdev;
+ obj_id = async_work->obj_id;
+ macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id);
+ if (!macsec_sa) {
+ macsec_sa = get_macsec_rx_sa_from_obj_id(macsec, obj_id);
+ if (!macsec_sa) {
+ mlx5_core_dbg(mdev, "MACsec SA is not found (SA object id %d)\n", obj_id);
+ goto out_async_work;
+ }
+ }
+
+ /* Query MACsec ASO context */
+ in.obj_id = obj_id;
+ if (macsec_aso_query(mdev, macsec, &in, &out))
+ goto out_async_work;
+
+ /* EPN case: a cleared arm bit means the device fired the event;
+ * advance the EPN state and re-arm.
+ */
+ if (macsec_sa->epn_state.epn_enabled && !(out.event_arm & MLX5E_ASO_EPN_ARM))
+ macsec_epn_update(macsec, mdev, macsec_sa, obj_id, out.mode_param);
+
+out_async_work:
+ kfree(async_work);
+}
+
+static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_macsec *macsec = container_of(nb, struct mlx5e_macsec, nb);
+ struct mlx5e_macsec_async_work *async_work;
+ struct mlx5_eqe_obj_change *obj_change;
+ struct mlx5_eqe *eqe = data;
+ u16 obj_type;
+ u32 obj_id;
+
+ if (event != MLX5_EVENT_TYPE_OBJECT_CHANGE)
+ return NOTIFY_DONE;
+
+ obj_change = &eqe->data.obj_change;
+ obj_type = be16_to_cpu(obj_change->obj_type);
+ obj_id = be32_to_cpu(obj_change->obj_id);
+
+ if (obj_type != MLX5_GENERAL_OBJECT_TYPES_MACSEC)
+ return NOTIFY_DONE;
+
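+ /* The notifier runs in atomic (EQ) context; defer the ASO query and
+ * object modification to the ordered workqueue.
+ */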
+ async_work = kzalloc(sizeof(*async_work), GFP_ATOMIC);
+ if (!async_work)
+ return NOTIFY_DONE;
+
+ async_work->macsec = macsec;
+ async_work->mdev = macsec->mdev;
+ async_work->obj_id = obj_id;
+
+ INIT_WORK(&async_work->work, macsec_async_event);
+
+ WARN_ON(!queue_work(macsec->wq, &async_work->work));
+
+ return NOTIFY_OK;
+}
+
+static int mlx5e_macsec_aso_init(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev)
+{
+ struct mlx5_aso *maso;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &aso->pdn);
+ if (err) {
+ mlx5_core_err(mdev,
+ "MACsec offload: Failed to alloc pd for MACsec ASO, err=%d\n",
+ err);
+ return err;
+ }
+
+ maso = mlx5_aso_create(mdev, aso->pdn);
+ if (IS_ERR(maso)) {
+ err = PTR_ERR(maso);
+ goto err_aso;
+ }
+
+ err = mlx5e_macsec_aso_reg_mr(mdev, aso);
+ if (err)
+ goto err_aso_reg;
+
+ mutex_init(&aso->aso_lock);
+
+ aso->maso = maso;
+
+ return 0;
+
+err_aso_reg:
+ mlx5_aso_destroy(maso);
+err_aso:
+ mlx5_core_dealloc_pd(mdev, aso->pdn);
+ return err;
+}
+
+static void mlx5e_macsec_aso_cleanup(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev)
+{
+ if (!aso)
+ return;
+
+ mlx5e_macsec_aso_dereg_mr(mdev, aso);
+
+ mlx5_aso_destroy(aso->maso);
+
+ mlx5_core_dealloc_pd(mdev, aso->pdn);
+}
+
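+/* MACsec offload requires the MACsec general object, DEK support, and the
+ * Rx/Tx steering (decrypt/encrypt + reformat) caps, with at least one
+ * AES-GCM key size supported in each direction.
+ */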
+bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev)
+{
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt))
+ return false;
+
+ return true;
+}
+
+void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats)
+{
+ mlx5e_macsec_fs_get_stats_fill(macsec->macsec_fs, macsec_stats);
+}
+
+struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec)
+{
+ if (!macsec)
+ return NULL;
+
+ return &macsec->stats;
+}
+
+static const struct macsec_ops macsec_offload_ops = {
+ .mdo_add_txsa = mlx5e_macsec_add_txsa,
+ .mdo_upd_txsa = mlx5e_macsec_upd_txsa,
+ .mdo_del_txsa = mlx5e_macsec_del_txsa,
+ .mdo_add_rxsc = mlx5e_macsec_add_rxsc,
+ .mdo_upd_rxsc = mlx5e_macsec_upd_rxsc,
+ .mdo_del_rxsc = mlx5e_macsec_del_rxsc,
+ .mdo_add_rxsa = mlx5e_macsec_add_rxsa,
+ .mdo_upd_rxsa = mlx5e_macsec_upd_rxsa,
+ .mdo_del_rxsa = mlx5e_macsec_del_rxsa,
+ .mdo_add_secy = mlx5e_macsec_add_secy,
+ .mdo_upd_secy = mlx5e_macsec_upd_secy,
+ .mdo_del_secy = mlx5e_macsec_del_secy,
+};
+
+bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ u32 fs_id;
+
+ fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci);
+ if (!fs_id)
+ goto err_out;
+
+ return true;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return false;
+}
+
+void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ u32 fs_id;
+
+ fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci);
+ if (!fs_id)
+ return;
+
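+ /* Tag the send WQE with the SA's fs_id so the Tx crypto table can steer
+ * the packet to the matching encrypt rule.
+ */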
+ eseg->flow_table_metadata = cpu_to_be32(MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2);
+}
+
+void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec *macsec;
+ u32 fs_id;
+
+ macsec = priv->macsec;
+ if (!macsec)
+ return;
+
+ fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data);
+
+ rcu_read_lock();
+ sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id);
+ rx_sc = sc_xarray_element ? sc_xarray_element->rx_sc : NULL;
+ if (rx_sc) {
+ dst_hold(&rx_sc->md_dst->dst);
+ skb_dst_set(skb, &rx_sc->md_dst->dst);
+ }
+
+ rcu_read_unlock();
+}
+
+void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return;
+
+ /* Enable MACsec */
+ mlx5_core_dbg(priv->mdev, "mlx5e: MACsec acceleration enabled\n");
+ netdev->macsec_ops = &macsec_offload_ops;
+ netdev->features |= NETIF_F_HW_MACSEC;
+ netif_keep_dst(netdev);
+}
+
+int mlx5e_macsec_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_macsec *macsec = NULL;
+ struct mlx5e_macsec_fs *macsec_fs;
+ int err;
+
+ if (!mlx5e_is_macsec_device(priv->mdev)) {
+ mlx5_core_dbg(mdev, "Not a MACsec offload device\n");
+ return 0;
+ }
+
+ macsec = kzalloc(sizeof(*macsec), GFP_KERNEL);
+ if (!macsec)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&macsec->macsec_device_list_head);
+ mutex_init(&macsec->lock);
+
+ err = rhashtable_init(&macsec->sci_hash, &rhash_sci);
+ if (err) {
+ mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n",
+ err);
+ goto err_hash;
+ }
+
+ err = mlx5e_macsec_aso_init(&macsec->aso, priv->mdev);
+ if (err) {
+ mlx5_core_err(mdev, "MACsec offload: Failed to init aso, err=%d\n", err);
+ goto err_aso;
+ }
+
+ macsec->wq = alloc_ordered_workqueue("mlx5e_macsec_%s", 0, priv->netdev->name);
+ if (!macsec->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ xa_init_flags(&macsec->sc_xarray, XA_FLAGS_ALLOC1);
+
+ priv->macsec = macsec;
+
+ macsec->mdev = mdev;
+
+ macsec_fs = mlx5e_macsec_fs_init(mdev, priv->netdev);
+ if (!macsec_fs) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ macsec->macsec_fs = macsec_fs;
+
+ macsec->nb.notifier_call = macsec_obj_change_event;
+ mlx5_notifier_register(mdev, &macsec->nb);
+
+ mlx5_core_dbg(mdev, "MACsec attached to netdevice\n");
+
+ return 0;
+
+err_out:
+ destroy_workqueue(macsec->wq);
+err_wq:
+ mlx5e_macsec_aso_cleanup(&macsec->aso, priv->mdev);
+err_aso:
+ rhashtable_destroy(&macsec->sci_hash);
+err_hash:
+ kfree(macsec);
+ priv->macsec = NULL;
+ return err;
+}
+
+void mlx5e_macsec_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!macsec)
+ return;
+
+ mlx5_notifier_unregister(mdev, &macsec->nb);
+ mlx5e_macsec_fs_cleanup(macsec->macsec_fs);
+ destroy_workqueue(macsec->wq);
+ mlx5e_macsec_aso_cleanup(&macsec->aso, mdev);
+ rhashtable_destroy(&macsec->sci_hash);
+ mutex_destroy(&macsec->lock);
+ kfree(macsec);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
new file mode 100644
index 000000000000..d580b4a91253
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_ACCEL_MACSEC_H__
+#define __MLX5_EN_ACCEL_MACSEC_H__
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+
+#include <linux/mlx5/driver.h>
+#include <net/macsec.h>
+#include <net/dst_metadata.h>
+
+/* Bits 31-30: MACsec marker, bits 3-0: MACsec id */
+#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1)
+#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0))
+
+struct mlx5e_priv;
+struct mlx5e_macsec;
+
+struct mlx5e_macsec_stats {
+ u64 macsec_rx_pkts;
+ u64 macsec_rx_bytes;
+ u64 macsec_rx_pkts_drop;
+ u64 macsec_rx_bytes_drop;
+ u64 macsec_tx_pkts;
+ u64 macsec_tx_bytes;
+ u64 macsec_tx_pkts_drop;
+ u64 macsec_tx_bytes_drop;
+};
+
+void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_macsec_init(struct mlx5e_priv *priv);
+void mlx5e_macsec_cleanup(struct mlx5e_priv *priv);
+bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb);
+void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg);
+
+static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+ return md_dst && (md_dst->type == METADATA_MACSEC);
+}
+
+static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe)
+{
+ return MLX5_MACSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
+}
+
+void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe);
+bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev);
+void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats);
+struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec);
+
+#else
+
+static inline void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) {}
+static inline int mlx5e_macsec_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) {}
+static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) { return false; }
+static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; }
+static inline void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{}
+static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) { return false; }
+#endif /* CONFIG_MLX5_EN_MACSEC */
+
+#endif /* __MLX5_EN_ACCEL_MACSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
new file mode 100644
index 000000000000..1ac0cf04e811
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -0,0 +1,1384 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/macsec.h>
+#include <linux/netdevice.h>
+#include <linux/mlx5/qp.h>
+#include "fs_core.h"
+#include "en/fs.h"
+#include "en_accel/macsec_fs.h"
+#include "mlx5_core.h"
+
+/* MACsec TX flow steering */
+#define CRYPTO_NUM_MAXSEC_FTE BIT(15)
+#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1
+
+#define TX_CRYPTO_TABLE_LEVEL 0
+#define TX_CRYPTO_TABLE_NUM_GROUPS 3
+#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1
+#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \
+ (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \
+ CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE))
+#define TX_CHECK_TABLE_LEVEL 1
+#define TX_CHECK_TABLE_NUM_FTE 2
+#define RX_CRYPTO_TABLE_LEVEL 0
+#define RX_CHECK_TABLE_LEVEL 1
+#define RX_CHECK_TABLE_NUM_FTE 3
+#define RX_CRYPTO_TABLE_NUM_GROUPS 3
+#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \
+ ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2)
+#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \
+ (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE)
+#define RX_NUM_OF_RULES_PER_SA 2
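+/* Per Rx SA: one rule matching an explicit SCI, one for the implicit-SCI case */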
+
+#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET)
+#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8
+#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN)
+
+/* MACsec RX flow steering */
+#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E
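+/* Covers the MACsec marker bit and the 4-bit fs_id that the send WQE encodes
+ * as (MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2); see macsec_fs_tx_setup_fte()
+ */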
+
+struct mlx5_sectag_header {
+ __be16 ethertype;
+ u8 tci_an;
+ u8 sl;
+ u32 pn;
+ u8 sci[MACSEC_SCI_LEN]; /* optional */
+} __packed;
+
+struct mlx5e_macsec_tx_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ u32 fs_id;
+};
+
+struct mlx5e_macsec_tables {
+ struct mlx5e_flow_table ft_crypto;
+ struct mlx5_flow_handle *crypto_miss_rule;
+
+ struct mlx5_flow_table *ft_check;
+ struct mlx5_flow_group *ft_check_group;
+ struct mlx5_fc *check_miss_rule_counter;
+ struct mlx5_flow_handle *check_miss_rule;
+ struct mlx5_fc *check_rule_counter;
+
+ u32 refcnt;
+};
+
+struct mlx5e_macsec_tx {
+ struct mlx5_flow_handle *crypto_mke_rule;
+ struct mlx5_flow_handle *check_rule;
+
+ struct ida tx_halloc;
+
+ struct mlx5e_macsec_tables tables;
+};
+
+struct mlx5e_macsec_rx_rule {
+ struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA];
+ struct mlx5_modify_hdr *meta_modhdr;
+};
+
+struct mlx5e_macsec_rx {
+ struct mlx5_flow_handle *check_rule[2];
+ struct mlx5_pkt_reformat *check_rule_pkt_reformat[2];
+
+ struct mlx5e_macsec_tables tables;
+};
+
+union mlx5e_macsec_rule {
+ struct mlx5e_macsec_tx_rule tx_rule;
+ struct mlx5e_macsec_rx_rule rx_rule;
+};
+
+struct mlx5e_macsec_fs {
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_macsec_tx *tx_fs;
+ struct mlx5e_macsec_rx *rx_fs;
+};
+
+static void macsec_fs_tx_destroy(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5e_macsec_tables *tx_tables;
+
+ tx_tables = &tx_fs->tables;
+
+ /* Tx check table */
+ if (tx_fs->check_rule) {
+ mlx5_del_flow_rules(tx_fs->check_rule);
+ tx_fs->check_rule = NULL;
+ }
+
+ if (tx_tables->check_miss_rule) {
+ mlx5_del_flow_rules(tx_tables->check_miss_rule);
+ tx_tables->check_miss_rule = NULL;
+ }
+
+ if (tx_tables->ft_check_group) {
+ mlx5_destroy_flow_group(tx_tables->ft_check_group);
+ tx_tables->ft_check_group = NULL;
+ }
+
+ if (tx_tables->ft_check) {
+ mlx5_destroy_flow_table(tx_tables->ft_check);
+ tx_tables->ft_check = NULL;
+ }
+
+ /* Tx crypto table */
+ if (tx_fs->crypto_mke_rule) {
+ mlx5_del_flow_rules(tx_fs->crypto_mke_rule);
+ tx_fs->crypto_mke_rule = NULL;
+ }
+
+ if (tx_tables->crypto_miss_rule) {
+ mlx5_del_flow_rules(tx_tables->crypto_miss_rule);
+ tx_tables->crypto_miss_rule = NULL;
+ }
+
+ mlx5e_destroy_flow_table(&tx_tables->ft_crypto);
+}
+
+static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ /* Flow Group for MKE match */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for SA rules */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+ MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_table
+ *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags,
+ int level, int max_fte)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *fdb = NULL;
+
+ /* reserve entry for the match all miss group and rule */
+ ft_attr.autogroup.num_reserved_entries = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.prio = 0;
+ ft_attr.flags = flags;
+ ft_attr.level = level;
+ ft_attr.max_fte = max_fte;
+
+ fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+
+ return fdb;
+}
+
+static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_table *flow_table;
+ struct mlx5_flow_group *flow_group;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err = 0;
+
+ ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+ if (!ns)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto out_spec;
+ }
+
+ tx_tables = &tx_fs->tables;
+ ft_crypto = &tx_tables->ft_crypto;
+
+ /* Tx crypto table */
+ ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft_attr.level = TX_CRYPTO_TABLE_LEVEL;
+ ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+ flow_table = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec Tx crypto table err(%d)\n", err);
+ goto out_flow_group;
+ }
+ ft_crypto->t = flow_table;
+
+ /* Tx crypto table groups */
+ err = macsec_fs_tx_create_crypto_table_groups(ft_crypto);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to create flow groups for MACsec Tx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+
+ /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */
+ memset(&flow_act, 0, sizeof(flow_act));
+ memset(spec, 0, sizeof(*spec));
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec TX MKE rule, err=%d\n", err);
+ goto err;
+ }
+ tx_fs->crypto_mke_rule = rule;
+
+ /* Tx crypto table Default miss rule */
+ memset(&flow_act, 0, sizeof(flow_act));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Tx table default miss rule %d\n", err);
+ goto err;
+ }
+ tx_tables->crypto_miss_rule = rule;
+
+ /* Tx check table */
+ flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL,
+ TX_CHECK_TABLE_NUM_FTE);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec TX check table, err(%d)\n", err);
+ goto err;
+ }
+ tx_tables->ft_check = flow_table;
+
+ /* Tx check table Default miss group/rule */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+ flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in);
+ if (IS_ERR(flow_group)) {
+ err = PTR_ERR(flow_group);
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Tx check table err(%d)\n",
+ err);
+ goto err;
+ }
+ tx_tables->ft_check_group = flow_group;
+
+ /* Tx check table default drop rule */
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Tx check drop rule, err(%d)\n", err);
+ goto err;
+ }
+ tx_tables->check_miss_rule = rule;
+
+ /* Tx check table rule */
+ memset(spec, 0, sizeof(struct mlx5_flow_spec));
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ flow_act.flags = FLOW_ACT_NO_APPEND;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter);
+ rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec check rule, err=%d\n", err);
+ goto err;
+ }
+ tx_fs->check_rule = rule;
+
+ goto out_flow_group;
+
+err:
+ macsec_fs_tx_destroy(macsec_fs);
+out_flow_group:
+ kvfree(flow_group_in);
+out_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int macsec_fs_tx_ft_get(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5e_macsec_tables *tx_tables;
+ int err = 0;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt)
+ goto out;
+
+ err = macsec_fs_tx_create(macsec_fs);
+ if (err)
+ return err;
+
+out:
+ tx_tables->refcnt++;
+ return err;
+}
+
+static void macsec_fs_tx_ft_put(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+
+ if (--tx_tables->refcnt)
+ return;
+
+ macsec_fs_tx_destroy(macsec_fs);
+}
+
+static int macsec_fs_tx_setup_fte(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ u32 macsec_obj_id,
+ u32 *fs_id)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ int err = 0;
+ u32 id;
+
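+ /* fs_ids start at 1; 0 is reserved to mean "no offloaded SA" on the
+ * Tx fast path (see mlx5e_macsec_handle_tx_skb()).
+ */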
+ err = ida_alloc_range(&tx_fs->tx_halloc, 1,
+ MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES,
+ GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ id = err;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ /* Metadata match */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC | id << 2);
+
+ *fs_id = id;
+ flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+ flow_act->crypto.obj_id = macsec_obj_id;
+
+ mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id);
+ return 0;
+}
+
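+/* Build the static SecTAG image used as the ADD_MACSEC packet reformat data;
+ * the device prepends it to each packet on the offloaded Tx path.
+ */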
+static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx,
+ char *reformatbf,
+ size_t *reformat_size)
+{
+ const struct macsec_secy *secy = ctx->secy;
+ bool sci_present = macsec_send_sci(secy);
+ struct mlx5_sectag_header sectag = {};
+ const struct macsec_tx_sc *tx_sc;
+
+ tx_sc = &secy->tx_sc;
+ sectag.ethertype = htons(ETH_P_MACSEC);
+
+ if (sci_present) {
+ sectag.tci_an |= MACSEC_TCI_SC;
+ memcpy(&sectag.sci, &secy->sci,
+ sizeof(sectag.sci));
+ } else {
+ if (tx_sc->end_station)
+ sectag.tci_an |= MACSEC_TCI_ES;
+ if (tx_sc->scb)
+ sectag.tci_an |= MACSEC_TCI_SCB;
+ }
+
+ /* With GCM, C/E clear for !encrypt, both set for encrypt */
+ if (tx_sc->encrypt)
+ sectag.tci_an |= MACSEC_TCI_CONFID;
+ else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN)
+ sectag.tci_an |= MACSEC_TCI_C;
+
+ sectag.tci_an |= tx_sc->encoding_sa;
+
+ *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0);
+
+ memcpy(reformatbf, &sectag, *reformat_size);
+}
+
+static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5e_macsec_tx_rule *tx_rule)
+{
+ if (tx_rule->rule) {
+ mlx5_del_flow_rules(tx_rule->rule);
+ tx_rule->rule = NULL;
+ }
+
+ if (tx_rule->pkt_reformat) {
+ mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat);
+ tx_rule->pkt_reformat = NULL;
+ }
+
+ if (tx_rule->fs_id) {
+ ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id);
+ tx_rule->fs_id = 0;
+ }
+
+ kfree(tx_rule);
+
+ macsec_fs_tx_ft_put(macsec_fs);
+}
+
+static union mlx5e_macsec_rule *
+macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id)
+{
+ char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN];
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ union mlx5e_macsec_rule *macsec_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5e_macsec_tx_rule *tx_rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ size_t reformat_size;
+ int err = 0;
+ u32 fs_id;
+
+ tx_tables = &tx_fs->tables;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ err = macsec_fs_tx_ft_get(macsec_fs);
+ if (err)
+ goto out_spec;
+
+ macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+ if (!macsec_rule) {
+ macsec_fs_tx_ft_put(macsec_fs);
+ goto out_spec;
+ }
+
+ tx_rule = &macsec_rule->tx_rule;
+
+ /* Tx crypto table crypto rule */
+ macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size);
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC;
+ reformat_params.size = reformat_size;
+ reformat_params.data = reformatbf;
+ flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+ if (IS_ERR(flow_act.pkt_reformat)) {
+ err = PTR_ERR(flow_act.pkt_reformat);
+ netdev_err(netdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err);
+ goto err;
+ }
+ tx_rule->pkt_reformat = flow_act.pkt_reformat;
+
+ err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, &fs_id);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to set up FTE for MACsec TX crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+
+ tx_rule->fs_id = fs_id;
+ *sa_fs_id = fs_id;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = tx_tables->ft_check;
+ rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec TX crypto rule, err=%d\n", err);
+ goto err;
+ }
+ tx_rule->rule = rule;
+
+ goto out_spec;
+
+err:
+ macsec_fs_tx_del_rule(macsec_fs, tx_rule);
+ macsec_rule = NULL;
+out_spec:
+ kvfree(spec);
+
+ return macsec_rule;
+}
+
+static void macsec_fs_tx_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *tx_tables;
+
+ if (!tx_fs)
+ return;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt) {
+ netdev_err(macsec_fs->netdev,
+ "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n",
+ tx_tables->refcnt);
+ return;
+ }
+
+ ida_destroy(&tx_fs->tx_halloc);
+
+ if (tx_tables->check_miss_rule_counter) {
+ mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter);
+ tx_tables->check_miss_rule_counter = NULL;
+ }
+
+ if (tx_tables->check_rule_counter) {
+ mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+ tx_tables->check_rule_counter = NULL;
+ }
+
+ kfree(tx_fs);
+ macsec_fs->tx_fs = NULL;
+}
+
+static int macsec_fs_tx_init(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5e_macsec_tx *tx_fs;
+ struct mlx5_fc *flow_counter;
+ int err;
+
+ tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL);
+ if (!tx_fs)
+ return -ENOMEM;
+
+ tx_tables = &tx_fs->tables;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Tx encrypt flow counter, err(%d)\n",
+ err);
+ goto err_encrypt_counter;
+ }
+ tx_tables->check_rule_counter = flow_counter;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Tx drop flow counter, err(%d)\n",
+ err);
+ goto err_drop_counter;
+ }
+ tx_tables->check_miss_rule_counter = flow_counter;
+
+ ida_init(&tx_fs->tx_halloc);
+
+ macsec_fs->tx_fs = tx_fs;
+
+ return 0;
+
+err_drop_counter:
+ mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+ tx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+ kfree(tx_fs);
+ macsec_fs->tx_fs = NULL;
+
+ return err;
+}
+
+static void macsec_fs_rx_destroy(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct mlx5e_macsec_tables *rx_tables;
+ int i;
+
+ /* Rx check table */
+ for (i = 1; i >= 0; --i) {
+ if (rx_fs->check_rule[i]) {
+ mlx5_del_flow_rules(rx_fs->check_rule[i]);
+ rx_fs->check_rule[i] = NULL;
+ }
+
+ if (rx_fs->check_rule_pkt_reformat[i]) {
+ mlx5_packet_reformat_dealloc(macsec_fs->mdev,
+ rx_fs->check_rule_pkt_reformat[i]);
+ rx_fs->check_rule_pkt_reformat[i] = NULL;
+ }
+ }
+
+ rx_tables = &rx_fs->tables;
+
+ if (rx_tables->check_miss_rule) {
+ mlx5_del_flow_rules(rx_tables->check_miss_rule);
+ rx_tables->check_miss_rule = NULL;
+ }
+
+ if (rx_tables->ft_check_group) {
+ mlx5_destroy_flow_group(rx_tables->ft_check_group);
+ rx_tables->ft_check_group = NULL;
+ }
+
+ if (rx_tables->ft_check) {
+ mlx5_destroy_flow_table(rx_tables->ft_check);
+ rx_tables->ft_check = NULL;
+ }
+
+ /* Rx crypto table */
+ if (rx_tables->crypto_miss_rule) {
+ mlx5_del_flow_rules(rx_tables->crypto_miss_rule);
+ rx_tables->crypto_miss_rule = NULL;
+ }
+
+ mlx5e_destroy_flow_table(&rx_tables->ft_crypto);
+}
+
+static int macsec_fs_rx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ /* Flow group for SA rule with SCI */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS_5);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK <<
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+ MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2);
+ MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow group for SA rule without SCI */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS_5);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static int macsec_fs_rx_create_check_decap_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec,
+ int reformat_param_size)
+{
+ int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 0 : 1;
+ u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI];
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5_flow_handle *rule;
+ int err = 0;
+
+ rx_tables = &rx_fs->tables;
+
+ /* Rx check table decap rule (SecTAG with or without SCI) */
+ memset(dest, 0, sizeof(*dest));
+ memset(flow_act, 0, sizeof(*flow_act));
+ memset(spec, 0, sizeof(*spec));
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC;
+ reformat_params.size = reformat_param_size;
+ reformat_params.data = mlx5_reformat_buf;
+ flow_act->pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+ if (IS_ERR(flow_act->pkt_reformat)) {
+ err = PTR_ERR(flow_act->pkt_reformat);
+ netdev_err(netdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err);
+ return err;
+ }
+ rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat;
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ /* MACsec syndrome match */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0);
+ /* ASO return reg syndrome match */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+ /* SecTAG TCI SC present bit */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI)
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT <<
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ flow_act->flags = FLOW_ACT_NO_APPEND;
+ flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest->type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest->counter_id = mlx5_fc_id(rx_tables->check_rule_counter);
+ rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Rx check rule, err=%d\n", err);
+ return err;
+ }
+
+ rx_fs->check_rule[rule_index] = rule;
+
+ return 0;
+}
+
+static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_table *flow_table;
+ struct mlx5_flow_group *flow_group;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err = 0;
+
+ ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+ if (!ns)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto free_spec;
+ }
+
+ rx_tables = &rx_fs->tables;
+ ft_crypto = &rx_tables->ft_crypto;
+
+ /* Rx crypto table */
+ ft_attr.level = RX_CRYPTO_TABLE_LEVEL;
+ ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+ flow_table = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec Rx crypto table err(%d)\n", err);
+ goto out_flow_group;
+ }
+ ft_crypto->t = flow_table;
+
+ /* Rx crypto table groups */
+ err = macsec_fs_rx_create_crypto_table_groups(ft_crypto);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to create flow groups for MACsec Rx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add MACsec Rx crypto table default miss rule %d\n",
+ err);
+ goto err;
+ }
+ rx_tables->crypto_miss_rule = rule;
+
+ /* Rx check table */
+ flow_table = macsec_fs_auto_group_table_create(ns,
+ MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT,
+ RX_CHECK_TABLE_LEVEL,
+ RX_CHECK_TABLE_NUM_FTE);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec RX check table, err(%d)\n", err);
+ goto err;
+ }
+ rx_tables->ft_check = flow_table;
+
+ /* Rx check table Default miss group/rule */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+ flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in);
+ if (IS_ERR(flow_group)) {
+ err = PTR_ERR(flow_group);
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Rx check table err(%d)\n",
+ err);
+ goto err;
+ }
+ rx_tables->ft_check_group = flow_group;
+
+ /* Rx check table default drop rule */
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Rx check drop rule, err(%d)\n", err);
+ goto err;
+ }
+ rx_tables->check_miss_rule = rule;
+
+ /* Rx check table decap rules */
+ err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+ MLX5_SECTAG_HEADER_SIZE_WITH_SCI);
+ if (err)
+ goto err;
+
+ err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+ MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI);
+ if (err)
+ goto err;
+
+ goto out_flow_group;
+
+err:
+ macsec_fs_rx_destroy(macsec_fs);
+out_flow_group:
+ kvfree(flow_group_in);
+free_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int macsec_fs_rx_ft_get(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+ int err = 0;
+
+ if (rx_tables->refcnt)
+ goto out;
+
+ err = macsec_fs_rx_create(macsec_fs);
+ if (err)
+ return err;
+
+out:
+ rx_tables->refcnt++;
+ return err;
+}
+
+static void macsec_fs_rx_ft_put(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+
+ if (--rx_tables->refcnt)
+ return;
+
+ macsec_fs_rx_destroy(macsec_fs);
+}
+
+static void macsec_fs_rx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5e_macsec_rx_rule *rx_rule)
+{
+ int i;
+
+ for (i = 0; i < RX_NUM_OF_RULES_PER_SA; ++i) {
+ if (rx_rule->rule[i]) {
+ mlx5_del_flow_rules(rx_rule->rule[i]);
+ rx_rule->rule[i] = NULL;
+ }
+ }
+
+ if (rx_rule->meta_modhdr) {
+ mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr);
+ rx_rule->meta_modhdr = NULL;
+ }
+
+ kfree(rx_rule);
+
+ macsec_fs_rx_ft_put(macsec_fs);
+}
+
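+/* Build the Rx crypto table match: MACsec ethertype, SecTAG TCI/AN, and
+ * either the explicit SCI or the source MAC when the SCI is implicit.
+ */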
+static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_macsec_rule_attrs *attrs,
+ bool sci_present)
+{
+ u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num;
+ struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto;
+ __be32 *sci_p = (__be32 *)(&attrs->sci);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ /* MACsec ethertype */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+
+ /* SecTAG AN + TCI SC present bit */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+ tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ if (sci_present) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters_5.macsec_tag_2);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2,
+ be32_to_cpu(sci_p[0]));
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters_5.macsec_tag_3);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3,
+ be32_to_cpu(sci_p[1]));
+ } else {
+ /* When the SCI isn't present in the SecTAG, match on the source MAC
+ * address instead; this is valid only when the SCI is built from the
+ * source MAC and the default MACsec port.
+ */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16),
+ sci_p, ETH_ALEN);
+ }
+
+ crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+ crypto_params->obj_id = attrs->macsec_obj_id;
+}
+
+static union mlx5e_macsec_rule *
+macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 fs_id)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ union mlx5e_macsec_rule *macsec_rule = NULL;
+ struct mlx5_modify_hdr *modify_hdr = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_macsec_rx_rule *rx_rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ err = macsec_fs_rx_ft_get(macsec_fs);
+ if (err)
+ goto out_spec;
+
+ macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+ if (!macsec_rule) {
+ macsec_fs_rx_ft_put(macsec_fs);
+ goto out_spec;
+ }
+
+ rx_rule = &macsec_rule->rx_rule;
+ rx_tables = &rx_fs->tables;
+ ft_crypto = &rx_tables->ft_crypto;
+
+ /* Set bits [31:30] to the MACsec marker (0x1) and bits [3:0] to the fs_id */
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ MLX5_SET(set_action_in, action, data, fs_id | BIT(30));
+ MLX5_SET(set_action_in, action, offset, 0);
+ MLX5_SET(set_action_in, action, length, 32);
+
+ modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC,
+ 1, action);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ netdev_err(netdev, "Failed to allocate MACsec modify header, err=%d\n", err);
+ modify_hdr = NULL;
+ goto err;
+ }
+ rx_rule->meta_modhdr = modify_hdr;
+
+ /* Rx crypto table with SCI rule */
+ macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true);
+
+ flow_act.modify_hdr = modify_hdr;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rx_tables->ft_check;
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+ rx_rule->rule[0] = rule;
+
+ /* Rx crypto table without SCI rule */
+ if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) {
+ memset(spec, 0, sizeof(struct mlx5_flow_spec));
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false);
+
+ flow_act.modify_hdr = modify_hdr;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rx_tables->ft_check;
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+ rx_rule->rule[1] = rule;
+ }
+
+ return macsec_rule;
+
+err:
+ macsec_fs_rx_del_rule(macsec_fs, rx_rule);
+ macsec_rule = NULL;
+out_spec:
+ kvfree(spec);
+ return macsec_rule;
+}
+
+static int macsec_fs_rx_init(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_macsec_rx *rx_fs;
+ struct mlx5_fc *flow_counter;
+ int err;
+
+ rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL);
+ if (!rx_fs)
+ return -ENOMEM;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Rx encrypt flow counter, err(%d)\n",
+ err);
+ goto err_encrypt_counter;
+ }
+
+ rx_tables = &rx_fs->tables;
+ rx_tables->check_rule_counter = flow_counter;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Rx drop flow counter, err(%d)\n",
+ err);
+ goto err_drop_counter;
+ }
+ rx_tables->check_miss_rule_counter = flow_counter;
+
+ macsec_fs->rx_fs = rx_fs;
+
+ return 0;
+
+err_drop_counter:
+ mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+ rx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+ kfree(rx_fs);
+ macsec_fs->rx_fs = NULL;
+
+ return err;
+}
+
+static void macsec_fs_rx_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *rx_tables;
+
+ if (!rx_fs)
+ return;
+
+ rx_tables = &rx_fs->tables;
+
+ if (rx_tables->refcnt) {
+ netdev_err(macsec_fs->netdev,
+ "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n",
+ rx_tables->refcnt);
+ return;
+ }
+
+ if (rx_tables->check_miss_rule_counter) {
+ mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter);
+ rx_tables->check_miss_rule_counter = NULL;
+ }
+
+ if (rx_tables->check_rule_counter) {
+ mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+ rx_tables->check_rule_counter = NULL;
+ }
+
+ kfree(rx_fs);
+ macsec_fs->rx_fs = NULL;
+}
+
+void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats)
+{
+ struct mlx5e_macsec_stats *stats = (struct mlx5e_macsec_stats *)macsec_stats;
+ struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+
+ if (tx_tables->check_rule_counter)
+ mlx5_fc_query(mdev, tx_tables->check_rule_counter,
+ &stats->macsec_tx_pkts, &stats->macsec_tx_bytes);
+
+ if (tx_tables->check_miss_rule_counter)
+ mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter,
+ &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop);
+
+ if (rx_tables->check_rule_counter)
+ mlx5_fc_query(mdev, rx_tables->check_rule_counter,
+ &stats->macsec_rx_pkts, &stats->macsec_rx_bytes);
+
+ if (rx_tables->check_miss_rule_counter)
+ mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter,
+ &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop);
+}
+
+union mlx5e_macsec_rule *
+mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id)
+{
+ return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+ macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) :
+ macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id);
+}
+
+void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ union mlx5e_macsec_rule *macsec_rule,
+ int action)
+{
+ (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+ macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule) :
+ macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule);
+}
+
+void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ macsec_fs_rx_cleanup(macsec_fs);
+ macsec_fs_tx_cleanup(macsec_fs);
+ kfree(macsec_fs);
+}
+
+struct mlx5e_macsec_fs *
+mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ struct mlx5e_macsec_fs *macsec_fs;
+ int err;
+
+ macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL);
+ if (!macsec_fs)
+ return NULL;
+
+ macsec_fs->mdev = mdev;
+ macsec_fs->netdev = netdev;
+
+ err = macsec_fs_tx_init(macsec_fs);
+ if (err) {
+ netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err);
+ goto err;
+ }
+
+ err = macsec_fs_rx_init(macsec_fs);
+ if (err) {
+ netdev_err(netdev, "MACsec offload: Failed to init rx_fs, err=%d\n", err);
+ goto tx_cleanup;
+ }
+
+ return macsec_fs;
+
+tx_cleanup:
+ macsec_fs_tx_cleanup(macsec_fs);
+err:
+ kfree(macsec_fs);
+ return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h
new file mode 100644
index 000000000000..b429648d4ee7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_MACSEC_STEERING_H__
+#define __MLX5_MACSEC_STEERING_H__
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+
+#include "en_accel/macsec.h"
+
+#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16
+
+struct mlx5e_macsec_fs;
+union mlx5e_macsec_rule;
+
+struct mlx5_macsec_rule_attrs {
+ sci_t sci;
+ u32 macsec_obj_id;
+ u8 assoc_num;
+ int action;
+};
+
+enum mlx5_macsec_action {
+ MLX5_ACCEL_MACSEC_ACTION_ENCRYPT,
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT,
+};
+
+void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs);
+
+struct mlx5e_macsec_fs *
+mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, struct net_device *netdev);
+
+union mlx5e_macsec_rule *
+mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id);
+
+void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ union mlx5e_macsec_rule *macsec_rule,
+ int action);
+
+void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats);
+
+#endif
+
+#endif /* __MLX5_MACSEC_STEERING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c
new file mode 100644
index 000000000000..e50a2e3f3d18
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "en_accel/macsec.h"
+
+static const struct counter_desc mlx5e_macsec_hw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes_drop) },
+};
+
+#define NUM_MACSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_macsec_hw_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(macsec_hw)
+{
+ if (!priv->macsec)
+ return 0;
+
+ if (mlx5e_is_macsec_device(priv->mdev))
+ return NUM_MACSEC_HW_COUNTERS;
+
+ return 0;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(macsec_hw) {}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw)
+{
+ unsigned int i;
+
+ if (!priv->macsec)
+ return idx;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return idx;
+
+ for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_macsec_hw_stats_desc[i].format);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw)
+{
+ int i;
+
+ if (!priv->macsec)
+ return idx;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return idx;
+
+ mlx5e_macsec_get_stats_fill(priv->macsec, mlx5e_macsec_get_stats(priv->macsec));
+ for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(mlx5e_macsec_get_stats(priv->macsec),
+ mlx5e_macsec_hw_stats_desc,
+ i);
+
+ return idx;
+}
+
+MLX5E_DEFINE_STATS_GRP(macsec_hw, 0);
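/* The group above plugs into the mlx5e stats framework: NUM_STATS reserves
 * slots, FILL_STRS writes ETH_GSTRING_LEN-sized names, and FILL_STATS writes
 * the matching u64 values, each returning the advanced cursor so groups can
 * be chained. A condensed sketch of one chained walk, assuming the
 * fill_strings/fill_stats callback names from en_stats.h:
 */
static int walk_stats_grp_sketch(const struct mlx5e_stats_grp *grp,
				 struct mlx5e_priv *priv,
				 u8 *names, u64 *vals, int idx)
{
	int sidx = grp->fill_strings(priv, names, idx);
	int vidx = grp->fill_stats(priv, vals, idx);

	/* Both cursors must advance identically, or the ethtool output
	 * would misalign names against counters.
	 */
	WARN_ON_ONCE(sidx != vidx);
	return vidx;
}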
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
deleted file mode 100644
index b8fc863aa68d..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include "en_accel/tls.h"
-#include "accel/tls.h"
-
-static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- MLX5_SET(tls_flow, flow, ipv6, 0);
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- MLX5_SET(tls_flow, flow, ipv6, 1);
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-}
-#endif
-
-static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport,
- MLX5_FLD_SZ_BYTES(tls_flow, src_port));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport,
- MLX5_FLD_SZ_BYTES(tls_flow, dst_port));
-}
-
-static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps)
-{
- switch (sk->sk_family) {
- case AF_INET:
- mlx5e_tls_set_ipv4_flow(flow, sk);
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- if (!sk->sk_ipv6only &&
- ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
- mlx5e_tls_set_ipv4_flow(flow, sk);
- break;
- }
- if (!(caps & MLX5_ACCEL_TLS_IPV6))
- goto error_out;
-
- mlx5e_tls_set_ipv6_flow(flow, sk);
- break;
-#endif
- default:
- goto error_out;
- }
-
- mlx5e_tls_set_flow_tcp_ports(flow, sk);
- return 0;
-error_out:
- return -EINVAL;
-}
-
-static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
- enum tls_offload_ctx_dir direction,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct mlx5_core_dev *mdev = priv->mdev;
- u32 caps = mlx5_accel_tls_device_caps(mdev);
- int ret = -ENOMEM;
- void *flow;
- u32 swid;
-
- flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
- if (!flow)
- return ret;
-
- ret = mlx5e_tls_set_flow(flow, sk, caps);
- if (ret)
- goto free_flow;
-
- ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info,
- start_offload_tcp_sn, &swid,
- direction == TLS_OFFLOAD_CTX_DIR_TX);
- if (ret < 0)
- goto free_flow;
-
- if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
- struct mlx5e_tls_offload_context_tx *tx_ctx =
- mlx5e_get_tls_tx_context(tls_ctx);
-
- tx_ctx->swid = htonl(swid);
- tx_ctx->expected_seq = start_offload_tcp_sn;
- } else {
- struct mlx5e_tls_offload_context_rx *rx_ctx =
- mlx5e_get_tls_rx_context(tls_ctx);
-
- rx_ctx->handle = htonl(swid);
- }
-
- return 0;
-free_flow:
- kfree(flow);
- return ret;
-}
-
-static void mlx5e_tls_del(struct net_device *netdev,
- struct tls_context *tls_ctx,
- enum tls_offload_ctx_dir direction)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- unsigned int handle;
-
- handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ?
- mlx5e_get_tls_tx_context(tls_ctx)->swid :
- mlx5e_get_tls_rx_context(tls_ctx)->handle);
-
- mlx5_accel_tls_del_flow(priv->mdev, handle,
- direction == TLS_OFFLOAD_CTX_DIR_TX);
-}
-
-static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk,
- u32 seq, u8 *rcd_sn_data,
- enum tls_offload_ctx_dir direction)
-{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_tls_offload_context_rx *rx_ctx;
- __be64 rcd_sn = *(__be64 *)rcd_sn_data;
-
- if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX))
- return -EINVAL;
- rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
-
- netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq,
- be64_to_cpu(rcd_sn));
- mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
-
- return 0;
-}
-
-static const struct tlsdev_ops mlx5e_tls_ops = {
- .tls_dev_add = mlx5e_tls_add,
- .tls_dev_del = mlx5e_tls_del,
- .tls_dev_resync = mlx5e_tls_resync,
-};
-
-void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
-{
- struct net_device *netdev = priv->netdev;
- u32 caps;
-
- if (mlx5e_accel_is_ktls_device(priv->mdev)) {
- mlx5e_ktls_build_netdev(priv);
- return;
- }
-
- /* FPGA */
- if (!mlx5e_accel_is_tls_device(priv->mdev))
- return;
-
- caps = mlx5_accel_tls_device_caps(priv->mdev);
- if (caps & MLX5_ACCEL_TLS_TX) {
- netdev->features |= NETIF_F_HW_TLS_TX;
- netdev->hw_features |= NETIF_F_HW_TLS_TX;
- }
-
- if (caps & MLX5_ACCEL_TLS_RX) {
- netdev->features |= NETIF_F_HW_TLS_RX;
- netdev->hw_features |= NETIF_F_HW_TLS_RX;
- }
-
- if (!(caps & MLX5_ACCEL_TLS_LRO)) {
- netdev->features &= ~NETIF_F_LRO;
- netdev->hw_features &= ~NETIF_F_LRO;
- }
-
- netdev->tlsdev_ops = &mlx5e_tls_ops;
-}
-
-int mlx5e_tls_init(struct mlx5e_priv *priv)
-{
- struct mlx5e_tls *tls;
-
- if (!mlx5e_accel_is_tls_device(priv->mdev))
- return 0;
-
- tls = kzalloc(sizeof(*tls), GFP_KERNEL);
- if (!tls)
- return -ENOMEM;
-
- priv->tls = tls;
- return 0;
-}
-
-void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
-{
- struct mlx5e_tls *tls = priv->tls;
-
- if (!tls)
- return;
-
- kfree(tls);
- priv->tls = NULL;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
deleted file mode 100644
index 62ecf14bf86a..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#ifndef __MLX5E_TLS_H__
-#define __MLX5E_TLS_H__
-
-#include "accel/tls.h"
-#include "en_accel/ktls.h"
-
-#ifdef CONFIG_MLX5_EN_TLS
-#include <net/tls.h>
-#include "en.h"
-
-struct mlx5e_tls_sw_stats {
- atomic64_t tx_tls_ctx;
- atomic64_t tx_tls_del;
- atomic64_t tx_tls_drop_metadata;
- atomic64_t tx_tls_drop_resync_alloc;
- atomic64_t tx_tls_drop_no_sync_data;
- atomic64_t tx_tls_drop_bypass_required;
- atomic64_t rx_tls_ctx;
- atomic64_t rx_tls_del;
- atomic64_t rx_tls_drop_resync_request;
- atomic64_t rx_tls_resync_request;
- atomic64_t rx_tls_resync_reply;
- atomic64_t rx_tls_auth_fail;
-};
-
-struct mlx5e_tls {
- struct mlx5e_tls_sw_stats sw_stats;
- struct workqueue_struct *rx_wq;
-};
-
-struct mlx5e_tls_offload_context_tx {
- struct tls_offload_context_tx base;
- u32 expected_seq;
- __be32 swid;
-};
-
-static inline struct mlx5e_tls_offload_context_tx *
-mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
-{
- BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) >
- TLS_OFFLOAD_CONTEXT_SIZE_TX);
- return container_of(tls_offload_ctx_tx(tls_ctx),
- struct mlx5e_tls_offload_context_tx,
- base);
-}
-
-struct mlx5e_tls_offload_context_rx {
- struct tls_offload_context_rx base;
- __be32 handle;
-};
-
-static inline struct mlx5e_tls_offload_context_rx *
-mlx5e_get_tls_rx_context(struct tls_context *tls_ctx)
-{
- BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) >
- TLS_OFFLOAD_CONTEXT_SIZE_RX);
- return container_of(tls_offload_ctx_rx(tls_ctx),
- struct mlx5e_tls_offload_context_rx,
- base);
-}
-
-static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv)
-{
- return priv->tls;
-}
-
-void mlx5e_tls_build_netdev(struct mlx5e_priv *priv);
-int mlx5e_tls_init(struct mlx5e_priv *priv);
-void mlx5e_tls_cleanup(struct mlx5e_priv *priv);
-
-int mlx5e_tls_get_count(struct mlx5e_priv *priv);
-int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
-int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
-
-static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return !is_kdump_kernel() &&
- mlx5_accel_is_tls_device(mdev);
-}
-
-#else
-
-static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
-{
- if (!is_kdump_kernel() &&
- mlx5_accel_is_ktls_device(priv->mdev))
- mlx5e_ktls_build_netdev(priv);
-}
-
-static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv) { return false; }
-static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
-static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
-static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
-static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
-static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
-
-#endif
-
-#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
deleted file mode 100644
index 7a700f913582..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include "en_accel/tls.h"
-#include "en_accel/tls_rxtx.h"
-#include "accel/accel.h"
-
-#include <net/inet6_hashtables.h>
-#include <linux/ipv6.h>
-
-#define SYNDROM_DECRYPTED 0x30
-#define SYNDROM_RESYNC_REQUEST 0x31
-#define SYNDROM_AUTH_FAILED 0x32
-
-#define SYNDROME_OFFLOAD_REQUIRED 32
-#define SYNDROME_SYNC 33
-
-struct sync_info {
- u64 rcd_sn;
- s32 sync_len;
- int nr_frags;
- skb_frag_t frags[MAX_SKB_FRAGS];
-};
-
-struct recv_metadata_content {
- u8 syndrome;
- u8 reserved;
- __be32 sync_seq;
-} __packed;
-
-struct send_metadata_content {
- /* One byte of syndrome followed by 3 bytes of swid */
- __be32 syndrome_swid;
- __be16 first_seq;
-} __packed;
-
-struct mlx5e_tls_metadata {
- union {
- /* from fpga to host */
- struct recv_metadata_content recv;
- /* from host to fpga */
- struct send_metadata_content send;
- unsigned char raw[6];
- } __packed content;
- /* packet type ID field */
- __be16 ethertype;
-} __packed;
-
-static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
-{
- struct mlx5e_tls_metadata *pet;
- struct ethhdr *eth;
-
- if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata)))
- return -ENOMEM;
-
- eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata));
- skb->mac_header -= sizeof(struct mlx5e_tls_metadata);
- pet = (struct mlx5e_tls_metadata *)(eth + 1);
-
- memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata),
- 2 * ETH_ALEN);
-
- eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
- pet->content.send.syndrome_swid =
- htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
-
- return 0;
-}
-
-static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context,
- u32 tcp_seq, struct sync_info *info)
-{
- int remaining, i = 0, ret = -EINVAL;
- struct tls_record_info *record;
- unsigned long flags;
- s32 sync_size;
-
- spin_lock_irqsave(&context->base.lock, flags);
- record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn);
-
- if (unlikely(!record))
- goto out;
-
- sync_size = tcp_seq - tls_record_start_seq(record);
- info->sync_len = sync_size;
- if (unlikely(sync_size < 0)) {
- if (tls_record_is_start_marker(record))
- goto done;
-
- goto out;
- }
-
- remaining = sync_size;
- while (remaining > 0) {
- info->frags[i] = record->frags[i];
- __skb_frag_ref(&info->frags[i]);
- remaining -= skb_frag_size(&info->frags[i]);
-
- if (remaining < 0)
- skb_frag_size_add(&info->frags[i], remaining);
-
- i++;
- }
- info->nr_frags = i;
-done:
- ret = 0;
-out:
- spin_unlock_irqrestore(&context->base.lock, flags);
- return ret;
-}
-
-static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
- struct sk_buff *nskb, u32 tcp_seq,
- int headln, __be64 rcd_sn)
-{
- struct mlx5e_tls_metadata *pet;
- u8 syndrome = SYNDROME_SYNC;
- struct iphdr *iph;
- struct tcphdr *th;
- int data_len, mss;
-
- nskb->dev = skb->dev;
- skb_reset_mac_header(nskb);
- skb_set_network_header(nskb, skb_network_offset(skb));
- skb_set_transport_header(nskb, skb_transport_offset(skb));
- memcpy(nskb->data, skb->data, headln);
- memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn));
-
- iph = ip_hdr(nskb);
- iph->tot_len = htons(nskb->len - skb_network_offset(nskb));
- th = tcp_hdr(nskb);
- data_len = nskb->len - headln;
- tcp_seq -= data_len;
- th->seq = htonl(tcp_seq);
-
- mss = nskb->dev->mtu - (headln - skb_network_offset(nskb));
- skb_shinfo(nskb)->gso_size = 0;
- if (data_len > mss) {
- skb_shinfo(nskb)->gso_size = mss;
- skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss);
- }
- skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
-
- pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
- memcpy(pet, &syndrome, sizeof(syndrome));
- pet->content.send.first_seq = htons(tcp_seq);
-
- /* MLX5 devices don't care about the checksum partial start, offset
- * and pseudo header
- */
- nskb->ip_summed = CHECKSUM_PARTIAL;
-
- nskb->queue_mapping = skb->queue_mapping;
-}
-
-static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
- struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tls *tls)
-{
- u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
- struct sync_info info;
- struct sk_buff *nskb;
- int linear_len = 0;
- int headln;
- int i;
-
- sq->stats->tls_ooo++;
-
- if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
- /* We might get here if a retransmission reaches the driver
- * after the relevant record is acked.
- * It should be safe to drop the packet in this case
- */
- atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data);
- goto err_out;
- }
-
- if (unlikely(info.sync_len < 0)) {
- u32 payload;
-
- headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
- payload = skb->len - headln;
- if (likely(payload <= -info.sync_len))
- /* SKB payload doesn't require offload
- */
- return true;
-
- atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
- goto err_out;
- }
-
- if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
- atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata);
- goto err_out;
- }
-
- headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
- linear_len += headln + sizeof(info.rcd_sn);
- nskb = alloc_skb(linear_len, GFP_ATOMIC);
- if (unlikely(!nskb)) {
- atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc);
- goto err_out;
- }
-
- context->expected_seq = tcp_seq + skb->len - headln;
- skb_put(nskb, linear_len);
- for (i = 0; i < info.nr_frags; i++)
- skb_shinfo(nskb)->frags[i] = info.frags[i];
-
- skb_shinfo(nskb)->nr_frags = info.nr_frags;
- nskb->data_len = info.sync_len;
- nskb->len += info.sync_len;
- sq->stats->tls_resync_bytes += nskb->len;
- mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
- cpu_to_be64(info.rcd_sn));
- mlx5e_sq_xmit_simple(sq, nskb, true);
-
- return true;
-
-err_out:
- dev_kfree_skb_any(skb);
- return false;
-}
-
-bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_tls_offload_context_tx *context;
- struct tls_context *tls_ctx;
- u32 expected_seq;
- int datalen;
- u32 skb_seq;
-
- datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
- if (!datalen)
- return true;
-
- mlx5e_tx_mpwqe_ensure_complete(sq);
-
- tls_ctx = tls_get_ctx(skb->sk);
- if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
- goto err_out;
-
- if (mlx5e_accel_is_ktls_tx(sq->mdev))
- return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state);
-
- /* FPGA */
- skb_seq = ntohl(tcp_hdr(skb)->seq);
- context = mlx5e_get_tls_tx_context(tls_ctx);
- expected_seq = context->expected_seq;
-
- if (unlikely(expected_seq != skb_seq))
- return mlx5e_tls_handle_ooo(context, sq, skb, priv->tls);
-
- if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
- atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
- dev_kfree_skb_any(skb);
- return false;
- }
-
- context->expected_seq = skb_seq + datalen;
- return true;
-
-err_out:
- dev_kfree_skb_any(skb);
- return false;
-}
-
-static int tls_update_resync_sn(struct net_device *netdev,
- struct sk_buff *skb,
- struct mlx5e_tls_metadata *mdata)
-{
- struct sock *sk = NULL;
- struct iphdr *iph;
- struct tcphdr *th;
- __be32 seq;
-
- if (mdata->ethertype != htons(ETH_P_IP))
- return -EINVAL;
-
- iph = (struct iphdr *)(mdata + 1);
-
- th = ((void *)iph) + iph->ihl * 4;
-
- if (iph->version == 4) {
- sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
- iph->saddr, th->source, iph->daddr,
- th->dest, netdev->ifindex);
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
-
- sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
- &ipv6h->saddr, th->source,
- &ipv6h->daddr, ntohs(th->dest),
- netdev->ifindex, 0);
-#endif
- }
- if (!sk || sk->sk_state == TCP_TIME_WAIT) {
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request);
- goto out;
- }
-
- skb->sk = sk;
- skb->destructor = sock_edemux;
-
- memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq));
- tls_offload_rx_resync_request(sk, seq);
-out:
- return 0;
-}
-
-/* FPGA tls rx handler */
-void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb,
- u32 *cqe_bcnt)
-{
- struct mlx5e_tls_metadata *mdata;
- struct mlx5e_priv *priv;
-
- /* Use the metadata */
- mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN);
- switch (mdata->content.recv.syndrome) {
- case SYNDROM_DECRYPTED:
- skb->decrypted = 1;
- break;
- case SYNDROM_RESYNC_REQUEST:
- tls_update_resync_sn(rq->netdev, skb, mdata);
- priv = netdev_priv(rq->netdev);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request);
- break;
- case SYNDROM_AUTH_FAILED:
- /* Authentication failure will be observed and verified by kTLS */
- priv = netdev_priv(rq->netdev);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail);
- break;
- default:
- /* Bypass the metadata header to others */
- return;
- }
-
- remove_metadata_hdr(skb);
- *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
-}
-
-u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
-{
- if (!mlx5e_accel_is_tls_device(mdev))
- return 0;
-
- if (mlx5e_accel_is_ktls_device(mdev))
- return mlx5e_ktls_get_stop_room(mdev, params);
-
- /* FPGA */
- /* Resync SKB. */
- return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
deleted file mode 100644
index 0ca0a023fb8d..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5E_TLS_RXTX_H__
-#define __MLX5E_TLS_RXTX_H__
-
-#include "accel/accel.h"
-#include "en_accel/ktls_txrx.h"
-
-#ifdef CONFIG_MLX5_EN_TLS
-
-#include <linux/skbuff.h>
-#include "en.h"
-#include "en/txrx.h"
-
-u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-
-bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state);
-
-static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb)
-{
- return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
-}
-
-static inline void
-mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg,
- struct mlx5e_accel_tx_tls_state *state)
-{
- cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
-}
-
-void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb,
- u32 *cqe_bcnt);
-
-static inline void
-mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
- struct mlx5_cqe64 *cqe, u32 *cqe_bcnt)
-{
- if (unlikely(get_cqe_tls_offload(cqe))) /* cqe bit indicates a TLS device */
- return mlx5e_ktls_handle_rx_skb(rq, skb, cqe, cqe_bcnt);
-
- if (unlikely(test_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state) && is_metadata_hdr_valid(skb)))
- return mlx5e_tls_handle_rx_skb_metadata(rq, skb, cqe_bcnt);
-}
-
-#else
-
-static inline bool
-mlx5e_accel_is_tls(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return false; }
-static inline void
-mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
- struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) {}
-static inline u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
-{
- return 0;
-}
-
-#endif /* CONFIG_MLX5_EN_TLS */
-
-#endif /* __MLX5E_TLS_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index fe5d82fa6e92..0ae1865086ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -114,47 +114,49 @@ static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
}
}
-static int arfs_disable(struct mlx5e_priv *priv)
+static int arfs_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err, i;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
/* Modify ttc rules destination back to their default */
- err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i));
+ err = mlx5_ttc_fwd_default_dest(ttc, arfs_get_tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, arfs_get_tt(i), err);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, arfs_get_tt(i), err);
return err;
}
}
return 0;
}
-static void arfs_del_rules(struct mlx5e_priv *priv);
+static void arfs_del_rules(struct mlx5e_flow_steering *fs);
-int mlx5e_arfs_disable(struct mlx5e_priv *priv)
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
{
- arfs_del_rules(priv);
+ arfs_del_rules(fs);
- return arfs_disable(priv);
+ return arfs_disable(fs);
}
-int mlx5e_arfs_enable(struct mlx5e_priv *priv)
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct mlx5_flow_destination dest = {};
int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
+ dest.ft = arfs->arfs_tables[i].ft.t;
/* Modify ttc rules destination to point on the aRFS FTs */
- err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(ttc, arfs_get_tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
- __func__, arfs_get_tt(i), err);
- arfs_disable(priv);
+ fs_err(fs, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
+ __func__, arfs_get_tt(i), err);
+ arfs_disable(fs);
return err;
}
}
@@ -167,31 +169,37 @@ static void arfs_destroy_table(struct arfs_table *arfs_t)
mlx5e_destroy_flow_table(&arfs_t->ft);
}
-static void _mlx5e_cleanup_tables(struct mlx5e_priv *priv)
+static void _mlx5e_cleanup_tables(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
int i;
- arfs_del_rules(priv);
- destroy_workqueue(priv->fs.arfs->wq);
+ arfs_del_rules(fs);
+ destroy_workqueue(arfs->wq);
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- if (!IS_ERR_OR_NULL(priv->fs.arfs->arfs_tables[i].ft.t))
- arfs_destroy_table(&priv->fs.arfs->arfs_tables[i]);
+ if (!IS_ERR_OR_NULL(arfs->arfs_tables[i].ft.t))
+ arfs_destroy_table(&arfs->arfs_tables[i]);
}
}
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple)
{
- if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+
+ if (!ntuple)
return;
- _mlx5e_cleanup_tables(priv);
- kvfree(priv->fs.arfs);
+ _mlx5e_cleanup_tables(fs);
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
}
-static int arfs_add_default_rule(struct mlx5e_priv *priv,
+static int arfs_add_default_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
enum arfs_type type)
{
- struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ struct arfs_table *arfs_t = &arfs->arfs_tables[type];
struct mlx5_flow_destination dest = {};
MLX5_DECLARE_FLOW_ACT(flow_act);
enum mlx5_traffic_types tt;
@@ -200,23 +208,21 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
tt = arfs_get_tt(type);
if (tt == -EINVAL) {
- netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
- __func__, type);
+ fs_err(fs, "%s: bad arfs_type: %d\n", __func__, type);
return -EINVAL;
}
/* FIXME: Must use mlx5_ttc_get_default_dest(),
* but can't since TTC default is not setup yet !
*/
- dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
+ dest.tir_num = mlx5e_rx_res_get_tirn_rss(rx_res, tt);
arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
&flow_act,
&dest, 1);
if (IS_ERR(arfs_t->default_rule)) {
err = PTR_ERR(arfs_t->default_rule);
arfs_t->default_rule = NULL;
- netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
- __func__, type);
+ fs_err(fs, "%s: add rule failed, arfs type=%d\n", __func__, type);
}
return err;
@@ -318,10 +324,12 @@ out:
return err;
}
-static int arfs_create_table(struct mlx5e_priv *priv,
+static int arfs_create_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
enum arfs_type type)
{
- struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -332,7 +340,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
ft_attr.level = MLX5E_ARFS_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
@@ -343,7 +351,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
if (err)
goto err;
- err = arfs_add_default_rule(priv, type);
+ err = arfs_add_default_rule(fs, rx_res, type);
if (err)
goto err;
@@ -353,35 +361,40 @@ err:
return err;
}
-int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
{
+ struct mlx5e_arfs_tables *arfs;
int err = -ENOMEM;
int i;
- if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ if (!ntuple)
return 0;
- priv->fs.arfs = kvzalloc(sizeof(*priv->fs.arfs), GFP_KERNEL);
- if (!priv->fs.arfs)
+ arfs = kvzalloc(sizeof(*arfs), GFP_KERNEL);
+ if (!arfs)
return -ENOMEM;
- spin_lock_init(&priv->fs.arfs->arfs_lock);
- INIT_LIST_HEAD(&priv->fs.arfs->rules);
- priv->fs.arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
- if (!priv->fs.arfs->wq)
+ spin_lock_init(&arfs->arfs_lock);
+ INIT_LIST_HEAD(&arfs->rules);
+ arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
+ if (!arfs->wq)
goto err;
+ mlx5e_fs_set_arfs(fs, arfs);
+
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- err = arfs_create_table(priv, i);
+ err = arfs_create_table(fs, rx_res, i);
if (err)
goto err_des;
}
return 0;
err_des:
- _mlx5e_cleanup_tables(priv);
+ _mlx5e_cleanup_tables(fs);
err:
- kvfree(priv->fs.arfs);
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
return err;
}
@@ -389,6 +402,7 @@ err:
static void arfs_may_expire_flow(struct mlx5e_priv *priv)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_rule *arfs_rule;
struct hlist_node *htmp;
HLIST_HEAD(del_list);
@@ -396,8 +410,8 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
int i;
int j;
- spin_lock_bh(&priv->fs.arfs->arfs_lock);
- mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(arfs_rule, htmp, arfs->arfs_tables, i, j) {
if (!work_pending(&arfs_rule->arfs_work) &&
rps_may_expire_flow(priv->netdev,
arfs_rule->rxq, arfs_rule->flow_id,
@@ -408,7 +422,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
break;
}
}
- spin_unlock_bh(&priv->fs.arfs->arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
if (arfs_rule->rule)
mlx5_del_flow_rules(arfs_rule->rule);
@@ -417,20 +431,21 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
}
}
-static void arfs_del_rules(struct mlx5e_priv *priv)
+static void arfs_del_rules(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct hlist_node *htmp;
struct arfs_rule *rule;
HLIST_HEAD(del_list);
int i;
int j;
- spin_lock_bh(&priv->fs.arfs->arfs_lock);
- mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
hlist_del_init(&rule->hlist);
hlist_add_head(&rule->hlist, &del_list);
}
- spin_unlock_bh(&priv->fs.arfs->arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
cancel_work_sync(&rule->arfs_work);
@@ -474,7 +489,7 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
struct arfs_rule *arfs_rule)
{
- struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_tuple *tuple = &arfs_rule->tuple;
struct mlx5_flow_handle *rule = NULL;
struct mlx5_flow_destination dest = {};
@@ -556,7 +571,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- priv->channel_stats[arfs_rule->rxq].rq.arfs_err++;
+ priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++;
mlx5e_dbg(HW, priv,
"%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
__func__, arfs_rule->filter_id, arfs_rule->rxq,
@@ -588,13 +603,15 @@ static void arfs_handle_work(struct work_struct *work)
struct arfs_rule,
arfs_work);
struct mlx5e_priv *priv = arfs_rule->priv;
+ struct mlx5e_arfs_tables *arfs;
struct mlx5_flow_handle *rule;
+ arfs = mlx5e_fs_get_arfs(priv->fs);
mutex_lock(&priv->state_lock);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- spin_lock_bh(&priv->fs.arfs->arfs_lock);
+ spin_lock_bh(&arfs->arfs_lock);
hlist_del(&arfs_rule->hlist);
- spin_unlock_bh(&priv->fs.arfs->arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
mutex_unlock(&priv->state_lock);
kfree(arfs_rule);
@@ -620,6 +637,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
const struct flow_keys *fk,
u16 rxq, u32 flow_id)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_rule *rule;
struct arfs_tuple *tuple;
@@ -647,7 +665,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
tuple->dst_port = fk->ports.dst;
rule->flow_id = flow_id;
- rule->filter_id = priv->fs.arfs->last_filter_id++ % RPS_NO_FILTER;
+ rule->filter_id = arfs->last_filter_id++ % RPS_NO_FILTER;
hlist_add_head(&rule->hlist,
arfs_hash_bucket(arfs_t, tuple->src_port,
@@ -691,11 +709,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
- struct arfs_table *arfs_t;
+ struct mlx5e_arfs_tables *arfs;
struct arfs_rule *arfs_rule;
+ struct arfs_table *arfs_t;
struct flow_keys fk;
+ arfs = mlx5e_fs_get_arfs(priv->fs);
if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
return -EPROTONOSUPPORT;
@@ -725,7 +744,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
return -ENOMEM;
}
}
- queue_work(priv->fs.arfs->wq, &arfs_rule->arfs_work);
+ queue_work(arfs->wq, &arfs_rule->arfs_work);
spin_unlock_bh(&arfs->arfs_lock);
return arfs_rule->filter_id;
}
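/* Net effect of the conversion above: aRFS now takes mlx5e_flow_steering
 * plus explicit inputs (rx_res, the ntuple flag) instead of reaching through
 * mlx5e_priv. A sketch of the resulting setup path, assuming a caller that
 * owns priv (the exact en_fs/en_main call site is not shown in this hunk):
 */
static int arfs_setup_sketch(struct mlx5e_priv *priv)
{
	bool ntuple = !!(priv->netdev->hw_features & NETIF_F_NTUPLE);
	int err;

	err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res, ntuple);
	if (err)
		return err;

	err = mlx5e_arfs_enable(priv->fs);
	if (err)
		mlx5e_arfs_destroy_tables(priv->fs, ntuple);
	return err;
}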
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index c0f409c195bf..68f19324db93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -46,8 +46,7 @@ void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write);
}
-static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
- u32 *mkey)
+int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
void *mkc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index a4c8d8d00d5a..2449731b7d79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1026,15 +1026,6 @@ void mlx5e_dcbnl_build_netdev(struct net_device *netdev)
netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
}
-void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
-
- if (MLX5_CAP_GEN(mdev, qos))
- netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
-}
-
static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
enum mlx5_dcbx_oper_mode *mode)
{
@@ -1142,7 +1133,7 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
err = mlx5_set_trust_state(priv->mdev, *trust_state);
if (err)
return err;
- priv->dcbx_dp.trust_state = *trust_state;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state);
return 0;
}
@@ -1187,16 +1178,28 @@ static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ u8 trust_state;
int err;
- priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
-
- if (!MLX5_DSCP_SUPPORTED(mdev))
+ if (!MLX5_DSCP_SUPPORTED(mdev)) {
+ WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP);
return 0;
+ }
- err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state);
+ err = mlx5_query_trust_state(priv->mdev, &trust_state);
if (err)
return err;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state);
+
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) {
+ /* Align the driver state with the register state.
+ * Temporary state change is required to enable the app list reset.
+ */
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP;
+ mlx5e_dcbnl_delete_app(priv);
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
+ }
mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
priv->dcbx_dp.trust_state);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c2ea5fad48dd..24aa25da482b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -30,24 +30,27 @@
* SOFTWARE.
*/
+#include <linux/ethtool_netlink.h>
+
#include "en.h"
#include "en/port.h"
#include "en/params.h"
#include "en/xsk/pool.h"
#include "en/ptp.h"
#include "lib/clock.h"
+#include "en/fs_ethtool.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
struct ethtool_drvinfo *drvinfo)
{
struct mlx5_core_dev *mdev = priv->mdev;
- strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+ strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
mdev->board_id);
- strlcpy(drvinfo->bus_info, dev_name(mdev->device),
+ strscpy(drvinfo->bus_info, dev_name(mdev->device),
sizeof(drvinfo->bus_info));
}
@@ -305,20 +308,36 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
}
void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param)
{
- param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
+ /* Limitation for regular RQ. XSK RQ may clamp the queue length in
+ * mlx5e_mpwqe_get_log_rq_size.
+ */
+ u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev,
+ PAGE_SHIFT,
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED);
+
+ param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ max_log_mpwrq_pkts);
param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames;
param->tx_pending = 1 << priv->channels.params.log_sq_size;
+
+ kernel_param->tcp_data_split =
+ (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ?
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED :
+ ETHTOOL_TCP_DATA_SPLIT_DISABLED;
}
static void mlx5e_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- mlx5e_ethtool_get_ringparam(priv, param);
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
}
int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
@@ -380,7 +399,9 @@ unlock:
}
static int mlx5e_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -447,7 +468,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
*/
- if (priv->htb.maj_id) {
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
err = -EINVAL;
netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n",
__func__);
@@ -482,14 +503,14 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE);
if (arfs_enabled)
- mlx5e_arfs_disable(priv);
+ mlx5e_arfs_disable(priv->fs);
/* Switch to new channels, set new parameters and close old ones */
err = mlx5e_safe_switch_params(priv, &new_params,
mlx5e_num_channels_changed_ctx, NULL, true);
if (arfs_enabled) {
- int err2 = mlx5e_arfs_enable(priv);
+ int err2 = mlx5e_arfs_enable(priv->fs);
if (err2)
netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
@@ -511,7 +532,8 @@ static int mlx5e_set_channels(struct net_device *dev,
}
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal)
{
struct dim_cq_moder *rx_moder, *tx_moder;
@@ -528,6 +550,11 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
coal->tx_max_coalesced_frames = tx_moder->pkts;
coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled;
+ kernel_coal->use_cqe_mode_rx =
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER);
+ kernel_coal->use_cqe_mode_tx =
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER);
+
return 0;
}
@@ -538,7 +565,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_get_coalesce(priv, coal);
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
}
#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD
@@ -578,14 +605,26 @@ mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal
}
}
+/* convert a boolean value of cq_mode to mlx5 period mode
+ * true : MLX5_CQ_PERIOD_MODE_START_FROM_CQE
+ * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE
+ */
+static int cqe_mode_to_period_mode(bool val)
+{
+ return val ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct dim_cq_moder *rx_moder, *tx_moder;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_params new_params;
bool reset_rx, reset_tx;
bool reset = true;
+ u8 cq_period_mode;
int err = 0;
if (!MLX5_CAP_GEN(mdev, cq_moderation))
@@ -605,6 +644,12 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
return -ERANGE;
}
+ if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) &&
+ !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) {
+ NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device");
+ return -EOPNOTSUPP;
+ }
+
mutex_lock(&priv->state_lock);
new_params = priv->channels.params;
@@ -621,6 +666,18 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled;
+ cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx);
+ if (cq_period_mode != rx_moder->cq_period_mode) {
+ mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode);
+ reset_rx = true;
+ }
+
+ cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx);
+ if (cq_period_mode != tx_moder->cq_period_mode) {
+ mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode);
+ reset_tx = true;
+ }
+
if (reset_rx) {
u8 mode = MLX5E_GET_PFLAG(&new_params,
MLX5E_PFLAG_RX_CQE_BASED_MODER);
@@ -656,9 +713,9 @@ static int mlx5e_set_coalesce(struct net_device *netdev,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_set_coalesce(priv, coal);
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
}
static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev,
@@ -1752,7 +1809,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
if (size_read < 0) {
netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
__func__, size_read);
- return 0;
+ return size_read;
}
i += size_read;
@@ -1843,24 +1900,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_params new_params;
- bool mode_changed;
u8 cq_period_mode, current_cq_period_mode;
+ struct mlx5e_params new_params;
+
+ if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ return -EOPNOTSUPP;
+
+ cq_period_mode = cqe_mode_to_period_mode(enable);
- cq_period_mode = enable ?
- MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
- MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
current_cq_period_mode = is_rx_cq ?
priv->channels.params.rx_cq_moderation.cq_period_mode :
priv->channels.params.tx_cq_moderation.cq_period_mode;
- mode_changed = cq_period_mode != current_cq_period_mode;
-
- if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
- !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
- return -EOPNOTSUPP;
- if (!mode_changed)
+ if (cq_period_mode == current_cq_period_mode)
return 0;
new_params = priv->channels.params;
@@ -1894,7 +1946,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
if (curr_val == new_val)
return 0;
- if (new_val && !priv->profile->rx_ptp_support && rx_filter) {
+ if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) {
netdev_err(priv->netdev,
"Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n");
return -EINVAL;
@@ -1953,10 +2005,14 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
struct mlx5e_params new_params;
if (enable) {
- if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
- return -EOPNOTSUPP;
- if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
- return -EINVAL;
+ /* Checking the regular RQ here; mlx5e_validate_xsk_param called
+ * from mlx5e_open_xsk will check for each XSK queue, and
+ * mlx5e_safe_switch_params will be reverted if any check fails.
+ */
+ int err = mlx5e_mpwrq_validate_regular(mdev, &priv->channels.params);
+
+ if (err)
+ return err;
} else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
return -EINVAL;
@@ -2032,7 +2088,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
* the numeration of the QoS SQs will change, while per-queue qdiscs are
* attached.
*/
- if (priv->htb.maj_id) {
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n",
__func__);
return -EINVAL;
@@ -2358,7 +2414,8 @@ static void mlx5e_get_rmon_stats(struct net_device *netdev,
const struct ethtool_ops mlx5e_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_MAX_FRAMES |
- ETHTOOL_COALESCE_USE_ADAPTIVE,
+ ETHTOOL_COALESCE_USE_ADAPTIVE |
+ ETHTOOL_COALESCE_USE_CQE,
.get_drvinfo = mlx5e_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_link_ext_state = mlx5e_get_link_ext_state,
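/* Pattern behind the USE_CQE additions above: a requested CQE-based mode is
 * rejected when the device lacks cq_period_start_from_cqe, and a channel
 * reset is scheduled only for the direction whose period mode actually
 * changed. A condensed model of that decision (illustrative helper, not the
 * driver's function):
 */
static int apply_cqe_mode_sketch(struct mlx5_core_dev *mdev, bool use_cqe,
				 u8 *cur_period_mode, bool *reset)
{
	u8 new_mode = cqe_mode_to_period_mode(use_cqe);

	if (use_cqe && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
		return -EOPNOTSUPP;

	if (new_mode != *cur_period_mode) {
		*cur_period_mode = new_mode;
		*reset = true;
	}
	return 0;
}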
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index aeff1d972a46..1892ccb889b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -36,14 +36,42 @@
#include <linux/tcp.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mpfs.h>
-#include "en.h"
-#include "en_rep.h"
+#include "en_tc.h"
#include "lib/mpfs.h"
#include "en/ptp.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_flow_steering {
+ struct work_struct set_rx_mode_work;
+ bool state_destroy;
+ bool vlan_strip_disable;
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_namespace *egress_ns;
+#ifdef CONFIG_MLX5_EN_RXNFC
+ struct mlx5e_ethtool_steering *ethtool;
+#endif
+ struct mlx5e_tc_table *tc;
+ struct mlx5e_promisc_table promisc;
+ struct mlx5e_vlan_table *vlan;
+ struct mlx5e_l2_table l2;
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_ttc_table *inner_ttc;
+#ifdef CONFIG_MLX5_EN_ARFS
+ struct mlx5e_arfs_tables *arfs;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_accel_fs_tcp *accel_tcp;
+#endif
+ struct mlx5e_fs_udp *udp;
+ struct mlx5e_fs_any *any;
+ struct mlx5e_ptp_fs *ptp_fs;
+};
-static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai, int type);
-static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai);
enum {
@@ -132,9 +160,8 @@ struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan)
return vlan->ft.t;
}
-static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
+static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs)
{
- struct net_device *ndev = priv->netdev;
int max_list_size;
int list_size;
u16 *vlans;
@@ -143,35 +170,34 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
int i;
list_size = 0;
- for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID)
+ for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID)
list_size++;
- max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
+ max_list_size = 1 << MLX5_CAP_GEN(fs->mdev, log_max_vlan_list);
if (list_size > max_list_size) {
- netdev_warn(ndev,
- "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
- list_size, max_list_size);
+ fs_warn(fs, "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
list_size = max_list_size;
}
- vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+ vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
if (!vlans)
return -ENOMEM;
i = 0;
- for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID) {
+ for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) {
if (i >= list_size)
break;
vlans[i++] = vlan;
}
- err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size);
+ err = mlx5_modify_nic_vport_vlans(fs->mdev, vlans, list_size);
if (err)
- netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
- err);
+ fs_err(fs, "Failed to modify vport vlans list err(%d)\n",
+ err);
- kfree(vlans);
+ kvfree(vlans);
return err;
}
@@ -183,18 +209,18 @@ enum mlx5e_vlan_rule_type {
MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID,
};
-static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
enum mlx5e_vlan_rule_type rule_type,
u16 vid, struct mlx5_flow_spec *spec)
{
- struct mlx5_flow_table *ft = priv->fs.vlan->ft.t;
+ struct mlx5_flow_table *ft = fs->vlan->ft.t;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle **rule_p;
MLX5_DECLARE_FLOW_ACT(flow_act);
int err = 0;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.l2.ft.t;
+ dest.ft = fs->l2.ft.t;
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
@@ -204,24 +230,24 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
* disabled in match value means both S & C tags
* don't exist (untagged of both)
*/
- rule_p = &priv->fs.vlan->untagged_rule;
+ rule_p = &fs->vlan->untagged_rule;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.cvlan_tag);
break;
case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
- rule_p = &priv->fs.vlan->any_cvlan_rule;
+ rule_p = &fs->vlan->any_cvlan_rule;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.cvlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
break;
case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
- rule_p = &priv->fs.vlan->any_svlan_rule;
+ rule_p = &fs->vlan->any_svlan_rule;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.svlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
break;
case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
- rule_p = &priv->fs.vlan->active_svlans_rule[vid];
+ rule_p = &fs->vlan->active_svlans_rule[vid];
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.svlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
@@ -231,7 +257,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
vid);
break;
default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */
- rule_p = &priv->fs.vlan->active_cvlans_rule[vid];
+ rule_p = &fs->vlan->active_cvlans_rule[vid];
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.cvlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
@@ -250,13 +276,13 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
if (IS_ERR(*rule_p)) {
err = PTR_ERR(*rule_p);
*rule_p = NULL;
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ fs_err(fs, "%s: add rule failed\n", __func__);
}
return err;
}
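For readers unfamiliar with the fte_match_param accessors used throughout this function, here is a self-contained fragment showing what matching one specific C-tag VID amounts to (VID 100 is an arbitrary example; spec is assumed to be a zeroed struct mlx5_flow_spec):

/* Illustration of the MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID arm above. */
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
		 outer_headers.cvlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
		 outer_headers.first_vid);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, 100);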
-static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+static int mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
enum mlx5e_vlan_rule_type rule_type, u16 vid)
{
struct mlx5_flow_spec *spec;
@@ -267,68 +293,68 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
return -ENOMEM;
if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID)
- mlx5e_vport_context_update_vlans(priv);
+ mlx5e_vport_context_update_vlans(fs);
- err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec);
+ err = __mlx5e_add_vlan_rule(fs, rule_type, vid, spec);
kvfree(spec);
return err;
}
-static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
- enum mlx5e_vlan_rule_type rule_type, u16 vid)
+static void mlx5e_fs_del_vlan_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
{
switch (rule_type) {
case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
- if (priv->fs.vlan->untagged_rule) {
- mlx5_del_flow_rules(priv->fs.vlan->untagged_rule);
- priv->fs.vlan->untagged_rule = NULL;
+ if (fs->vlan->untagged_rule) {
+ mlx5_del_flow_rules(fs->vlan->untagged_rule);
+ fs->vlan->untagged_rule = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
- if (priv->fs.vlan->any_cvlan_rule) {
- mlx5_del_flow_rules(priv->fs.vlan->any_cvlan_rule);
- priv->fs.vlan->any_cvlan_rule = NULL;
+ if (fs->vlan->any_cvlan_rule) {
+ mlx5_del_flow_rules(fs->vlan->any_cvlan_rule);
+ fs->vlan->any_cvlan_rule = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
- if (priv->fs.vlan->any_svlan_rule) {
- mlx5_del_flow_rules(priv->fs.vlan->any_svlan_rule);
- priv->fs.vlan->any_svlan_rule = NULL;
+ if (fs->vlan->any_svlan_rule) {
+ mlx5_del_flow_rules(fs->vlan->any_svlan_rule);
+ fs->vlan->any_svlan_rule = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
- if (priv->fs.vlan->active_svlans_rule[vid]) {
- mlx5_del_flow_rules(priv->fs.vlan->active_svlans_rule[vid]);
- priv->fs.vlan->active_svlans_rule[vid] = NULL;
+ if (fs->vlan->active_svlans_rule[vid]) {
+ mlx5_del_flow_rules(fs->vlan->active_svlans_rule[vid]);
+ fs->vlan->active_svlans_rule[vid] = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
- if (priv->fs.vlan->active_cvlans_rule[vid]) {
- mlx5_del_flow_rules(priv->fs.vlan->active_cvlans_rule[vid]);
- priv->fs.vlan->active_cvlans_rule[vid] = NULL;
+ if (fs->vlan->active_cvlans_rule[vid]) {
+ mlx5_del_flow_rules(fs->vlan->active_cvlans_rule[vid]);
+ fs->vlan->active_cvlans_rule[vid] = NULL;
}
- mlx5e_vport_context_update_vlans(priv);
+ mlx5e_vport_context_update_vlans(fs);
break;
}
}
-static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv)
+static void mlx5e_fs_del_any_vid_rules(struct mlx5e_flow_steering *fs)
{
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
}
-static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv)
+static int mlx5e_fs_add_any_vid_rules(struct mlx5e_flow_steering *fs)
{
int err;
- err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
if (err)
return err;
- return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+ return mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
}
static struct mlx5_flow_handle *
@@ -352,103 +378,103 @@ mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num)
return rule;
}
-int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num)
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
{
- struct mlx5_flow_table *ft = priv->fs.vlan->ft.t;
+ struct mlx5_flow_table *ft = fs->vlan->ft.t;
struct mlx5_flow_handle *rule;
int err;
rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- priv->fs.vlan->trap_rule = NULL;
- netdev_err(priv->netdev, "%s: add VLAN trap rule failed, err %d\n",
- __func__, err);
+ fs->vlan->trap_rule = NULL;
+ fs_err(fs, "%s: add VLAN trap rule failed, err %d\n",
+ __func__, err);
return err;
}
- priv->fs.vlan->trap_rule = rule;
+ fs->vlan->trap_rule = rule;
return 0;
}
-void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv)
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs)
{
- if (priv->fs.vlan->trap_rule) {
- mlx5_del_flow_rules(priv->fs.vlan->trap_rule);
- priv->fs.vlan->trap_rule = NULL;
+ if (fs->vlan->trap_rule) {
+ mlx5_del_flow_rules(fs->vlan->trap_rule);
+ fs->vlan->trap_rule = NULL;
}
}
-int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num)
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
{
- struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
+ struct mlx5_flow_table *ft = fs->l2.ft.t;
struct mlx5_flow_handle *rule;
int err;
rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- priv->fs.l2.trap_rule = NULL;
- netdev_err(priv->netdev, "%s: add MAC trap rule failed, err %d\n",
- __func__, err);
+ fs->l2.trap_rule = NULL;
+ fs_err(fs, "%s: add MAC trap rule failed, err %d\n",
+ __func__, err);
return err;
}
- priv->fs.l2.trap_rule = rule;
+ fs->l2.trap_rule = rule;
return 0;
}
-void mlx5e_remove_mac_trap(struct mlx5e_priv *priv)
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs)
{
- if (priv->fs.l2.trap_rule) {
- mlx5_del_flow_rules(priv->fs.l2.trap_rule);
- priv->fs.l2.trap_rule = NULL;
+ if (fs->l2.trap_rule) {
+ mlx5_del_flow_rules(fs->l2.trap_rule);
+ fs->l2.trap_rule = NULL;
}
}
-void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
{
- if (!priv->fs.vlan->cvlan_filter_disabled)
+ if (!fs->vlan->cvlan_filter_disabled)
return;
- priv->fs.vlan->cvlan_filter_disabled = false;
- if (priv->netdev->flags & IFF_PROMISC)
+ fs->vlan->cvlan_filter_disabled = false;
+ if (promisc)
return;
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
}
-void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv)
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
{
- if (priv->fs.vlan->cvlan_filter_disabled)
+ if (fs->vlan->cvlan_filter_disabled)
return;
- priv->fs.vlan->cvlan_filter_disabled = true;
- if (priv->netdev->flags & IFF_PROMISC)
+ fs->vlan->cvlan_filter_disabled = true;
+ if (promisc)
return;
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
}
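Since the two toggles above no longer read priv->netdev->flags themselves, callers are now responsible for sampling IFF_PROMISC. A hedged caller sketch (the surrounding enable condition and call site are assumptions, not part of this patch):

/* Assumed caller shape, e.g. in the set_features path: */
if (enable)
	mlx5e_enable_cvlan_filter(priv->fs,
				  !!(priv->netdev->flags & IFF_PROMISC));
else
	mlx5e_disable_cvlan_filter(priv->fs,
				   !!(priv->netdev->flags & IFF_PROMISC));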
-static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid)
+static int mlx5e_vlan_rx_add_cvid(struct mlx5e_flow_steering *fs, u16 vid)
{
int err;
- set_bit(vid, priv->fs.vlan->active_cvlans);
+ set_bit(vid, fs->vlan->active_cvlans);
- err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
if (err)
- clear_bit(vid, priv->fs.vlan->active_cvlans);
+ clear_bit(vid, fs->vlan->active_cvlans);
return err;
}
-static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
+static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev, u16 vid)
{
- struct net_device *netdev = priv->netdev;
int err;
- set_bit(vid, priv->fs.vlan->active_svlans);
+ set_bit(vid, fs->vlan->active_svlans);
- err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
if (err) {
- clear_bit(vid, priv->fs.vlan->active_svlans);
+ clear_bit(vid, fs->vlan->active_svlans);
return err;
}
@@ -457,86 +483,91 @@ static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
return err;
}
-int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
- if (mlx5e_is_uplink_rep(priv))
- return 0; /* no vlan table for uplink rep */
+ if (!fs->vlan) {
+ fs_err(fs, "Vlan doesn't exist\n");
+ return -EINVAL;
+ }
if (be16_to_cpu(proto) == ETH_P_8021Q)
- return mlx5e_vlan_rx_add_cvid(priv, vid);
+ return mlx5e_vlan_rx_add_cvid(fs, vid);
else if (be16_to_cpu(proto) == ETH_P_8021AD)
- return mlx5e_vlan_rx_add_svid(priv, vid);
+ return mlx5e_vlan_rx_add_svid(fs, netdev, vid);
return -EOPNOTSUPP;
}
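The renamed helper takes the fs object and net_device explicitly, so the ndo callback is expected to shrink to a thin wrapper. A sketch, assuming the uplink-rep early return moves into the wrapper in en_main.c:

static int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
{
	struct mlx5e_priv *priv = netdev_priv(dev);

	if (mlx5e_is_uplink_rep(priv))
		return 0; /* no vlan table for uplink rep */

	return mlx5e_fs_vlan_rx_add_vid(priv->fs, dev, proto, vid);
}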
-int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
- if (mlx5e_is_uplink_rep(priv))
- return 0; /* no vlan table for uplink rep */
+ if (!fs->vlan) {
+ fs_err(fs, "Vlan doesn't exist\n");
+ return -EINVAL;
+ }
if (be16_to_cpu(proto) == ETH_P_8021Q) {
- clear_bit(vid, priv->fs.vlan->active_cvlans);
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ clear_bit(vid, fs->vlan->active_cvlans);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
} else if (be16_to_cpu(proto) == ETH_P_8021AD) {
- clear_bit(vid, priv->fs.vlan->active_svlans);
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
- netdev_update_features(dev);
+ clear_bit(vid, fs->vlan->active_svlans);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ netdev_update_features(netdev);
}
return 0;
}
-static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
+static void mlx5e_fs_add_vlan_rules(struct mlx5e_flow_steering *fs)
{
int i;
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
- for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) {
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) {
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
}
- for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID)
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID)
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
- if (priv->fs.vlan->cvlan_filter_disabled)
- mlx5e_add_any_vid_rules(priv);
+ if (fs->vlan->cvlan_filter_disabled)
+ mlx5e_fs_add_any_vid_rules(fs);
}
-static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+static void mlx5e_del_vlan_rules(struct mlx5e_flow_steering *fs)
{
int i;
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
- for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) {
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) {
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
}
- for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID)
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID)
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
- WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
+ WARN_ON_ONCE(fs->state_destroy);
- mlx5e_remove_vlan_trap(priv);
+ mlx5e_remove_vlan_trap(fs);
/* must be called after DESTROY bit is set and
* set_rx_mode is called and flushed
*/
- if (priv->fs.vlan->cvlan_filter_disabled)
- mlx5e_del_any_vid_rules(priv);
+ if (fs->vlan->cvlan_filter_disabled)
+ mlx5e_fs_del_any_vid_rules(fs);
}
#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
-static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
+static void mlx5e_execute_l2_action(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_hash_node *hn)
{
u8 action = hn->action;
@@ -547,9 +578,9 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
switch (action) {
case MLX5E_ACTION_ADD:
- mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
+ mlx5e_add_l2_flow_rule(fs, &hn->ai, MLX5E_FULLMATCH);
if (!is_multicast_ether_addr(mac_addr)) {
- l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr);
+ l2_err = mlx5_mpfs_add_mac(fs->mdev, mac_addr);
hn->mpfs = !l2_err;
}
hn->action = MLX5E_ACTION_NONE;
@@ -557,52 +588,51 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
case MLX5E_ACTION_DEL:
if (!is_multicast_ether_addr(mac_addr) && hn->mpfs)
- l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr);
- mlx5e_del_l2_flow_rule(priv, &hn->ai);
+ l2_err = mlx5_mpfs_del_mac(fs->mdev, mac_addr);
+ mlx5e_del_l2_flow_rule(fs, &hn->ai);
mlx5e_del_l2_from_hash(hn);
break;
}
if (l2_err)
- netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
- action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err);
+ fs_warn(fs, "MPFS, failed to %s mac %pM, err(%d)\n",
+ action == MLX5E_ACTION_ADD ? "add" : "del",
+ mac_addr, l2_err);
}
-static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
+static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
{
- struct net_device *netdev = priv->netdev;
struct netdev_hw_addr *ha;
netif_addr_lock_bh(netdev);
- mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc,
- priv->netdev->dev_addr);
-
+ mlx5e_add_l2_to_hash(fs->l2.netdev_uc, netdev->dev_addr);
netdev_for_each_uc_addr(ha, netdev)
- mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr);
+ mlx5e_add_l2_to_hash(fs->l2.netdev_uc, ha->addr);
netdev_for_each_mc_addr(ha, netdev)
- mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr);
+ mlx5e_add_l2_to_hash(fs->l2.netdev_mc, ha->addr);
netif_addr_unlock_bh(netdev);
}
-static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
+static void mlx5e_fill_addr_array(struct mlx5e_flow_steering *fs, int list_type,
+ struct net_device *ndev,
u8 addr_array[][ETH_ALEN], int size)
{
bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
- struct net_device *ndev = priv->netdev;
struct mlx5e_l2_hash_node *hn;
struct hlist_head *addr_list;
struct hlist_node *tmp;
int i = 0;
int hi;
- addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+ addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc;
if (is_uc) /* Make sure our own address is pushed first */
ether_addr_copy(addr_array[i++], ndev->dev_addr);
- else if (priv->fs.l2.broadcast_enabled)
+ else if (fs->l2.broadcast_enabled)
ether_addr_copy(addr_array[i++], ndev->broadcast);
mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
@@ -614,7 +644,8 @@ static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
}
}
-static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
int list_type)
{
bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
@@ -627,19 +658,18 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
int err;
int hi;
- size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0);
+ size = is_uc ? 0 : (fs->l2.broadcast_enabled ? 1 : 0);
max_size = is_uc ?
- 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) :
- 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list);
+ 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_mc_list);
- addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+ addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc;
mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
size++;
if (size > max_size) {
- netdev_warn(priv->netdev,
- "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
- is_uc ? "UC" : "MC", size, max_size);
+ fs_warn(fs, "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
size = max_size;
}
@@ -649,65 +679,66 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
err = -ENOMEM;
goto out;
}
- mlx5e_fill_addr_array(priv, list_type, addr_array, size);
+ mlx5e_fill_addr_array(fs, list_type, netdev, addr_array, size);
}
- err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size);
+ err = mlx5_modify_nic_vport_mac_list(fs->mdev, list_type, addr_array, size);
out:
if (err)
- netdev_err(priv->netdev,
- "Failed to modify vport %s list err(%d)\n",
- is_uc ? "UC" : "MC", err);
+ fs_err(fs, "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
kfree(addr_array);
}
-static void mlx5e_vport_context_update(struct mlx5e_priv *priv)
+static void mlx5e_vport_context_update(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
{
- struct mlx5e_l2_table *ea = &priv->fs.l2;
+ struct mlx5e_l2_table *ea = &fs->l2;
- mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC);
- mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC);
- mlx5_modify_nic_vport_promisc(priv->mdev, 0,
+ mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_UC);
+ mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_MC);
+ mlx5_modify_nic_vport_promisc(fs->mdev, 0,
ea->allmulti_enabled,
ea->promisc_enabled);
}
-static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv)
+static void mlx5e_apply_netdev_addr(struct mlx5e_flow_steering *fs)
{
struct mlx5e_l2_hash_node *hn;
struct hlist_node *tmp;
int i;
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
- mlx5e_execute_l2_action(priv, hn);
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i)
+ mlx5e_execute_l2_action(fs, hn);
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
- mlx5e_execute_l2_action(priv, hn);
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i)
+ mlx5e_execute_l2_action(fs, hn);
}
-static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv)
+static void mlx5e_handle_netdev_addr(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
{
struct mlx5e_l2_hash_node *hn;
struct hlist_node *tmp;
int i;
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i)
hn->action = MLX5E_ACTION_DEL;
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i)
hn->action = MLX5E_ACTION_DEL;
- if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state))
- mlx5e_sync_netdev_addr(priv);
+ if (fs->state_destroy)
+ mlx5e_sync_netdev_addr(fs, netdev);
- mlx5e_apply_netdev_addr(priv);
+ mlx5e_apply_netdev_addr(fs);
}
#define MLX5E_PROMISC_GROUP0_SIZE BIT(0)
#define MLX5E_PROMISC_TABLE_SIZE MLX5E_PROMISC_GROUP0_SIZE
-static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv)
+static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs)
{
- struct mlx5_flow_table *ft = priv->fs.promisc.ft.t;
+ struct mlx5_flow_table *ft = fs->promisc.ft.t;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle **rule_p;
MLX5_DECLARE_FLOW_ACT(flow_act);
@@ -718,22 +749,22 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv)
if (!spec)
return -ENOMEM;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
+ dest.ft = mlx5_get_ttc_flow_table(fs->ttc);
- rule_p = &priv->fs.promisc.rule;
+ rule_p = &fs->promisc.rule;
*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(*rule_p)) {
err = PTR_ERR(*rule_p);
*rule_p = NULL;
- netdev_err(priv->netdev, "%s: add promiscuous rule failed\n", __func__);
+ fs_err(fs, "%s: add promiscuous rule failed\n", __func__);
}
kvfree(spec);
return err;
}
-static int mlx5e_create_promisc_table(struct mlx5e_priv *priv)
+static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_flow_table *ft = &priv->fs.promisc.ft;
+ struct mlx5e_flow_table *ft = &fs->promisc.ft;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -742,14 +773,14 @@ static int mlx5e_create_promisc_table(struct mlx5e_priv *priv)
ft_attr.level = MLX5E_PROMISC_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
- netdev_err(priv->netdev, "fail to create promisc table err=%d\n", err);
+ fs_err(fs, "Failed to create promisc table err=%d\n", err);
return err;
}
- err = mlx5e_add_promisc_rule(priv);
+ err = mlx5e_add_promisc_rule(fs);
if (err)
goto err_destroy_promisc_table;
@@ -762,34 +793,31 @@ err_destroy_promisc_table:
return err;
}
-static void mlx5e_del_promisc_rule(struct mlx5e_priv *priv)
+static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs)
{
- if (WARN(!priv->fs.promisc.rule, "Trying to remove non-existing promiscuous rule"))
+ if (WARN(!fs->promisc.rule, "Trying to remove non-existing promiscuous rule"))
return;
- mlx5_del_flow_rules(priv->fs.promisc.rule);
- priv->fs.promisc.rule = NULL;
+ mlx5_del_flow_rules(fs->promisc.rule);
+ fs->promisc.rule = NULL;
}
-static void mlx5e_destroy_promisc_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs)
{
- if (WARN(!priv->fs.promisc.ft.t, "Trying to remove non-existing promiscuous table"))
+ if (WARN(!fs->promisc.ft.t, "Trying to remove non-existing promiscuous table"))
return;
- mlx5e_del_promisc_rule(priv);
- mlx5_destroy_flow_table(priv->fs.promisc.ft.t);
- priv->fs.promisc.ft.t = NULL;
+ mlx5e_del_promisc_rule(fs);
+ mlx5_destroy_flow_table(fs->promisc.ft.t);
+ fs->promisc.ft.t = NULL;
}
-void mlx5e_set_rx_mode_work(struct work_struct *work)
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
{
- struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
- set_rx_mode_work);
-
- struct mlx5e_l2_table *ea = &priv->fs.l2;
- struct net_device *ndev = priv->netdev;
+ struct mlx5e_l2_table *ea = &fs->l2;
- bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
- bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC);
- bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI);
+ bool rx_mode_enable = fs->state_destroy;
+ bool promisc_enabled = rx_mode_enable && (netdev->flags & IFF_PROMISC);
+ bool allmulti_enabled = rx_mode_enable && (netdev->flags & IFF_ALLMULTI);
bool broadcast_enabled = rx_mode_enable;
bool enable_promisc = !ea->promisc_enabled && promisc_enabled;
@@ -801,32 +829,32 @@ void mlx5e_set_rx_mode_work(struct work_struct *work)
int err;
if (enable_promisc) {
- err = mlx5e_create_promisc_table(priv);
+ err = mlx5e_create_promisc_table(fs);
if (err)
enable_promisc = false;
- if (!priv->channels.params.vlan_strip_disable && !err)
- netdev_warn_once(ndev,
- "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
+ if (!fs->vlan_strip_disable && !err)
+ fs_warn_once(fs,
+ "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
}
if (enable_allmulti)
- mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
+ mlx5e_add_l2_flow_rule(fs, &ea->allmulti, MLX5E_ALLMULTI);
if (enable_broadcast)
- mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
+ mlx5e_add_l2_flow_rule(fs, &ea->broadcast, MLX5E_FULLMATCH);
- mlx5e_handle_netdev_addr(priv);
+ mlx5e_handle_netdev_addr(fs, netdev);
if (disable_broadcast)
- mlx5e_del_l2_flow_rule(priv, &ea->broadcast);
+ mlx5e_del_l2_flow_rule(fs, &ea->broadcast);
if (disable_allmulti)
- mlx5e_del_l2_flow_rule(priv, &ea->allmulti);
+ mlx5e_del_l2_flow_rule(fs, &ea->allmulti);
if (disable_promisc)
- mlx5e_destroy_promisc_table(priv);
+ mlx5e_destroy_promisc_table(fs);
ea->promisc_enabled = promisc_enabled;
ea->allmulti_enabled = allmulti_enabled;
ea->broadcast_enabled = broadcast_enabled;
- mlx5e_vport_context_update(priv);
+ mlx5e_vport_context_update(fs, netdev);
}
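With the work-struct unpacking dropped from this function, the container_of() step has to stay with the owner of the work item. Assumed shape of the trampoline left behind in en_main.c:

void mlx5e_set_rx_mode_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
					       set_rx_mode_work);

	mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev);
}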
static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
@@ -841,9 +869,9 @@ static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
ft->num_groups = 0;
}
-void mlx5e_init_l2_addr(struct mlx5e_priv *priv)
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev)
{
- ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast);
+ ether_addr_copy(fs->l2.broadcast.addr, netdev->broadcast);
}
void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
@@ -854,14 +882,15 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
ft->t = NULL;
}
-static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
+static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
struct ttc_params *ttc_params)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
int tt;
memset(ttc_params, 0, sizeof(*ttc_params));
- ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
@@ -870,13 +899,14 @@ static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
ttc_params->dests[tt].tir_num =
tt == MLX5_TT_ANY ?
- mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
- mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res,
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss_inner(rx_res,
tt);
}
}
-void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
struct ttc_params *ttc_params, bool tunnel)
{
@@ -884,7 +914,7 @@ void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
int tt;
memset(ttc_params, 0, sizeof(*ttc_params));
- ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
@@ -893,23 +923,23 @@ void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
ttc_params->dests[tt].tir_num =
tt == MLX5_TT_ANY ?
- mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
- mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss(rx_res, tt);
}
ttc_params->inner_ttc = tunnel;
- if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev))
+ if (!tunnel || !mlx5_tunnel_inner_ft_supported(fs->mdev))
return;
for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
ttc_params->tunnel_dests[tt].type =
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
ttc_params->tunnel_dests[tt].ft =
- mlx5_get_ttc_flow_table(priv->fs.inner_ttc);
+ mlx5_get_ttc_flow_table(fs->inner_ttc);
}
}
-static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai)
{
if (!IS_ERR_OR_NULL(ai->rule)) {
@@ -918,10 +948,10 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
}
}
-static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai, int type)
{
- struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
+ struct mlx5_flow_table *ft = fs->l2.ft.t;
struct mlx5_flow_destination dest = {};
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_spec *spec;
@@ -939,7 +969,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
outer_headers.dmac_47_16);
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
+ dest.ft = mlx5_get_ttc_flow_table(fs->ttc);
switch (type) {
case MLX5E_FULLMATCH:
@@ -957,8 +987,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(ai->rule)) {
- netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
- __func__, mv_dmac);
+ fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac);
err = PTR_ERR(ai->rule);
ai->rule = NULL;
}
@@ -1042,14 +1071,14 @@ err_destroy_groups:
return err;
}
-static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_l2_table(struct mlx5e_flow_steering *fs)
{
- mlx5e_destroy_flow_table(&priv->fs.l2.ft);
+ mlx5e_destroy_flow_table(&fs->l2.ft);
}
-static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
+static int mlx5e_create_l2_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_l2_table *l2_table = &priv->fs.l2;
+ struct mlx5e_l2_table *l2_table = &fs->l2;
struct mlx5e_flow_table *ft = &l2_table->ft;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -1060,7 +1089,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
ft_attr.level = MLX5E_L2_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
@@ -1180,20 +1209,20 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft)
return err;
}
-static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
+static int mlx5e_fs_create_vlan_table(struct mlx5e_flow_steering *fs)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_flow_table *ft;
int err;
- ft = &priv->fs.vlan->ft;
+ ft = &fs->vlan->ft;
ft->num_groups = 0;
ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
ft_attr.level = MLX5E_VLAN_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t))
return PTR_ERR(ft->t);
@@ -1207,7 +1236,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
if (err)
goto err_free_g;
- mlx5e_add_vlan_rules(priv);
+ mlx5e_fs_add_vlan_rules(fs);
return 0;
@@ -1219,139 +1248,332 @@ err_destroy_vlan_table:
return err;
}
-static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_vlan_table(struct mlx5e_flow_steering *fs)
{
- mlx5e_del_vlan_rules(priv);
- mlx5e_destroy_flow_table(&priv->fs.vlan->ft);
+ mlx5e_del_vlan_rules(fs);
+ mlx5e_destroy_flow_table(&fs->vlan->ft);
}
-static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_flow_steering *fs)
{
- if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
return;
- mlx5_destroy_ttc_table(priv->fs.inner_ttc);
+ mlx5_destroy_ttc_table(fs->inner_ttc);
}
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs)
{
- mlx5_destroy_ttc_table(priv->fs.ttc);
+ mlx5_destroy_ttc_table(fs->ttc);
}
-static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
+static int mlx5e_create_inner_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
{
struct ttc_params ttc_params = {};
- if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
return 0;
- mlx5e_set_inner_ttc_params(priv, &ttc_params);
- priv->fs.inner_ttc = mlx5_create_inner_ttc_table(priv->mdev,
- &ttc_params);
- if (IS_ERR(priv->fs.inner_ttc))
- return PTR_ERR(priv->fs.inner_ttc);
+ mlx5e_set_inner_ttc_params(fs, rx_res, &ttc_params);
+ fs->inner_ttc = mlx5_create_inner_ttc_table(fs->mdev,
+ &ttc_params);
+ if (IS_ERR(fs->inner_ttc))
+ return PTR_ERR(fs->inner_ttc);
return 0;
}
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
{
struct ttc_params ttc_params = {};
- mlx5e_set_ttc_params(priv, &ttc_params, true);
- priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
- if (IS_ERR(priv->fs.ttc))
- return PTR_ERR(priv->fs.ttc);
+ mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true);
+ fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params);
+ if (IS_ERR(fs->ttc))
+ return PTR_ERR(fs->ttc);
return 0;
}
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev)
{
+ struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
int err;
- priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
-
- if (!priv->fs.ns)
+ if (!ns)
return -EOPNOTSUPP;
- err = mlx5e_arfs_create_tables(priv);
+ mlx5e_fs_set_ns(fs, ns, false);
+ err = mlx5e_arfs_create_tables(fs, rx_res,
+ !!(netdev->hw_features & NETIF_F_NTUPLE));
if (err) {
- netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
- err);
- priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ fs_err(fs, "Failed to create arfs tables, err=%d\n", err);
+ netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- err = mlx5e_create_inner_ttc_table(priv);
+ err = mlx5e_create_inner_ttc_table(fs, rx_res);
if (err) {
- netdev_err(priv->netdev,
- "Failed to create inner ttc table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create inner ttc table, err=%d\n", err);
goto err_destroy_arfs_tables;
}
- err = mlx5e_create_ttc_table(priv);
+ err = mlx5e_create_ttc_table(fs, rx_res);
if (err) {
- netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create ttc table, err=%d\n", err);
goto err_destroy_inner_ttc_table;
}
- err = mlx5e_create_l2_table(priv);
+ err = mlx5e_create_l2_table(fs);
if (err) {
- netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create l2 table, err=%d\n", err);
goto err_destroy_ttc_table;
}
- err = mlx5e_create_vlan_table(priv);
+ err = mlx5e_fs_create_vlan_table(fs);
if (err) {
- netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create vlan table, err=%d\n", err);
goto err_destroy_l2_table;
}
- err = mlx5e_ptp_alloc_rx_fs(priv);
+ err = mlx5e_ptp_alloc_rx_fs(fs, profile);
if (err)
goto err_destroy_vlan_table;
- mlx5e_ethtool_init_steering(priv);
+ mlx5e_ethtool_init_steering(fs);
return 0;
err_destroy_vlan_table:
- mlx5e_destroy_vlan_table(priv);
+ mlx5e_destroy_vlan_table(fs);
err_destroy_l2_table:
- mlx5e_destroy_l2_table(priv);
+ mlx5e_destroy_l2_table(fs);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv);
+ mlx5e_destroy_ttc_table(fs);
err_destroy_inner_ttc_table:
- mlx5e_destroy_inner_ttc_table(priv);
+ mlx5e_destroy_inner_ttc_table(fs);
err_destroy_arfs_tables:
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_arfs_destroy_tables(fs, !!(netdev->hw_features & NETIF_F_NTUPLE));
return err;
}
-void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile)
{
- mlx5e_ptp_free_rx_fs(priv);
- mlx5e_destroy_vlan_table(priv);
- mlx5e_destroy_l2_table(priv);
- mlx5e_destroy_ttc_table(priv);
- mlx5e_destroy_inner_ttc_table(priv);
- mlx5e_arfs_destroy_tables(priv);
- mlx5e_ethtool_cleanup_steering(priv);
+ mlx5e_ptp_free_rx_fs(fs, profile);
+ mlx5e_destroy_vlan_table(fs);
+ mlx5e_destroy_l2_table(fs);
+ mlx5e_destroy_ttc_table(fs);
+ mlx5e_destroy_inner_ttc_table(fs);
+ mlx5e_arfs_destroy_tables(fs, ntuple);
+ mlx5e_ethtool_cleanup_steering(fs);
}
-int mlx5e_fs_init(struct mlx5e_priv *priv)
+static int mlx5e_fs_vlan_alloc(struct mlx5e_flow_steering *fs)
{
- priv->fs.vlan = kvzalloc(sizeof(*priv->fs.vlan), GFP_KERNEL);
- if (!priv->fs.vlan)
+ fs->vlan = kvzalloc(sizeof(*fs->vlan), GFP_KERNEL);
+ if (!fs->vlan)
return -ENOMEM;
return 0;
}
-void mlx5e_fs_cleanup(struct mlx5e_priv *priv)
+static void mlx5e_fs_vlan_free(struct mlx5e_flow_steering *fs)
+{
+ kvfree(fs->vlan);
+}
+
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs)
+{
+ return fs->vlan;
+}
+
+static int mlx5e_fs_tc_alloc(struct mlx5e_flow_steering *fs)
+{
+ fs->tc = mlx5e_tc_table_alloc();
+ if (IS_ERR(fs->tc))
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_fs_tc_free(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_tc_table_free(fs->tc);
+}
+
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs)
+{
+ return fs->tc;
+}
+
+#ifdef CONFIG_MLX5_EN_RXNFC
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{
+ return mlx5e_ethtool_alloc(&fs->ethtool);
+}
+
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_ethtool_free(fs->ethtool);
+}
+
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs)
+{
+ return fs->ethtool;
+}
+#else
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{ return 0; }
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { }
+#endif
+
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy)
+{
+ struct mlx5e_flow_steering *fs;
+ int err;
+
+ fs = kvzalloc(sizeof(*fs), GFP_KERNEL);
+ if (!fs)
+ goto err;
+
+ fs->mdev = mdev;
+ fs->state_destroy = state_destroy;
+ if (mlx5e_profile_feature_cap(profile, FS_VLAN)) {
+ err = mlx5e_fs_vlan_alloc(fs);
+ if (err)
+ goto err_free_fs;
+ }
+
+ if (mlx5e_profile_feature_cap(profile, FS_TC)) {
+ err = mlx5e_fs_tc_alloc(fs);
+ if (err)
+ goto err_free_vlan;
+ }
+
+ err = mlx5e_fs_ethtool_alloc(fs);
+ if (err)
+ goto err_free_tc;
+
+ return fs;
+err_free_tc:
+ mlx5e_fs_tc_free(fs);
+err_free_vlan:
+ mlx5e_fs_vlan_free(fs);
+err_free_fs:
+ kvfree(fs);
+err:
+ return NULL;
+}
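Note the inverted sense of state_destroy: it is true while the netdev is *not* being torn down, which is what makes rx_mode_enable = fs->state_destroy and the sync-address check earlier in this patch line up with the old !test_bit(MLX5E_STATE_DESTROYING) logic. A hedged usage sketch for the constructor (exact call site assumed):

priv->fs = mlx5e_fs_init(priv->profile, mdev,
			 !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
if (!priv->fs)
	return -ENOMEM;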
+
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_fs_ethtool_free(fs);
+ mlx5e_fs_tc_free(fs);
+ mlx5e_fs_vlan_free(fs);
+ kvfree(fs);
+}
+
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs)
+{
+ return &fs->l2;
+}
+
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress)
+{
+ return egress ? fs->egress_ns : fs->ns;
+}
+
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress)
+{
+ if (!egress)
+ fs->ns = ns;
+ else
+ fs->egress_ns = ns;
+}
+
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner)
+{
+ return inner ? fs->inner_ttc : fs->ttc;
+}
+
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner)
+{
+ if (!inner)
+ fs->ttc = ttc;
+ else
+ fs->inner_ttc = ttc;
+}
+
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs)
+{
+ return fs->arfs;
+}
+
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs)
+{
+ fs->arfs = arfs;
+}
+#endif
+
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs)
+{
+ return fs->ptp_fs;
+}
+
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs)
+{
+ fs->ptp_fs = ptp_fs;
+}
+
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs)
+{
+ return fs->any;
+}
+
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any)
+{
+ fs->any = any;
+}
+
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs)
+{
+ return fs->accel_tcp;
+}
+
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp)
+{
+ fs->accel_tcp = accel_tcp;
+}
+#endif
+
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy)
+{
+ fs->state_destroy = state_destroy;
+}
+
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs,
+ bool vlan_strip_disable)
+{
+ fs->vlan_strip_disable = vlan_strip_disable;
+}
+
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs)
+{
+ return fs->udp;
+}
+
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp)
+{
+ fs->udp = udp;
+}
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs)
{
- kvfree(priv->fs.vlan);
- priv->fs.vlan = NULL;
+ return fs->mdev;
}
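The run of getters and setters above exists so that out-of-file users (aRFS, PTP, accel, ethtool) keep compiling once struct mlx5e_flow_steering becomes private to en_fs.c. An illustrative, non-verbatim consumer:

/* Sketch: resolve the outer TTC flow table through the opaque handle. */
static struct mlx5_flow_table *
example_get_ttc_ft(struct mlx5e_flow_steering *fs)
{
	return mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(fs, false));
}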
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index ad0d234632a3..aac32e505c14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -34,6 +34,22 @@
#include "en.h"
#include "en/params.h"
#include "en/xsk/pool.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_ethtool_table {
+ struct mlx5_flow_table *ft;
+ int num_rules;
+};
+
+#define ETHTOOL_NUM_L3_L4_FTS 7
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+ struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
+ struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
+ struct list_head rules;
+ int tot_num_rules;
+};
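Moving struct mlx5e_ethtool_steering out of the header makes it an opaque type. The header is assumed to retain only a forward declaration plus the prototypes introduced below, roughly:

/* Assumed shape of en/fs_ethtool.h after this patch: */
struct mlx5e_ethtool_steering;

int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool);
void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool);
void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs);
void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs);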
static int flow_type_to_traffic_type(u32 flow_type);
@@ -66,6 +82,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
struct ethtool_rx_flow_spec *fs,
int num_tuples)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_ethtool_table *eth_ft;
struct mlx5_flow_namespace *ns;
@@ -81,18 +98,18 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
case UDP_V6_FLOW:
max_tuples = ETHTOOL_NUM_L3_L4_FTS;
prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
- eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ eth_ft = &ethtool->l3_l4_ft[prio];
break;
case IP_USER_FLOW:
case IPV6_USER_FLOW:
max_tuples = ETHTOOL_NUM_L3_L4_FTS;
prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
- eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ eth_ft = &ethtool->l3_l4_ft[prio];
break;
case ETHER_FLOW:
max_tuples = ETHTOOL_NUM_L2_FTS;
prio = max_tuples - num_tuples;
- eth_ft = &priv->fs.ethtool.l2_ft[prio];
+ eth_ft = &ethtool->l2_ft[prio];
prio += MLX5E_ETHTOOL_L2_PRIO;
break;
default:
@@ -382,15 +399,16 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
static void add_rule_to_list(struct mlx5e_priv *priv,
struct mlx5e_ethtool_rule *rule)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ struct list_head *head = &ethtool->rules;
struct mlx5e_ethtool_rule *iter;
- struct list_head *head = &priv->fs.ethtool.rules;
- list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ list_for_each_entry(iter, &ethtool->rules, list) {
if (iter->flow_spec.location > rule->flow_spec.location)
break;
head = &iter->list;
}
- priv->fs.ethtool.tot_num_rules++;
+ ethtool->tot_num_rules++;
list_add(&rule->list, head);
}
@@ -433,15 +451,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
eth_rule->rss = rss;
mlx5e_rss_refcnt_inc(eth_rule->rss);
} else {
- struct mlx5e_params *params = &priv->channels.params;
- enum mlx5e_rq_group group;
- u16 ix;
-
- mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
-
- *tirn = group == MLX5E_RQ_GROUP_XSK ?
- mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix) :
- mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix);
+ *tirn = mlx5e_rx_res_get_tirn_direct(priv->rx_res, fs->ring_cookie);
}
return 0;
@@ -499,15 +509,16 @@ free:
return err ? ERR_PTR(err) : rule;
}
-static void del_ethtool_rule(struct mlx5e_priv *priv,
+static void del_ethtool_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_ethtool_rule *eth_rule)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
if (eth_rule->rule)
mlx5_del_flow_rules(eth_rule->rule);
if (eth_rule->rss)
mlx5e_rss_refcnt_dec(eth_rule->rss);
list_del(&eth_rule->list);
- priv->fs.ethtool.tot_num_rules--;
+ ethtool->tot_num_rules--;
put_flow_table(eth_rule->eth_ft);
kfree(eth_rule);
}
@@ -515,9 +526,10 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv,
int location)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5e_ethtool_rule *iter;
- list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ list_for_each_entry(iter, &ethtool->rules, list) {
if (iter->flow_spec.location == location)
return iter;
}
@@ -531,7 +543,7 @@ static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv,
eth_rule = find_ethtool_rule(priv, location);
if (eth_rule)
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
if (!eth_rule)
@@ -662,8 +674,7 @@ static int validate_flow(struct mlx5e_priv *priv,
return -ENOSPC;
if (fs->ring_cookie != RX_CLS_FLOW_DISC)
- if (!mlx5e_qid_validate(priv->profile, &priv->channels.params,
- fs->ring_cookie))
+ if (fs->ring_cookie >= priv->channels.params.num_channels)
return -EINVAL;
switch (flow_type_mask(fs->flow_type)) {
@@ -742,10 +753,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
eth_rule->flow_spec = *fs;
eth_rule->eth_ft = eth_ft;
- if (!eth_ft->ft) {
- err = -EINVAL;
- goto del_ethtool_rule;
- }
+
rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -757,7 +765,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
return 0;
del_ethtool_rule:
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
return err;
}
@@ -777,7 +785,7 @@ mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location)
goto out;
}
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
out:
return err;
}
@@ -786,12 +794,13 @@ static int
mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
struct ethtool_rxnfc *info, int location)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5e_ethtool_rule *eth_rule;
if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
return -EINVAL;
- list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
+ list_for_each_entry(eth_rule, &ethtool->rules, list) {
int index;
if (eth_rule->flow_spec.location != location)
@@ -829,18 +838,34 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
return err;
}
-void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv)
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{
+ *ethtool = kvzalloc(sizeof(**ethtool), GFP_KERNEL);
+ if (!*ethtool)
+ return -ENOMEM;
+ return 0;
+}
+
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool)
{
+ kvfree(ethtool);
+}
+
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
struct mlx5e_ethtool_rule *iter;
struct mlx5e_ethtool_rule *temp;
- list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list)
- del_ethtool_rule(priv, iter);
+ list_for_each_entry_safe(iter, temp, &ethtool->rules, list)
+ del_ethtool_rule(fs, iter);
}
-void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs)
{
- INIT_LIST_HEAD(&priv->fs.ethtool.rules);
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
+
+ INIT_LIST_HEAD(&ethtool->rules);
}
static int flow_type_to_traffic_type(u32 flow_type)
@@ -962,11 +987,12 @@ int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
struct ethtool_rxnfc *info, u32 *rule_locs)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
int err = 0;
switch (info->cmd) {
case ETHTOOL_GRXCLSRLCNT:
- info->rule_cnt = priv->fs.ethtool.tot_num_rules;
+ info->rule_cnt = ethtool->tot_num_rules;
break;
case ETHTOOL_GRXCLSRULE:
err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 65571593ec5c..e3a4f01bcceb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -31,12 +31,12 @@
*/
#include <net/tc_act/tc_gact.h>
-#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <linux/bpf.h>
#include <linux/if_bridge.h>
+#include <linux/filter.h>
#include <net/page_pool.h>
#include <net/xdp_sock_drv.h>
#include "eswitch.h"
@@ -45,10 +45,9 @@
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
+#include "en_accel/macsec.h"
#include "en_accel/en_accel.h"
-#include "en_accel/tls.h"
-#include "accel/ipsec.h"
-#include "accel/tls.h"
+#include "en_accel/ktls.h"
#include "lib/vxlan.h"
#include "lib/clock.h"
#include "en/port.h"
@@ -65,25 +64,29 @@
#include "en/devlink.h"
#include "lib/mlx5.h"
#include "en/ptp.h"
+#include "en/htb.h"
#include "qos.h"
#include "en/trap.h"
-#include "fpga/ipsec.h"
-bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
{
- bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
- MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
- MLX5_CAP_ETH(mdev, reg_umr_sq);
- u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
- bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
+ u16 umr_wqebbs, max_wqebbs;
+ bool striding_rq_umr;
+ striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+ MLX5_CAP_ETH(mdev, reg_umr_sq);
if (!striding_rq_umr)
return false;
- if (!inline_umr) {
- mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
- (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
+
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+ max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+ /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is
+ * calculated from mlx5e_get_max_sq_aligned_wqebbs.
+ */
+ if (WARN_ON(umr_wqebbs > max_wqebbs))
return false;
- }
+
return true;
}
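The capability question is now answered per (page_shift, umr_mode) pair instead of once per device. A hedged caller sketch for the common case (the enum value is defined later in this patch):

if (!mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
					    MLX5E_MPWRQ_UMR_MODE_ALIGNED))
	return -EOPNOTSUPP;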
@@ -200,21 +203,35 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb);
}
+static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+
+ WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD);
+
+ return entries * umr_entry_size / MLX5_OCTWORD;
+}
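A worked instance of the octoword arithmetic, assuming the usual mlx5 layout where struct mlx5_mtt is 8 bytes and MLX5_OCTWORD is 16:

/* 64 pages per WQE in aligned (MTT) mode:
 * 64 entries * 8 bytes = 512 bytes = 32 octowords, remainder 0,
 * so the WARN_ON_ONCE above stays quiet.
 */
u16 octowords = mlx5e_mpwrq_umr_octowords(64, MLX5E_MPWRQ_UMR_MODE_ALIGNED);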
+
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
struct mlx5e_icosq *sq,
struct mlx5e_umr_wqe *wqe)
{
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
- u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
+ u16 octowords;
+ u8 ds_cnt;
+
+ ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode),
+ MLX5_SEND_WQE_DS);
cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
ds_cnt);
- cseg->umr_mkey = rq->mkey_be;
+ cseg->umr_mkey = rq->mpwqe.umr_mkey_be;
ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
- ucseg->xlt_octowords =
- cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
+ octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode);
+ ucseg->xlt_octowords = cpu_to_be16(octowords);
ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
}
@@ -260,10 +277,12 @@ static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
{
int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ size_t alloc_size;
+
+ alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units,
+ rq->mpwqe.pages_per_wqe));
- rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
- sizeof(*rq->mpwqe.info)),
- GFP_KERNEL, node);
+ rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
if (!rq->mpwqe.info)
return -ENOMEM;
@@ -272,18 +291,52 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
return 0;
}
-static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
- u64 npages, u8 page_shift, u32 *umr_mkey,
- dma_addr_t filler_addr)
+
+static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5_MKC_ACCESS_MODE_MTT;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return MLX5_MKC_ACCESS_MODE_KSM;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return MLX5_MKC_ACCESS_MODE_KLMS;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return MLX5_MKC_ACCESS_MODE_KSM;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
+ u32 npages, u8 page_shift, u32 *umr_mkey,
+ dma_addr_t filler_addr,
+ enum mlx5e_mpwrq_umr_mode umr_mode,
+ u32 xsk_chunk_size)
{
struct mlx5_mtt *mtt;
+ struct mlx5_ksm *ksm;
+ struct mlx5_klm *klm;
+ u32 octwords;
int inlen;
void *mkc;
u32 *in;
int err;
int i;
- inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
+ if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED ||
+ umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) &&
+ !MLX5_CAP_GEN(mdev, fixed_buffer_size)) {
+ mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n");
+ return -EINVAL;
+ }
+
+ octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode);
+
+ inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in),
+ MLX5_OCTWORD, octwords);
+ if (inlen < 0)
+ return inlen;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
@@ -295,16 +348,17 @@ static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
- MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode));
mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
MLX5_SET64(mkc, mkc, len, npages << page_shift);
- MLX5_SET(mkc, mkc, translations_octword_size,
- MLX5_MTT_OCTW(npages));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
- MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- MLX5_MTT_OCTW(npages));
+ MLX5_SET(mkc, mkc, translations_octword_size, octwords);
+ if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)
+ MLX5_SET(mkc, mkc, log_page_size, page_shift - 2);
+ else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED)
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords);
/* Initialize the mkey with all MTTs pointing to a default
* page (filler_addr). When the channels are activated, UMR
@@ -312,9 +366,47 @@ static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
* the RQ's pool, while the gaps (wqe_overflow) remain mapped
* to the default page.
*/
- mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- for (i = 0 ; i < npages ; i++)
- mtt[i].ptag = cpu_to_be64(filler_addr);
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++) {
+ klm[i << 1] = (struct mlx5_klm) {
+ .va = cpu_to_be64(filler_addr),
+ .bcount = cpu_to_be32(xsk_chunk_size),
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ };
+ klm[(i << 1) + 1] = (struct mlx5_klm) {
+ .va = cpu_to_be64(filler_addr),
+ .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size),
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ };
+ }
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++)
+ ksm[i] = (struct mlx5_ksm) {
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ .va = cpu_to_be64(filler_addr),
+ };
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++)
+ mtt[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(filler_addr),
+ };
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages * 4; i++) {
+ ksm[i] = (struct mlx5_ksm) {
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ .va = cpu_to_be64(filler_addr),
+ };
+ }
+ break;
+ }
err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
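As a concrete illustration of the OVERSIZED filler layout above (the numbers are hypothetical, not taken from the source):

/* Hypothetical example of the OVERSIZED split:
 *   page_shift = 12       -> 4096-byte pages
 *   xsk_chunk_size = 3072 -> klm[2i].bcount     = 3072
 *                            klm[2i + 1].bcount = 4096 - 3072 = 1024
 * Both KLM entries of each pair reference filler_addr until UMR WQEs
 * remap them to real buffers when the channel is activated.
 */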
@@ -357,10 +449,27 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
{
- u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
+ u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0;
+ u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ u32 num_entries, max_num_entries;
+ u32 umr_mkey;
+ int err;
+
+ max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode);
- return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, PAGE_SHIFT,
- &rq->umr_mkey, rq->wqe_overflow.addr);
+ /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. */
+ if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe,
+ &num_entries) ||
+ num_entries > max_num_entries))
+ mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n",
+ __func__, wq_size, rq->mpwqe.mtts_per_wqe,
+ max_num_entries);
+
+ err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift,
+ &umr_mkey, rq->wqe_overflow.addr,
+ rq->mpwqe.umr_mode, xsk_chunk_size);
+ rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey);
+ return err;
}
static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
@@ -377,18 +486,20 @@ static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
&rq->mpwqe.shampo->mkey);
}
-static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix)
-{
- return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT;
-}
-
static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
{
struct mlx5e_wqe_frag_info next_frag = {};
struct mlx5e_wqe_frag_info *prev = NULL;
int i;
- next_frag.di = &rq->wqe.di[0];
+ if (rq->xsk_pool) {
+ /* Assumptions used by XSK batched allocator. */
+ WARN_ON(rq->wqe.info.num_frags != 1);
+ WARN_ON(rq->wqe.info.log_num_frags != 0);
+ WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
+ }
+
+ next_frag.au = &rq->wqe.alloc_units[0];
for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
@@ -398,7 +509,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
- next_frag.di++;
+ next_frag.au++;
next_frag.offset = 0;
if (prev)
prev->last_in_page = true;
@@ -415,12 +526,13 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
prev->last_in_page = true;
}
-int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node)
+static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node)
{
int len = wq_sz << rq->wqe.info.log_num_frags;
- rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)), GFP_KERNEL, node);
- if (!rq->wqe.di)
+ rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)),
+ GFP_KERNEL, node);
+ if (!rq->wqe.alloc_units)
return -ENOMEM;
mlx5e_init_frags_partition(rq);
@@ -428,9 +540,9 @@ int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node)
return 0;
}
-void mlx5e_free_di_list(struct mlx5e_rq *rq)
+static void mlx5e_free_au_list(struct mlx5e_rq *rq)
{
- kvfree(rq->wqe.di);
+ kvfree(rq->wqe.alloc_units);
}
static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
@@ -476,16 +588,17 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
rq->clock = &mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
+ rq->channel = c;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
rq->xdpsq = &c->rq_xdpsq;
- rq->stats = &c->priv->channel_stats[c->ix].rq;
+ rq->stats = &c->priv->channel_stats[c->ix]->rq;
rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
err = mlx5e_rq_set_handlers(rq, params, NULL);
if (err)
return err;
- return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
}
static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
@@ -572,6 +685,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
pool_size = 1 << params->log_rq_mtu_frames;
+ rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
+
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
@@ -587,19 +702,31 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
- pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
- mlx5e_mpwqe_get_log_rq_size(params, xsk);
+ rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ rq->mpwqe.pages_per_wqe =
+ mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+ rq->mpwqe.umr_wqebbs =
+ mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+ rq->mpwqe.mtts_per_wqe =
+ mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+
+ pool_size = rq->mpwqe.pages_per_wqe <<
+ mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz);
rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
goto err_rq_drop_page;
- rq->mkey_be = cpu_to_be32(rq->umr_mkey);
err = mlx5e_rq_alloc_mpwqe_info(rq, node);
if (err)
@@ -607,7 +734,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node);
if (err)
- goto err_free_by_rq_type;
+ goto err_free_mpwqe_info;
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -632,11 +759,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
goto err_rq_wq_destroy;
}
- err = mlx5e_init_di_list(rq, wq_sz, node);
+ err = mlx5e_init_au_list(rq, wq_sz, node);
if (err)
goto err_rq_frags;
-
- rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
}
if (xsk) {
@@ -661,14 +786,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
if (IS_ERR(rq->page_pool)) {
err = PTR_ERR(rq->page_pool);
rq->page_pool = NULL;
- goto err_free_shampo;
+ goto err_free_by_rq_type;
}
if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_PAGE_POOL, rq->page_pool);
}
if (err)
- goto err_free_shampo;
+ goto err_destroy_page_pool;
for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -676,13 +801,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
u32 byte_count =
rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
- u64 dma_offset = mlx5e_get_mpwqe_offset(i);
+ u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) <<
+ rq->mpwqe.page_shift;
u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ?
0 : rq->buff.headroom;
wqe->data[0].addr = cpu_to_be64(dma_offset + headroom);
wqe->data[0].byte_count = cpu_to_be32(byte_count);
- wqe->data[0].lkey = rq->mkey_be;
+ wqe->data[0].lkey = rq->mpwqe.umr_mkey_be;
} else {
struct mlx5e_rx_wqe_cyc *wqe =
mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
@@ -720,19 +846,21 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
return 0;
-err_free_shampo:
- mlx5e_rq_free_shampo(rq);
+err_destroy_page_pool:
+ page_pool_destroy(rq->page_pool);
err_free_by_rq_type:
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ mlx5e_rq_free_shampo(rq);
+err_free_mpwqe_info:
kvfree(rq->mpwqe.info);
err_rq_mkey:
- mlx5_core_destroy_mkey(mdev, rq->umr_mkey);
+ mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
err_rq_drop_page:
mlx5e_free_mpwqe_rq_drop_page(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
- mlx5e_free_di_list(rq);
+ mlx5e_free_au_list(rq);
err_rq_frags:
kvfree(rq->wqe.frags);
}
@@ -760,24 +888,22 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
- mlx5_core_destroy_mkey(rq->mdev, rq->umr_mkey);
+ mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
mlx5e_free_mpwqe_rq_drop_page(rq);
mlx5e_rq_free_shampo(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
kvfree(rq->wqe.frags);
- mlx5e_free_di_list(rq);
+ mlx5e_free_au_list(rq);
}
for (i = rq->page_cache.head; i != rq->page_cache.tail;
i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
- struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
-
/* With AF_XDP, page_cache is not used, so this loop is not
* entered, and it's safe to call mlx5e_page_release_dynamic
* directly.
*/
- mlx5e_page_release_dynamic(rq, dma_info, false);
+ mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false);
}
xdp_rxq_info_unreg(&rq->xdp_rxq);
@@ -832,7 +958,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
return err;
}
-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
+static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
{
struct mlx5_core_dev *mdev = rq->mdev;
@@ -861,6 +987,32 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
return err;
}
+static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+{
+ struct net_device *dev = rq->netdev;
+ int err;
+
+ err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
+ if (err) {
+ netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
+ return err;
+ }
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err) {
+ netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
+{
+ mlx5e_free_rx_descs(rq);
+
+ return mlx5e_rq_to_ready(rq, curr_state);
+}
+
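mlx5e_flush_rq() pairs descriptor cleanup with the RST-to-RDY transition above. A minimal usage sketch, assuming the caller has detected the RQ in the error state (example_rq_recover is a hypothetical name; this mirrors how an RX recovery path would be expected to use it, not code copied from the driver):

static int example_rq_recover(struct mlx5e_rq *rq)
{
	/* Drop the posted descriptors, then drive the RQ back to ready. */
	return mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
}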
static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
{
struct mlx5_core_dev *mdev = rq->mdev;
@@ -1033,9 +1185,6 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
if (err)
goto err_destroy_rq;
- if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev))
- __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
-
if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
@@ -1069,13 +1218,6 @@ err_free_rq:
void mlx5e_activate_rq(struct mlx5e_rq *rq)
{
set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
- if (rq->icosq) {
- mlx5e_trigger_irq(rq->icosq);
- } else {
- local_bh_disable();
- napi_schedule(rq->cq.napi);
- local_bh_enable();
- }
}
void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
@@ -1087,8 +1229,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
void mlx5e_close_rq(struct mlx5e_rq *rq)
{
cancel_work_sync(&rq->dim.work);
- if (rq->icosq)
- cancel_work_sync(&rq->icosq->recover_work);
cancel_work_sync(&rq->recover_work);
mlx5e_destroy_rq(rq);
mlx5e_free_rx_descs(rq);
@@ -1161,10 +1301,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->xsk_pool = xsk_pool;
sq->stats = sq->xsk_pool ?
- &c->priv->channel_stats[c->ix].xsksq :
+ &c->priv->channel_stats[c->ix]->xsksq :
is_redirect ?
- &c->priv->channel_stats[c->ix].xdpsq :
- &c->priv->channel_stats[c->ix].rq_xdpsq;
+ &c->priv->channel_stats[c->ix]->xdpsq :
+ &c->priv->channel_stats[c->ix]->rq_xdpsq;
+ sq->stop_room = param->is_mpw ? mlx5e_stop_room_for_mpwqe(mdev) :
+ mlx5e_stop_room_for_max_wqe(mdev);
+ sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1216,9 +1359,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_icosq_cqe_err(sq);
}
+static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+ recover_work);
+
+ /* Not implemented yet. */
+
+ netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
+}
+
static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
struct mlx5e_sq_param *param,
- struct mlx5e_icosq *sq)
+ struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
struct mlx5_core_dev *mdev = c->mdev;
@@ -1239,7 +1393,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
if (err)
goto err_sq_wq_destroy;
- INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
+ INIT_WORK(&sq->recover_work, recover_work_func);
return 0;
@@ -1303,7 +1457,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int err;
sq->pdev = c->pdev;
- sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
@@ -1314,10 +1467,11 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
- if (MLX5_IPSEC_DEV(c->priv->mdev))
+ if (mlx5_ipsec_device_caps(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (param->is_mpw)
set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
@@ -1575,13 +1729,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_tx_err_cqe(sq);
}
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
+static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_icosq(c, param, sq);
+ err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
if (err)
return err;
@@ -1620,7 +1775,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
synchronize_net(); /* Sync with NAPI. */
}
-void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1648,14 +1803,22 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
csp.wq_ctrl = &sq->wq_ctrl;
csp.min_inline_mode = sq->min_inline_mode;
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+
+ /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
+ * supported by upstream, and there is no defined trigger to allow
+ * transmitting redirected multi-buffer frames.
+ */
+ if (param->is_xdp_mb && !is_redirect)
+ set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
+
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
if (err)
goto err_free_xdpsq;
mlx5e_set_xmit_fp(sq, param->is_mpw);
- if (!param->is_mpw) {
- unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+ if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
+ unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
unsigned int inline_hdr_sz = 0;
int i;
@@ -1898,8 +2061,7 @@ static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix,
{
int tc;
- if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL ||
- !params->mqprio.channel.rl) {
+ if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) {
*hw_id = 0;
return 0;
}
@@ -1908,7 +2070,14 @@ static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix,
if (tc < 0)
return tc;
- return mlx5e_mqprio_rl_get_node_hw_id(params->mqprio.channel.rl, tc, hw_id);
+ if (tc >= params->mqprio.num_tc) {
+ WARN(1, "Unexpected TCs configuration. tc %d is out of range of %u",
+ tc, params->mqprio.num_tc);
+ return -EINVAL;
+ }
+
+ *hw_id = params->mqprio.channel.hw_id[tc];
+ return 0;
}
static int mlx5e_open_sqs(struct mlx5e_channel *c,
@@ -1928,7 +2097,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
params, &cparam->txq_sq, &c->sq[tc], tc,
qos_queue_group_id,
- &c->priv->channel_stats[c->ix].sq[tc]);
+ &c->priv->channel_stats[c->ix]->sq[tc]);
if (err)
goto err_close_sqs;
}
@@ -2084,11 +2253,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
spin_lock_init(&c->async_icosq_lock);
- err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq);
+ err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
+ mlx5e_async_icosq_err_cqe_work);
if (err)
goto err_close_xdpsq_cq;
- err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+ mutex_init(&c->icosq_recovery_lock);
+
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
+ mlx5e_icosq_err_cqe_work);
if (err)
goto err_close_async_icosq;
@@ -2156,9 +2329,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
mlx5e_close_xdpsq(&c->xdpsq);
if (c->xdp)
mlx5e_close_xdpsq(&c->rq_xdpsq);
+ /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
+ cancel_work_sync(&c->icosq.recover_work);
mlx5e_close_rq(&c->rq);
mlx5e_close_sqs(c);
mlx5e_close_icosq(&c->icosq);
+ mutex_destroy(&c->icosq_recovery_lock);
mlx5e_close_icosq(&c->async_icosq);
if (c->xdp)
mlx5e_close_cq(&c->rq_xdpsq.cq);
@@ -2176,6 +2352,44 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
}
+static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)
+{
+ if (ix > priv->stats_nch) {
+ netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix,
+ priv->stats_nch);
+ return -EINVAL;
+ }
+
+ if (priv->channel_stats[ix])
+ return 0;
+
+ /* Asymmetric dynamic memory allocation.
+ * Freed in mlx5e_priv_arrays_free, not on channel closure.
+ */
+ mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix);
+ priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!priv->channel_stats[ix])
+ return -ENOMEM;
+ priv->stats_nch++;
+
+ return 0;
+}
+
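Because channel stats are now allocated lazily and never freed on channel closure, readers iterate up to stats_nch and dereference the per-channel pointers. A sketch under those assumptions (example_fold_rx_packets is a hypothetical name; the stats-fold hunk later in this patch follows the same pattern):

static u64 example_fold_rx_packets(struct mlx5e_priv *priv)
{
	u64 rx_packets = 0;
	int i;

	/* Entries [0, stats_nch) are allocated and stay valid until
	 * priv cleanup; closing a channel does not free its stats.
	 */
	for (i = 0; i < priv->stats_nch; i++)
		rx_packets += priv->channel_stats[i]->rq.packets;

	return rx_packets;
}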
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
+{
+ spin_lock_bh(&c->async_icosq_lock);
+ mlx5e_trigger_irq(&c->async_icosq);
+ spin_unlock_bh(&c->async_icosq_lock);
+}
+
+void mlx5e_trigger_napi_sched(struct napi_struct *napi)
+{
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
+}
+
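These two helpers give callers an explicit way to kick NAPI after reconfiguring queues, instead of triggering it implicitly inside mlx5e_activate_rq(). A minimal sketch of the intended call order, mirroring the channel activation hunk below:

	mlx5e_activate_rq(&c->rq);
	mlx5e_trigger_napi_icosq(c); /* run NAPI once so the ICOSQ posts the initial UMRs */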
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam,
@@ -2193,6 +2407,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
if (err)
return err;
+ err = mlx5e_channel_stats_alloc(priv, ix, cpu);
+ if (err)
+ return err;
+
c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
if (!c)
return -ENOMEM;
@@ -2207,11 +2425,11 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
c->num_tc = mlx5e_get_dcb_num_tc(params);
c->xdp = !!params->xdp_prog;
- c->stats = &priv->channel_stats[ix].ch;
+ c->stats = &priv->channel_stats[ix]->ch;
c->aff_mask = irq_get_effective_affinity_mask(irq);
c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
- netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
+ netif_napi_add(netdev, &c->napi, mlx5e_napi_poll);
err = mlx5e_open_queues(c, params, cparam);
if (unlikely(err))
@@ -2249,10 +2467,13 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_icosq(&c->icosq);
mlx5e_activate_icosq(&c->async_icosq);
- mlx5e_activate_rq(&c->rq);
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_activate_xsk(c);
+ else
+ mlx5e_activate_rq(&c->rq);
+
+ mlx5e_trigger_napi_icosq(c);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -2261,8 +2482,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_deactivate_xsk(c);
+ else
+ mlx5e_deactivate_rq(&c->rq);
- mlx5e_deactivate_rq(&c->rq);
mlx5e_deactivate_icosq(&c->async_icosq);
mlx5e_deactivate_icosq(&c->icosq);
for (tc = 0; tc < c->num_tc; tc++)
@@ -2318,9 +2540,11 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
goto err_close_channels;
}
- err = mlx5e_qos_open_queues(priv, chs);
- if (err)
- goto err_close_ptp;
+ if (priv->htb) {
+ err = mlx5e_qos_open_queues(priv, chs);
+ if (err)
+ goto err_close_ptp;
+ }
mlx5e_health_channels_update(priv);
kvfree(cparam);
@@ -2352,8 +2576,6 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs)
mlx5e_ptp_activate_channel(chs->ptp);
}
-#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
-
static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
{
int err = 0;
@@ -2361,8 +2583,12 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
for (i = 0; i < chs->num; i++) {
int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
+ struct mlx5e_channel *c = chs->c[i];
+
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ continue;
- err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
+ err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout);
/* Don't wait on the XSK RQ, because the newer xdpsock sample
* doesn't provide any Fill Ring entries at the setup stage.
@@ -2502,9 +2728,11 @@ static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
{
- int qos_queues, nch, ntc, num_txqs, err;
+ int nch, ntc, num_txqs, err;
+ int qos_queues = 0;
- qos_queues = mlx5e_qos_cur_leaf_nodes(priv);
+ if (priv->htb)
+ qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb);
nch = priv->channels.params.num_channels;
ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
@@ -2525,7 +2753,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq;
struct net_device *netdev = priv->netdev;
int old_num_txqs, old_ntc;
- int num_rxqs, nch, ntc;
+ int nch, ntc;
int err;
int i;
@@ -2536,7 +2764,6 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
nch = priv->channels.params.num_channels;
ntc = priv->channels.params.mqprio.num_tc;
- num_rxqs = nch * priv->profile->rq_groups;
tc_to_txq = priv->channels.params.mqprio.tc_to_txq;
err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq);
@@ -2545,18 +2772,11 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
err = mlx5e_update_tx_netdev_queues(priv);
if (err)
goto err_tcs;
- err = netif_set_real_num_rx_queues(netdev, num_rxqs);
+ err = netif_set_real_num_rx_queues(netdev, nch);
if (err) {
netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
goto err_txqs;
}
- if (priv->mqprio_rl != priv->channels.params.mqprio.channel.rl) {
- if (priv->mqprio_rl) {
- mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
- mlx5e_mqprio_rl_free(priv->mqprio_rl);
- }
- priv->mqprio_rl = priv->channels.params.mqprio.channel.rl;
- }
return 0;
@@ -2598,7 +2818,7 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
}
}
-int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
+static int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
{
u16 count = priv->channels.params.num_channels;
int err;
@@ -2631,43 +2851,46 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
struct mlx5e_txqsq *sq = &c->sq[tc];
priv->txq2sq[sq->txq_ix] = sq;
- priv->channel_tc2realtxq[i][tc] = i + tc * ch;
}
}
if (!priv->channels.ptp)
- return;
+ goto out;
if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
- return;
+ goto out;
for (tc = 0; tc < num_tc; tc++) {
struct mlx5e_ptp *c = priv->channels.ptp;
struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
priv->txq2sq[sq->txq_ix] = sq;
- priv->port_ptp_tc2realtxq[tc] = priv->num_tc_x_num_ch + tc;
}
-}
-static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv)
-{
- /* Sync with mlx5e_select_queue. */
- WRITE_ONCE(priv->num_tc_x_num_ch,
- mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num);
+out:
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
}
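A sketch of the read side of this pairing (assumed from the comment above, not copied from the driver; example_lookup_sq is a hypothetical name):

static struct mlx5e_txqsq *example_lookup_sq(struct mlx5e_priv *priv,
					     struct sk_buff *skb)
{
	/* Runs in mlx5e_xmit() context under the TX queue spinlock:
	 * the lock acquire pairs with the smp_wmb() above, so the
	 * updated txq2sq[] entry is guaranteed to be visible here.
	 */
	return priv->txq2sq[skb_get_queue_mapping(skb)];
}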
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
- mlx5e_update_num_tc_x_num_ch(priv);
mlx5e_build_txq_maps(priv);
mlx5e_activate_channels(&priv->channels);
- mlx5e_qos_activate_queues(priv);
+ if (priv->htb)
+ mlx5e_qos_activate_queues(priv);
mlx5e_xdp_tx_enable(priv);
+
+ /* dev_watchdog() wants all TX queues to be started when the carrier is
+ * OK, including the ones in range real_num_tx_queues..num_tx_queues-1.
+ * Make it happy to avoid TX timeout false alarms.
+ */
netif_tx_start_all_queues(priv->netdev);
if (mlx5e_is_vport_rep(priv))
- mlx5e_add_sqs_fwd_rules(priv);
+ mlx5e_rep_activate_channels(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@ -2681,13 +2904,15 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
mlx5e_rx_res_channels_deactivate(priv->rx_res);
if (mlx5e_is_vport_rep(priv))
- mlx5e_remove_sqs_fwd_rules(priv);
+ mlx5e_rep_deactivate_channels(priv);
- /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
- * polling for inactive tx queues.
+ /* The results of ndo_select_queue are unreliable, while netdev config
+ * is being changed (real_num_tx_queues, num_tc). Stop all queues to
+ * prevent ndo_start_xmit from being called, so that it can assume that
+ * the selected queue is always valid.
*/
- netif_tx_stop_all_queues(priv->netdev);
netif_tx_disable(priv->netdev);
+
mlx5e_xdp_tx_disable(priv);
mlx5e_deactivate_channels(&priv->channels);
}
@@ -2747,6 +2972,7 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
mlx5e_close_channels(&old_chs);
priv->profile->update_rx(priv);
+ mlx5e_selq_apply(&priv->selq);
out:
mlx5e_activate_priv_channels(priv);
@@ -2770,13 +2996,24 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
return mlx5e_switch_priv_params(priv, params, preactivate, context);
new_chs.params = *params;
+
+ mlx5e_selq_prepare_params(&priv->selq, &new_chs.params);
+
err = mlx5e_open_channels(priv, &new_chs);
if (err)
- return err;
+ goto err_cancel_selq;
+
err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
if (err)
- mlx5e_close_channels(&new_chs);
+ goto err_close;
+ return 0;
+
+err_close:
+ mlx5e_close_channels(&new_chs);
+
+err_cancel_selq:
+ mlx5e_selq_cancel(&priv->selq);
return err;
}
@@ -2816,6 +3053,8 @@ int mlx5e_open_locked(struct net_device *netdev)
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
+ mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params);
+
set_bit(MLX5E_STATE_OPENED, &priv->state);
err = mlx5e_open_channels(priv, &priv->channels);
@@ -2823,6 +3062,7 @@ int mlx5e_open_locked(struct net_device *netdev)
goto err_clear_state_opened_flag;
priv->profile->update_rx(priv);
+ mlx5e_selq_apply(&priv->selq);
mlx5e_activate_priv_channels(priv);
mlx5e_apply_traps(priv, true);
if (priv->profile->update_carrier)
@@ -2833,6 +3073,7 @@ int mlx5e_open_locked(struct net_device *netdev)
err_clear_state_opened_flag:
clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ mlx5e_selq_cancel(&priv->selq);
return err;
}
@@ -3050,6 +3291,12 @@ err_close_tises:
static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
{
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ priv->mqprio_rl = NULL;
+ }
+ mlx5e_accel_cleanup_tx(priv);
mlx5e_destroy_tises(priv);
}
@@ -3118,19 +3365,38 @@ static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc)
{
params->mqprio.mode = TC_MQPRIO_MODE_DCB;
params->mqprio.num_tc = num_tc;
- params->mqprio.channel.rl = NULL;
mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc,
params->num_channels);
}
+static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params,
+ struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
+ u32 hw_id = 0;
+
+ if (rl)
+ mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id);
+ params->mqprio.channel.hw_id[tc] = hw_id;
+ }
+}
+
static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params,
- struct tc_mqprio_qopt *qopt,
+ struct tc_mqprio_qopt_offload *mqprio,
struct mlx5e_mqprio_rl *rl)
{
+ int tc;
+
params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
- params->mqprio.num_tc = qopt->num_tc;
- params->mqprio.channel.rl = rl;
- mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, qopt);
+ params->mqprio.num_tc = mqprio->qopt.num_tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++)
+ params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc];
+
+ mlx5e_mqprio_rl_update_params(params, rl);
+ mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt);
}
static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
@@ -3156,6 +3422,12 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
err = mlx5e_safe_switch_params(priv, &new_params,
mlx5e_num_channels_changed_ctx, NULL, true);
+ if (!err && priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ priv->mqprio_rl = NULL;
+ }
+
priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
mlx5e_get_dcb_num_tc(&priv->channels.params));
return err;
@@ -3214,16 +3486,38 @@ static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
return 0;
}
-static bool mlx5e_mqprio_rate_limit(struct tc_mqprio_qopt_offload *mqprio)
+static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[])
{
int tc;
- for (tc = 0; tc < mqprio->qopt.num_tc; tc++)
- if (mqprio->max_rate[tc])
+ for (tc = 0; tc < num_tc; tc++)
+ if (max_rate[tc])
return true;
return false;
}
+static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev,
+ u8 num_tc, u64 max_rate[])
+{
+ struct mlx5e_mqprio_rl *rl;
+ int err;
+
+ if (!mlx5e_mqprio_rate_limit(num_tc, max_rate))
+ return NULL;
+
+ rl = mlx5e_mqprio_rl_alloc();
+ if (!rl)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate);
+ if (err) {
+ mlx5e_mqprio_rl_free(rl);
+ return ERR_PTR(err);
+ }
+
+ return rl;
+}
+
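mlx5e_mqprio_rl_create() distinguishes "no rate limiting requested" (NULL) from allocation or init failure (ERR_PTR), so callers need both checks, as in this fragment (matching the caller below):

	struct mlx5e_mqprio_rl *rl;

	rl = mlx5e_mqprio_rl_create(mdev, num_tc, max_rate);
	if (IS_ERR(rl))
		return PTR_ERR(rl); /* allocation or init failure */
	/* rl == NULL is a valid result: no per-TC max_rate was requested. */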
static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
struct tc_mqprio_qopt_offload *mqprio)
{
@@ -3237,32 +3531,32 @@ static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
if (err)
return err;
- rl = NULL;
- if (mlx5e_mqprio_rate_limit(mqprio)) {
- rl = mlx5e_mqprio_rl_alloc();
- if (!rl)
- return -ENOMEM;
- err = mlx5e_mqprio_rl_init(rl, priv->mdev, mqprio->qopt.num_tc,
- mqprio->max_rate);
- if (err) {
- mlx5e_mqprio_rl_free(rl);
- return err;
- }
- }
+ rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate);
+ if (IS_ERR(rl))
+ return PTR_ERR(rl);
new_params = priv->channels.params;
- mlx5e_params_mqprio_channel_set(&new_params, &mqprio->qopt, rl);
+ mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl);
nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1;
preactivate = nch_changed ? mlx5e_num_channels_changed_ctx :
mlx5e_update_netdev_queues_ctx;
err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true);
- if (err && rl) {
- mlx5e_mqprio_rl_cleanup(rl);
- mlx5e_mqprio_rl_free(rl);
+ if (err) {
+ if (rl) {
+ mlx5e_mqprio_rl_cleanup(rl);
+ mlx5e_mqprio_rl_free(rl);
+ }
+ return err;
}
- return err;
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ }
+ priv->mqprio_rl = rl;
+
+ return 0;
}
static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
@@ -3271,7 +3565,7 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
/* MQPRIO is another toplevel qdisc that can't be attached
* simultaneously with the offloaded HTB.
*/
- if (WARN_ON(priv->htb.maj_id))
+ if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq)))
return -EINVAL;
switch (mqprio->mode) {
@@ -3284,47 +3578,6 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
}
}
-static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb)
-{
- int res;
-
- switch (htb->command) {
- case TC_HTB_CREATE:
- return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid,
- htb->extack);
- case TC_HTB_DESTROY:
- return mlx5e_htb_root_del(priv);
- case TC_HTB_LEAF_ALLOC_QUEUE:
- res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid,
- htb->rate, htb->ceil, htb->extack);
- if (res < 0)
- return res;
- htb->qid = res;
- return 0;
- case TC_HTB_LEAF_TO_INNER:
- return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid,
- htb->rate, htb->ceil, htb->extack);
- case TC_HTB_LEAF_DEL:
- return mlx5e_htb_leaf_del(priv, &htb->classid, htb->extack);
- case TC_HTB_LEAF_DEL_LAST:
- case TC_HTB_LEAF_DEL_LAST_FORCE:
- return mlx5e_htb_leaf_del_last(priv, htb->classid,
- htb->command == TC_HTB_LEAF_DEL_LAST_FORCE,
- htb->extack);
- case TC_HTB_NODE_MODIFY:
- return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil,
- htb->extack);
- case TC_HTB_LEAF_QUERY_QUEUE:
- res = mlx5e_get_txq_by_classid(priv, htb->classid);
- if (res < 0)
- return res;
- htb->qid = res;
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
-
static LIST_HEAD(mlx5e_block_cb_list);
static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
@@ -3358,7 +3611,7 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
return err;
case TC_SETUP_QDISC_HTB:
mutex_lock(&priv->state_lock);
- err = mlx5e_setup_tc_htb(priv, type_data);
+ err = mlx5e_htb_setup_tc(priv, type_data);
mutex_unlock(&priv->state_lock);
return err;
default:
@@ -3371,7 +3624,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
int i;
for (i = 0; i < priv->stats_nch; i++) {
- struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
+ struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
int j;
@@ -3446,7 +3699,8 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->rx_length_errors =
PPORT_802_3_GET(pstats, a_in_range_length_errors) +
PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
- PPORT_802_3_GET(pstats, a_frame_too_long_errors);
+ PPORT_802_3_GET(pstats, a_frame_too_long_errors) +
+ VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small);
stats->rx_crc_errors =
PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
@@ -3509,20 +3763,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
- if (enable && priv->xsk.refcnt) {
- netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n",
- priv->xsk.refcnt);
- err = -EINVAL;
- goto out;
- }
-
cur_params = &priv->channels.params;
- if (enable && !MLX5E_GET_PFLAG(cur_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
- netdev_warn(netdev, "can't set LRO with legacy RQ\n");
- err = -EINVAL;
- goto out;
- }
-
new_params = *cur_params;
if (enable)
@@ -3559,11 +3800,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
new_params = priv->channels.params;
if (enable) {
- if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
- netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n");
- err = -EINVAL;
- goto out;
- }
new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
new_params.packet_merge.shampo.match_criteria_type =
MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
@@ -3575,8 +3811,7 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
goto out;
}
- err = mlx5e_safe_switch_params(priv, &new_params,
- mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
out:
mutex_unlock(&priv->state_lock);
return err;
@@ -3587,9 +3822,11 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
struct mlx5e_priv *priv = netdev_priv(netdev);
if (enable)
- mlx5e_enable_cvlan_filter(priv);
+ mlx5e_enable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
else
- mlx5e_disable_cvlan_filter(priv);
+ mlx5e_disable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
return 0;
}
@@ -3597,21 +3834,26 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
static int set_feature_hw_tc(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
- if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
+ int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
+ MLX5_TC_FLAG(NIC_OFFLOAD);
+ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
}
#endif
- if (!enable && priv->htb.maj_id) {
+ mutex_lock(&priv->state_lock);
+ if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) {
netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n");
- return -EINVAL;
+ err = -EINVAL;
}
+ mutex_unlock(&priv->state_lock);
- return 0;
+ return err;
}
static int set_feature_rx_all(struct net_device *netdev, bool enable)
@@ -3693,20 +3935,45 @@ static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable);
priv->channels.params.vlan_strip_disable = !enable;
+
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
- if (err)
+ if (err) {
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, enable);
priv->channels.params.vlan_strip_disable = enable;
-
+ }
unlock:
mutex_unlock(&priv->state_lock);
return err;
}
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_flow_steering *fs = priv->fs;
+
+ if (mlx5e_is_uplink_rep(priv))
+ return 0; /* no vlan table for uplink rep */
+
+ return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid);
+}
+
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_flow_steering *fs = priv->fs;
+
+ if (mlx5e_is_uplink_rep(priv))
+ return 0; /* no vlan table for uplink rep */
+
+ return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid);
+}
+
#ifdef CONFIG_MLX5_EN_ARFS
static int set_feature_arfs(struct net_device *netdev, bool enable)
{
@@ -3714,9 +3981,9 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
int err;
if (enable)
- err = mlx5e_arfs_enable(priv);
+ err = mlx5e_arfs_enable(priv->fs);
else
- err = mlx5e_arfs_disable(priv);
+ err = mlx5e_arfs_disable(priv->fs);
return err;
}
@@ -3724,12 +3991,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
static int mlx5e_handle_feature(struct net_device *netdev,
netdev_features_t *features,
- netdev_features_t wanted_features,
netdev_features_t feature,
mlx5e_feature_handler feature_handler)
{
- netdev_features_t changes = wanted_features ^ netdev->features;
- bool enable = !!(wanted_features & feature);
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
int err;
if (!(changes & feature))
@@ -3737,22 +4003,22 @@ static int mlx5e_handle_feature(struct net_device *netdev,
err = feature_handler(netdev, enable);
if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
netdev_err(netdev, "%s feature %pNF failed, err %d\n",
enable ? "Enable" : "Disable", &feature, err);
return err;
}
- MLX5E_SET_FEATURE(features, feature, enable);
return 0;
}
int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
{
- netdev_features_t oper_features = netdev->features;
+ netdev_features_t oper_features = features;
int err = 0;
#define MLX5E_HANDLE_FEATURE(feature, handler) \
- mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro);
@@ -3790,6 +4056,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
if (netdev->features & NETIF_F_NTUPLE)
netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+ features &= ~NETIF_F_GRO_HW;
+ if (netdev->features & NETIF_F_GRO_HW)
+ netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+
return features;
}
@@ -3797,12 +4067,14 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
netdev_features_t features)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_vlan_table *vlan;
struct mlx5e_params *params;
+ vlan = mlx5e_fs_get_vlan(priv->fs);
mutex_lock(&priv->state_lock);
params = &priv->channels.params;
- if (!priv->fs.vlan ||
- !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) {
+ if (!vlan ||
+ !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) {
/* HW strips the outer C-tag header, this is a problem
* for S-tag traffic.
*/
@@ -3822,10 +4094,39 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
}
}
+ if (params->xdp_prog) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with XDP\n");
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (priv->xsk.refcnt) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n",
+ priv->xsk.refcnt);
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+ priv->xsk.refcnt);
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
features &= ~NETIF_F_RXHASH;
if (netdev->features & NETIF_F_RXHASH)
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
}
if (mlx5e_is_uplink_rep(priv))
@@ -3862,7 +4163,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
* 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
*/
max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
- max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
+ max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0));
max_mtu = min(max_mtu_frame, max_mtu_page);
netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",
@@ -3874,6 +4175,33 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
return true;
}
+static bool mlx5e_params_validate_xdp(struct net_device *netdev,
+ struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ bool is_linear;
+
+ /* No XSK params: AF_XDP can't be enabled yet at the point of setting
+ * the XDP program.
+ */
+ is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
+
+ if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
+ netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+ if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
+ netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+
+ return true;
+}
+
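Both the MTU-change and XDP-attach paths funnel through this one validation. A caller fragment, matching the pattern used in the hunks below (variable names assumed):

	if (new_params.xdp_prog &&
	    !mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
		return -EINVAL;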
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
mlx5e_fp_preactivate preactivate)
{
@@ -3893,10 +4221,8 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
if (err)
goto out;
- if (params->xdp_prog &&
- !mlx5e_rx_is_linear_skb(&new_params, NULL)) {
- netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
- new_mtu, mlx5e_xdp_max_mtu(params, NULL));
+ if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev,
+ &new_params)) {
err = -EINVAL;
goto out;
}
@@ -3911,19 +4237,21 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
reset = false;
- if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) {
bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
&new_params, NULL);
- u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
- u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL);
+ u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL);
+ u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL);
/* Always reset in linear mode - hw_mtu is used in data path.
* Check that the mode was non-linear and didn't change.
* If XSK is active, XSK RQs are linear.
+ * Reset if the RQ size changed, even if it's non-linear.
*/
if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
- ppw_old == ppw_new)
+ sz_old == sz_new)
reset = false;
}
@@ -4038,7 +4366,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
goto err_unlock;
}
- if (!priv->profile->rx_ptp_support)
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
err = mlx5e_hwstamp_config_no_ptp_rx(priv,
config.rx_filter != HWTSTAMP_FILTER_NONE);
else
@@ -4370,24 +4698,11 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return -EINVAL;
}
- if (mlx5_fpga_is_ipsec_device(priv->mdev)) {
- netdev_warn(netdev,
- "XDP is not available on Innova cards with IPsec support\n");
- return -EINVAL;
- }
-
new_params = priv->channels.params;
new_params.xdp_prog = prog;
- /* No XSK params: AF_XDP can't be enabled yet at the point of setting
- * the XDP program.
- */
- if (!mlx5e_rx_is_linear_skb(&new_params, NULL)) {
- netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
- new_params.sw_mtu,
- mlx5e_xdp_max_mtu(&new_params, NULL));
+ if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
return -EINVAL;
- }
return 0;
}
@@ -4424,8 +4739,20 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
new_params = priv->channels.params;
new_params.xdp_prog = prog;
- if (reset)
- mlx5e_set_rq_type(priv->mdev, &new_params);
+
+ /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
+ * supported with the new parameters: if PAGE_SIZE is bigger than
+ * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
+ * the MTU is small enough for the linear mode, because XDP uses strides
+ * of PAGE_SIZE on regular RQs.
+ */
+ if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
+ err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
+ if (err)
+ goto unlock;
+ }
+
old_prog = priv->channels.params.xdp_prog;
err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
@@ -4454,6 +4781,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
unlock:
mutex_unlock(&priv->state_lock);
+
+ /* Need to fix some features. */
+ if (!err)
+ netdev_update_features(netdev);
+
return err;
}
@@ -4592,11 +4924,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
priv->max_nch);
mlx5e_params_mqprio_reset(params);
- /* Set an initial non-zero value, so that mlx5e_select_queue won't
- * divide by zero if called before first activating channels.
- */
- priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
-
/* SQ */
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
@@ -4618,14 +4945,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
/* RQ */
mlx5e_build_rq_params(mdev, params);
- /* HW LRO */
- if (MLX5_CAP_ETH(mdev, lro_cap) &&
- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- /* No XSK params: checking the availability of striding RQ in general. */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- params->packet_merge.type = slow_pci_heuristic(mdev) ?
- MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
- }
params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
@@ -4735,6 +5054,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_RXCSUM;
netdev->vlan_features |= NETIF_F_RXHASH;
+ netdev->vlan_features |= NETIF_F_GSO_PARTIAL;
netdev->mpls_features |= NETIF_F_SG;
netdev->mpls_features |= NETIF_F_HW_CSUM;
@@ -4752,7 +5072,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
!MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
!MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
- mlx5e_check_fragmented_striding_rq_cap(mdev))
+ mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED))
netdev->vlan_features |= NETIF_F_LRO;
netdev->hw_features = netdev->vlan_features;
@@ -4761,10 +5082,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
- if (!!MLX5_CAP_GEN(mdev, shampo) &&
- mlx5e_check_fragmented_striding_rq_cap(mdev))
- netdev->hw_features |= NETIF_F_GRO_HW;
-
if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
@@ -4773,15 +5090,22 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
}
if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
- netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
- netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
- netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
+ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
}
if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
- netdev->hw_features |= NETIF_F_GSO_GRE;
- netdev->hw_enc_features |= NETIF_F_GSO_GRE;
- netdev->gso_partial_features |= NETIF_F_GSO_GRE;
+ netdev->hw_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->gso_partial_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
}
if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
@@ -4793,7 +5117,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
NETIF_F_GSO_IPXIP6;
}
- netdev->hw_features |= NETIF_F_GSO_PARTIAL;
netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
netdev->hw_features |= NETIF_F_GSO_UDP_L4;
netdev->features |= NETIF_F_GSO_UDP_L4;
@@ -4836,9 +5159,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->priv_flags |= IFF_UNICAST_FLT;
+ netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
mlx5e_set_netdev_dev_addr(netdev);
+ mlx5e_macsec_build_netdev(priv);
mlx5e_ipsec_build_netdev(priv);
- mlx5e_tls_build_netdev(priv);
+ mlx5e_ktls_build_netdev(priv);
}
void mlx5e_create_q_counters(struct mlx5e_priv *priv)
@@ -4883,6 +5208,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_flow_steering *fs;
int err;
mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
@@ -4890,17 +5216,20 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
mlx5e_timestamp_init(priv);
- err = mlx5e_fs_init(priv);
- if (err) {
+ fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!fs) {
+ err = -ENOMEM;
mlx5_core_err(mdev, "FS initialization failed, %d\n", err);
return err;
}
+ priv->fs = fs;
err = mlx5e_ipsec_init(priv);
if (err)
mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
- err = mlx5e_tls_init(priv);
+ err = mlx5e_ktls_init(priv);
if (err)
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
@@ -4911,9 +5240,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
mlx5e_health_destroy_reporters(priv);
- mlx5e_tls_cleanup(priv);
+ mlx5e_ktls_cleanup(priv);
mlx5e_ipsec_cleanup(priv);
- mlx5e_fs_cleanup(priv);
+ mlx5e_fs_cleanup(priv->fs);
}
static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -4934,7 +5263,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
goto err_destroy_q_counters;
}
- features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+ features = MLX5E_RX_RES_FEATURE_PTP;
if (priv->channels.params.tunneled_offload_en)
features |= MLX5E_RX_RES_FEATURE_INNER_FT;
err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
@@ -4944,7 +5273,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_flow_steering(priv);
+ err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile,
+ priv->netdev);
if (err) {
mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
goto err_destroy_rx_res;
@@ -4967,7 +5297,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
err_tc_nic_cleanup:
mlx5e_tc_nic_cleanup(priv);
err_destroy_flow_steering:
- mlx5e_destroy_flow_steering(priv);
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
err_destroy_rx_res:
mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
@@ -4983,7 +5314,8 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
{
mlx5e_accel_cleanup_rx(priv);
mlx5e_tc_nic_cleanup(priv);
- mlx5e_destroy_flow_steering(priv);
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
@@ -4991,6 +5323,23 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
priv->rx_res = NULL;
}
+static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params;
+ struct mlx5e_mqprio_rl *rl;
+
+ params = &priv->channels.params;
+ if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL)
+ return;
+
+ rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc,
+ params->mqprio.channel.max_rate);
+ if (IS_ERR(rl))
+ rl = NULL;
+ priv->mqprio_rl = rl;
+ mlx5e_mqprio_rl_update_params(params, rl);
+}
+
static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
{
int err;
@@ -5001,16 +5350,30 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
return err;
}
+ err = mlx5e_accel_init_tx(priv);
+ if (err)
+ goto err_destroy_tises;
+
+ mlx5e_set_mqprio_rl(priv);
mlx5e_dcbnl_initialize(priv);
return 0;
+
+err_destroy_tises:
+ mlx5e_destroy_tises(priv);
+ return err;
}
static void mlx5e_nic_enable(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ mlx5e_fs_init_l2_addr(priv->fs, netdev);
- mlx5e_init_l2_addr(priv);
+ err = mlx5e_macsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err);
/* Marking the link as currently not needed by the Driver */
if (!netif_running(netdev))
@@ -5069,6 +5432,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5e_disable_async_events(priv);
mlx5_lag_remove_netdev(mdev, priv->netdev);
mlx5_vxlan_reset_to_default(mdev->vxlan);
+ mlx5e_macsec_cleanup(priv);
}
int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
@@ -5090,12 +5454,27 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.update_carrier = mlx5e_update_carrier,
.rx_handlers = &mlx5e_rx_handlers_nic,
.max_tc = MLX5E_MAX_NUM_TC,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK),
.stats_grps = mlx5e_nic_stats_grps,
.stats_grps_num = mlx5e_nic_stats_grps_num,
- .rx_ptp_support = true,
+ .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) |
+ BIT(MLX5E_PROFILE_FEATURE_PTP_TX) |
+ BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) |
+ BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) |
+ BIT(MLX5E_PROFILE_FEATURE_FS_TC),
};
+static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ int nch;
+
+ nch = mlx5e_get_max_num_channels(mdev);
+
+ if (profile->max_nch_limit)
+ nch = min_t(int, nch, profile->max_nch_limit(mdev));
+ return nch;
+}
+
static unsigned int
mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
const struct mlx5e_profile *profile)
@@ -5104,11 +5483,10 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
unsigned int max_nch, tmp;
/* core resources */
- max_nch = mlx5e_get_max_num_channels(mdev);
+ max_nch = mlx5e_profile_max_num_channels(mdev, profile);
/* netdev rx queues */
- tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
- max_nch = min_t(unsigned int, max_nch, tmp);
+ max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues);
/* netdev tx queues */
tmp = netdev->num_tx_queues;
@@ -5122,25 +5500,52 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
return max_nch;
}
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev)
+{
+	/* Two sets of indirect TIRs (one per TTC table: inner + outer steering)
+	 * plus one set of direct TIRs, one per channel.
+	 */
+ return 2 * MLX5E_NUM_INDIR_TIRS
+ + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile);
+}
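
The arithmetic here is easy to sanity-check. A runnable user-space sketch, with both constants assumed purely for illustration (neither value is taken from this patch):

    /* Sketch of the PF TIR budget: 2 indirect sets (inner + outer TTC)
     * plus one direct TIR per channel. Both inputs are assumptions.
     */
    #include <stdio.h>

    int main(void)
    {
    	int num_indir_tirs = 9; /* assumed MLX5E_NUM_INDIR_TIRS */
    	int max_nch = 64;       /* assumed PF channel maximum */

    	printf("PF TIRs: %d\n", 2 * num_indir_tirs + max_nch); /* 82 */
    	return 0;
    }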
+
+void mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ set_rx_mode_work);
+
+ return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev);
+}
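
This wrapper relies on the standard container_of() idiom to recover the enclosing mlx5e_priv from the embedded work_struct. A self-contained user-space demo of that idiom (the struct names here are made up for the example):

    /* Recover the enclosing object from a pointer to one of its members. */
    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
    	((type *)((char *)(ptr) - offsetof(type, member)))

    struct work { int pending; };
    struct priv { int id; struct work rx_work; };

    int main(void)
    {
    	struct priv p = { .id = 42 };
    	struct work *w = &p.rx_work;
    	struct priv *back = container_of(w, struct priv, rx_work);

    	printf("%d\n", back->id); /* prints 42 */
    	return 0;
    }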
+
/* mlx5e generic netdev management API (move to en_common.c) */
int mlx5e_priv_init(struct mlx5e_priv *priv,
const struct mlx5e_profile *profile,
struct net_device *netdev,
struct mlx5_core_dev *mdev)
{
+ int nch, num_txqs, node;
+ int err;
+
+ num_txqs = netdev->num_tx_queues;
+ nch = mlx5e_calc_max_nch(mdev, netdev, profile);
+ node = dev_to_node(mlx5_core_dma_dev(mdev));
+
/* priv init */
priv->mdev = mdev;
priv->netdev = netdev;
priv->msglevel = MLX5E_MSG_LEVEL;
- priv->max_nch = mlx5e_calc_max_nch(mdev, netdev, profile);
- priv->stats_nch = priv->max_nch;
+ priv->max_nch = nch;
priv->max_opened_tc = 1;
if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
return -ENOMEM;
mutex_init(&priv->state_lock);
- hash_init(priv->htb.qos_tc2node);
+
+ err = mlx5e_selq_init(&priv->selq, &priv->state_lock);
+ if (err)
+ goto err_free_cpumask;
+
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
@@ -5148,13 +5553,33 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
priv->wq = create_singlethread_workqueue("mlx5e");
if (!priv->wq)
- goto err_free_cpumask;
+ goto err_free_selq;
+
+ priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node);
+ if (!priv->txq2sq)
+ goto err_destroy_workqueue;
+
+ priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node);
+ if (!priv->tx_rates)
+ goto err_free_txq2sq;
+
+ priv->channel_stats =
+ kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
+ if (!priv->channel_stats)
+ goto err_free_tx_rates;
return 0;
+err_free_tx_rates:
+ kfree(priv->tx_rates);
+err_free_txq2sq:
+ kfree(priv->txq2sq);
+err_destroy_workqueue:
+ destroy_workqueue(priv->wq);
+err_free_selq:
+ mlx5e_selq_cleanup(&priv->selq);
err_free_cpumask:
free_cpumask_var(priv->scratchpad.cpumask);
-
return -ENOMEM;
}
@@ -5166,28 +5591,58 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
if (!priv->mdev)
return;
+ for (i = 0; i < priv->stats_nch; i++)
+ kvfree(priv->channel_stats[i]);
+ kfree(priv->channel_stats);
+ kfree(priv->tx_rates);
+ kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
+ mutex_lock(&priv->state_lock);
+ mlx5e_selq_cleanup(&priv->selq);
+ mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);
- for (i = 0; i < priv->htb.max_qos_sqs; i++)
- kfree(priv->htb.qos_sq_stats[i]);
- kvfree(priv->htb.qos_sq_stats);
-
- if (priv->mqprio_rl) {
- mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
- mlx5e_mqprio_rl_free(priv->mqprio_rl);
- }
+ for (i = 0; i < priv->htb_max_qos_sqs; i++)
+ kfree(priv->htb_qos_sq_stats[i]);
+ kvfree(priv->htb_qos_sq_stats);
memset(priv, 0, sizeof(*priv));
}
+static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ unsigned int nch, ptp_txqs, qos_txqs;
+
+ nch = mlx5e_profile_max_num_channels(mdev, profile);
+
+ ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) &&
+ mlx5e_profile_feature_cap(profile, PTP_TX) ?
+ profile->max_tc : 0;
+
+ qos_txqs = mlx5_qos_is_supported(mdev) &&
+ mlx5e_profile_feature_cap(profile, QOS_HTB) ?
+ mlx5e_qos_max_leaf_nodes(mdev) : 0;
+
+ return nch * profile->max_tc + ptp_txqs + qos_txqs;
+}
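
A worked example of the TXQ sizing formula above; every numeric input is an illustrative assumption, not a value from this patch:

    #include <stdio.h>

    int main(void)
    {
    	unsigned int nch = 64;       /* assumed max channels */
    	unsigned int max_tc = 8;     /* assumed profile->max_tc */
    	unsigned int ptp_txqs = 8;   /* max_tc, when PTP-TX is supported */
    	unsigned int qos_txqs = 256; /* assumed HTB leaf-node maximum */

    	printf("netdev TX queues: %u\n",
    	       nch * max_tc + ptp_txqs + qos_txqs); /* 776 */
    	return 0;
    }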
+
+static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ return mlx5e_profile_max_num_channels(mdev, profile);
+}
+
struct net_device *
-mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
- unsigned int txqs, unsigned int rxqs)
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile)
{
struct net_device *netdev;
+ unsigned int txqs, rxqs;
int err;
+ txqs = mlx5e_get_max_num_txqs(mdev, profile);
+ rxqs = mlx5e_get_max_num_rxqs(mdev, profile);
+
netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs);
if (!netdev) {
mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
@@ -5201,6 +5656,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof
}
netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
dev_net_set(netdev, mlx5_core_net(mdev));
return netdev;
@@ -5234,6 +5690,16 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
int err;
clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+
+ /* Validate the max_wqe_size_sq capability. */
+ if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n",
+ mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS);
+ return -EIO;
+ }
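
For context on the units in this check: a WQEBB is a 64-byte WQE basic block, and the driver refuses to attach if firmware cannot take SQ WQEs as large as the biggest one it may build. A runnable sketch of the arithmetic (the ceiling of 16 is an assumed example, not taken from this patch):

    #include <stdio.h>

    int main(void)
    {
    	unsigned int wqebb_bytes = 64;   /* MLX5_SEND_WQE_BB */
    	unsigned int max_tx_wqebbs = 16; /* assumed driver-side ceiling */

    	/* Firmware must support SQ WQEs at least this large, else -EIO. */
    	printf("largest TX WQE: %u bytes\n", wqebb_bytes * max_tx_wqebbs);
    	return 0;
    }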
/* max number of channels may have changed */
max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
@@ -5293,6 +5759,9 @@ err_cleanup_tx:
out:
mlx5e_reset_channels(priv->netdev);
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
cancel_work_sync(&priv->update_stats_work);
return err;
}
@@ -5302,6 +5771,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
const struct mlx5e_profile *profile = priv->profile;
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
if (profile->disable)
profile->disable(priv);
@@ -5389,7 +5861,7 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
static int mlx5e_resume(struct auxiliary_device *adev)
{
struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
- struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev);
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = edev->mdev;
int err;
@@ -5412,7 +5884,7 @@ static int mlx5e_resume(struct auxiliary_device *adev)
static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
{
- struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev);
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -5432,22 +5904,10 @@ static int mlx5e_probe(struct auxiliary_device *adev,
struct mlx5_core_dev *mdev = edev->mdev;
struct net_device *netdev;
pm_message_t state = {};
- unsigned int txqs, rxqs, ptp_txqs = 0;
struct mlx5e_priv *priv;
- int qos_sqs = 0;
int err;
- int nch;
- if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
- ptp_txqs = profile->max_tc;
-
- if (mlx5_qos_is_supported(mdev))
- qos_sqs = mlx5e_qos_max_leaf_nodes(mdev);
-
- nch = mlx5e_get_max_num_channels(mdev);
- txqs = nch * profile->max_tc + ptp_txqs + qos_sqs;
- rxqs = nch * profile->rq_groups;
- netdev = mlx5e_create_netdev(mdev, profile, txqs, rxqs);
+ netdev = mlx5e_create_netdev(mdev, profile);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
return -ENOMEM;
@@ -5456,7 +5916,7 @@ static int mlx5e_probe(struct auxiliary_device *adev,
mlx5e_build_nic_netdev(netdev);
priv = netdev_priv(netdev);
- dev_set_drvdata(&adev->dev, priv);
+ auxiliary_set_drvdata(adev, priv);
priv->profile = profile;
priv->ppriv = NULL;
@@ -5504,7 +5964,7 @@ err_destroy_netdev:
static void mlx5e_remove(struct auxiliary_device *adev)
{
- struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev);
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
pm_message_t state = {};
mlx5e_dcbnl_delete_app(priv);
@@ -5535,7 +5995,6 @@ int mlx5e_init(void)
{
int ret;
- mlx5e_ipsec_build_inverse_table();
mlx5e_build_ptys2ethtool_map();
ret = auxiliary_driver_register(&mlx5e_driver);
if (ret)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 48895d79796a..794cd8dfe9c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -50,10 +50,13 @@
#include "fs_core.h"
#include "lib/mlx5.h"
#include "lib/devcom.h"
+#include "lib/vxlan.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
#include "en_accel/ipsec.h"
#include "en/tc/int_port.h"
+#include "en/ptp.h"
+#include "en/fs_ethtool.h"
#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
@@ -67,7 +70,7 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
- strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
+ strscpy(drvinfo->driver, mlx5e_rep_driver_name,
sizeof(drvinfo->driver));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)",
@@ -219,16 +222,22 @@ static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
}
}
-static void mlx5e_rep_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+static void
+mlx5e_rep_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- mlx5e_ethtool_get_ringparam(priv, param);
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
}
-static int mlx5e_rep_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+static int
+mlx5e_rep_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -258,7 +267,7 @@ static int mlx5e_rep_get_coalesce(struct net_device *netdev,
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_get_coalesce(priv, coal);
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
}
static int mlx5e_rep_set_coalesce(struct net_device *netdev,
@@ -268,7 +277,7 @@ static int mlx5e_rep_set_coalesce(struct net_device *netdev,
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_set_coalesce(priv, coal);
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
}
static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
@@ -389,18 +398,30 @@ out_err:
return err;
}
-int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
+static int
+mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
{
+ int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool is_uplink_rep = mlx5e_is_uplink_rep(priv);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int n, tc, nch, num_sqs = 0;
struct mlx5e_channel *c;
- int n, tc, num_sqs = 0;
int err = -ENOMEM;
+ bool ptp_sq;
u32 *sqs;
- sqs = kcalloc(priv->channels.num * mlx5e_get_dcb_num_tc(&priv->channels.params),
- sizeof(*sqs), GFP_KERNEL);
+ ptp_sq = !!(priv->channels.ptp &&
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS));
+ nch = priv->channels.num + ptp_sq;
+ /* +2 for the XDP SQs. The PTP channel has none, so the array is
+ * slightly over-allocated for it; num_sqs tracks the real count.
+ */
+ if (is_uplink_rep)
+ sqs_per_channel += 2;
+
+ sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL);
if (!sqs)
goto out;
@@ -408,10 +429,23 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
c = priv->channels.c[n];
for (tc = 0; tc < c->num_tc; tc++)
sqs[num_sqs++] = c->sq[tc].sqn;
+
+ if (is_uplink_rep) {
+ if (c->xdp)
+ sqs[num_sqs++] = c->rq_xdpsq.sqn;
+
+ sqs[num_sqs++] = c->xdpsq.sqn;
+ }
+ }
+ if (ptp_sq) {
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+
+ for (tc = 0; tc < ptp_ch->num_tc; tc++)
+ sqs[num_sqs++] = ptp_ch->ptpsq[tc].txqsq.sqn;
}
err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
- kfree(sqs);
+ kvfree(sqs);
out:
if (err)
@@ -419,7 +453,8 @@ out:
return err;
}
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+static void
+mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -428,6 +463,49 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
mlx5e_sqs2vport_stop(esw, rep);
}
+static int
+mlx5e_rep_add_meta_tunnel_rule(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_group *g;
+
+ g = esw->fdb_table.offloads.send_to_vport_meta_grp;
+ if (!g)
+ return 0;
+
+ flow_rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, rep->vport);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+
+ rpriv->send_to_vport_meta_rule = flow_rule;
+
+ return 0;
+}
+
+static void
+mlx5e_rep_del_meta_tunnel_rule(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (rpriv->send_to_vport_meta_rule)
+ mlx5_eswitch_del_send_to_vport_meta_rule(rpriv->send_to_vport_meta_rule);
+}
+
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_add_sqs_fwd_rules(priv);
+ mlx5e_rep_add_meta_tunnel_rule(priv);
+}
+
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_rep_del_meta_tunnel_rule(priv);
+ mlx5e_remove_sqs_fwd_rules(priv);
+}
+
static int mlx5e_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -585,6 +663,18 @@ bool mlx5e_eswitch_vf_rep(const struct net_device *netdev)
return netdev->netdev_ops == &mlx5e_netdev_ops_rep;
}
+/* One set of indirect TIRs, for the outer TTC only; inner TTC is not supported on reps. */
+#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS
+
+static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev)
+{
+ int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir);
+ int num_vports = mlx5_eswitch_get_total_vports(mdev);
+
+ return (max_tir_num - mlx5e_get_pf_num_tirs(mdev)
+ - (num_vports * REP_NUM_INDIR_TIRS)) / num_vports;
+}
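
A runnable sketch of this division, with every input assumed for illustration: whatever TIRs remain after the PF's share and each vport's indirect set are split evenly across vports.

    #include <stdio.h>

    int main(void)
    {
    	int max_tir_num = 1 << 12; /* assumed log_max_tir = 12 -> 4096 */
    	int pf_tirs = 82;          /* assumed mlx5e_get_pf_num_tirs() */
    	int num_vports = 126;      /* assumed total vports */
    	int indir_per_vport = 9;   /* assumed REP_NUM_INDIR_TIRS */

    	printf("max channels per rep: %d\n",
    	       (max_tir_num - pf_tirs - num_vports * indir_per_vport) /
    	       num_vports); /* 22 with these inputs */
    	return 0;
    }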
+
static void mlx5e_build_rep_params(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -618,11 +708,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
-
- /* Set an initial non-zero value, so that mlx5e_select_queue won't
- * divide by zero if called before first activating channels.
- */
- priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ params->vlan_strip_disable = true;
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
}
@@ -657,6 +744,13 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
+
mlx5e_build_rep_params(netdev);
mlx5e_timestamp_init(priv);
@@ -669,16 +763,26 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
+
err = mlx5e_ipsec_init(priv);
if (err)
mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err);
mlx5e_vxlan_set_netdev_info(priv);
- return mlx5e_init_rep(mdev, netdev);
+ mlx5e_build_rep_params(netdev);
+ mlx5e_timestamp_init(priv);
+ return 0;
}
static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
{
+ mlx5e_fs_cleanup(priv->fs);
mlx5e_ipsec_cleanup(priv);
}
@@ -689,19 +793,20 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
struct ttc_params ttc_params = {};
int err;
- priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
+ mlx5e_fs_set_ns(priv->fs,
+ mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL), false);
/* The inner_ttc in the ttc params is intentionally not set */
- mlx5e_set_ttc_params(priv, &ttc_params, false);
+ mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false);
if (rep->vport != MLX5_VPORT_UPLINK)
 /* To give the uplink rep TTC a lower level for chaining from the root ft */
ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
- priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
- if (IS_ERR(priv->fs.ttc)) {
- err = PTR_ERR(priv->fs.ttc);
+ mlx5e_fs_set_ttc(priv->fs, mlx5_create_ttc_table(priv->mdev, &ttc_params), false);
+ if (IS_ERR(mlx5e_fs_get_ttc(priv->fs, false))) {
+ err = PTR_ERR(mlx5e_fs_get_ttc(priv->fs, false));
netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
err);
return err;
@@ -721,7 +826,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
 /* non-uplink reps will skip any bypass tables and go directly to
* their own ttc
*/
- rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
+ rpriv->root_ft = mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(priv->fs, false));
return 0;
}
@@ -797,10 +902,12 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
int err;
priv->rx_res = mlx5e_rx_res_alloc();
- if (!priv->rx_res)
- return -ENOMEM;
+ if (!priv->rx_res) {
+ err = -ENOMEM;
+ goto err_free_fs;
+ }
- mlx5e_init_l2_addr(priv);
+ mlx5e_fs_init_l2_addr(priv->fs, priv->netdev);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
if (err) {
@@ -827,29 +934,31 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_root_ft;
- mlx5e_ethtool_init_steering(priv);
+ mlx5e_ethtool_init_steering(priv->fs);
return 0;
err_destroy_root_ft:
mlx5e_destroy_rep_root_ft(priv);
err_destroy_ttc_table:
- mlx5_destroy_ttc_table(priv->fs.ttc);
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
err_destroy_rx_res:
mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
+err_free_fs:
+ mlx5e_fs_cleanup(priv->fs);
return err;
}
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- mlx5e_ethtool_cleanup_steering(priv);
+ mlx5e_ethtool_cleanup_steering(priv->fs);
rep_vport_rx_rule_destroy(priv);
mlx5e_destroy_rep_root_ft(priv);
- mlx5_destroy_ttc_table(priv->fs.ttc);
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_rx_res_free(priv->rx_res);
@@ -911,6 +1020,13 @@ err_event_reg:
return err;
}
+static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
+{
+ mlx5e_rep_tc_netdevice_event_unregister(rpriv);
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
+}
+
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -925,31 +1041,33 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
err = mlx5e_init_uplink_rep_tx(rpriv);
if (err)
- goto destroy_tises;
+ goto err_init_tx;
}
+ err = mlx5e_tc_ht_init(&rpriv->tc_ht);
+ if (err)
+ goto err_ht_init;
+
return 0;
-destroy_tises:
+err_ht_init:
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_cleanup_uplink_rep_tx(rpriv);
+err_init_tx:
mlx5e_destroy_tises(priv);
return err;
}
-static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
-{
- mlx5e_rep_tc_netdevice_event_unregister(rpriv);
- mlx5e_rep_bond_cleanup(rpriv);
- mlx5e_rep_tc_cleanup(rpriv);
-}
-
static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
- mlx5e_destroy_tises(priv);
+ mlx5e_tc_ht_cleanup(&rpriv->tc_ht);
if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
mlx5e_cleanup_uplink_rep_tx(rpriv);
+
+ mlx5e_destroy_tises(priv);
}
static void mlx5e_rep_enable(struct mlx5e_priv *priv)
@@ -1027,6 +1145,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
rtnl_lock();
if (netif_running(netdev))
mlx5e_open(netdev);
+ udp_tunnel_nic_reset_ntf(priv->netdev);
netif_device_attach(netdev);
rtnl_unlock();
}
@@ -1048,6 +1167,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
mlx5_notifier_unregister(mdev, &priv->events_nb);
mlx5e_rep_tc_disable(priv);
mlx5_lag_remove_netdev(mdev, priv->netdev);
+ mlx5_vxlan_reset_to_default(mdev->vxlan);
}
static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
@@ -1082,8 +1202,8 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
&MLX5E_STATS_GRP(per_port_buff_congest),
#ifdef CONFIG_MLX5_EN_IPSEC
&MLX5E_STATS_GRP(ipsec_sw),
- &MLX5E_STATS_GRP(ipsec_hw),
#endif
+ &MLX5E_STATS_GRP(ptp),
};
static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
@@ -1104,10 +1224,9 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.update_stats = mlx5e_stats_update_ndo_stats,
.rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = 1,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_rep_stats_grps,
.stats_grps_num = mlx5e_rep_stats_grps_num,
- .rx_ptp_support = false,
+ .max_nch_limit = mlx5e_rep_max_nch_limit,
};
static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
@@ -1124,11 +1243,8 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.update_carrier = mlx5e_update_carrier,
.rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = MLX5E_MAX_NUM_TC,
- /* XSK is needed so we can replace profile with NIC netdev */
- .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK),
.stats_grps = mlx5e_ul_rep_stats_grps,
.stats_grps_num = mlx5e_ul_rep_stats_grps_num,
- .rx_ptp_support = false,
};
/* e-Switch vport representors */
@@ -1179,14 +1295,10 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
struct devlink_port *dl_port;
struct net_device *netdev;
struct mlx5e_priv *priv;
- unsigned int txqs, rxqs;
- int nch, err;
+ int err;
profile = &mlx5e_rep_profile;
- nch = mlx5e_get_max_num_channels(dev);
- txqs = nch * profile->max_tc;
- rxqs = nch * profile->rq_groups;
- netdev = mlx5e_create_netdev(dev, profile, txqs, rxqs);
+ netdev = mlx5e_create_netdev(dev, profile);
if (!netdev) {
mlx5_core_warn(dev,
"Failed to create representor netdev for vport %d\n",
@@ -1244,7 +1356,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
struct mlx5e_rep_priv *rpriv;
int err;
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL);
if (!rpriv)
return -ENOMEM;
@@ -1259,7 +1371,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
err = mlx5e_vport_vf_rep_load(dev, rep);
if (err)
- kfree(rpriv);
+ kvfree(rpriv);
return err;
}
@@ -1287,7 +1399,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
priv->profile->cleanup(priv);
mlx5e_destroy_netdev(priv);
free_ppriv:
- kfree(ppriv); /* mlx5e_rep_priv */
+ kvfree(ppriv); /* mlx5e_rep_priv */
}
static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index b01dacb6f527..b4e691760da9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -62,13 +62,9 @@ struct mlx5_tc_int_port_priv;
struct mlx5e_rep_bond;
struct mlx5e_tc_tun_encap;
struct mlx5e_post_act;
+struct mlx5e_flow_meters;
struct mlx5_rep_uplink_priv {
- /* Filters DB - instantiated by the uplink representor and shared by
- * the uplink's VFs
- */
- struct rhashtable tc_ht;
-
/* indirect block callbacks are invoked on bind/unbind events
* on registered higher level devices (e.g. tunnel devices)
*
@@ -102,6 +98,8 @@ struct mlx5_rep_uplink_priv {
/* OVS internal port support */
struct mlx5e_tc_int_port_priv *int_port_priv;
+
+ struct mlx5e_flow_meters *flow_meters;
};
struct mlx5e_rep_priv {
@@ -113,6 +111,8 @@ struct mlx5e_rep_priv {
struct list_head vport_sqs_list;
struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
struct rtnl_link_stats64 prev_vf_vport_stats;
+ struct mlx5_flow_handle *send_to_vport_meta_rule;
+ struct rhashtable tc_ht;
};
static inline
@@ -183,6 +183,13 @@ struct mlx5e_decap_entry {
struct rcu_head rcu;
};
+struct mlx5e_mpls_info {
+ u32 label;
+ u8 tc;
+ u8 bos;
+ u8 ttl;
+};
+
struct mlx5e_encap_entry {
/* attached neigh hash entry */
struct mlx5e_neigh_hash_entry *nhe;
@@ -196,6 +203,7 @@ struct mlx5e_encap_entry {
struct list_head route_list;
struct mlx5_pkt_reformat *pkt_reformat;
const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
struct net_device *out_dev;
@@ -234,8 +242,8 @@ int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
void *sp);
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
-int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv);
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
@@ -249,8 +257,8 @@ static inline bool mlx5e_eswitch_rep(const struct net_device *netdev)
#else /* CONFIG_MLX5_ESWITCH */
static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
-static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) {}
static inline int mlx5e_rep_init(void) { return 0; };
static inline void mlx5e_rep_cleanup(void) {};
static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 793511d5ee4c..a61a43fc8d5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -34,11 +34,14 @@
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/bitmap.h>
+#include <linux/filter.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
+#include <net/gro.h>
#include <net/udp.h>
#include <net/tcp.h>
+#include <net/xdp_sock_drv.h>
#include "en.h"
#include "en/txrx.h"
#include "en_tc.h"
@@ -46,10 +49,10 @@
#include "en_rep.h"
#include "en/rep/tc.h"
#include "ipoib/ipoib.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/macsec.h"
#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/tls_rxtx.h"
+#include "en_accel/ktls_txrx.h"
#include "en/xdp.h"
#include "en/xsk/rx.h"
#include "en/health.h"
@@ -220,8 +223,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
}
-static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
{
struct mlx5e_page_cache *cache = &rq->page_cache;
u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
@@ -232,120 +234,101 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
return false;
}
- if (!dev_page_is_reusable(dma_info->page)) {
+ if (!dev_page_is_reusable(page)) {
stats->cache_waive++;
return false;
}
- cache->page_cache[cache->tail] = *dma_info;
+ cache->page_cache[cache->tail] = page;
cache->tail = tail_next;
return true;
}
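
The tail_next computation above uses the usual power-of-two mask wrap, which avoids a division or branch. A minimal demo (CACHE_SIZE here is an example value, not the driver's):

    #include <stdio.h>

    #define CACHE_SIZE 8 /* must be a power of two, as MLX5E_CACHE_SIZE is */

    int main(void)
    {
    	unsigned int tail = CACHE_SIZE - 1; /* last slot */
    	unsigned int tail_next = (tail + 1) & (CACHE_SIZE - 1);

    	printf("tail %u wraps to %u\n", tail, tail_next); /* 7 -> 0 */
    	return 0;
    }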
-static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
{
struct mlx5e_page_cache *cache = &rq->page_cache;
struct mlx5e_rq_stats *stats = rq->stats;
+ dma_addr_t addr;
if (unlikely(cache->head == cache->tail)) {
stats->cache_empty++;
return false;
}
- if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+ if (page_ref_count(cache->page_cache[cache->head]) != 1) {
stats->cache_busy++;
return false;
}
- *dma_info = cache->page_cache[cache->head];
+ au->page = cache->page_cache[cache->head];
cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
stats->cache_reuse++;
- dma_sync_single_for_device(rq->pdev, dma_info->addr,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
+ addr = page_pool_get_dma_addr(au->page);
+ /* Non-XSK always uses PAGE_SIZE. */
+ dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
return true;
}
-static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
{
- if (mlx5e_rx_cache_get(rq, dma_info))
+ dma_addr_t addr;
+
+ if (mlx5e_rx_cache_get(rq, au))
return 0;
- dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
- if (unlikely(!dma_info->page))
+ au->page = page_pool_dev_alloc_pages(rq->page_pool);
+ if (unlikely(!au->page))
return -ENOMEM;
- dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
- PAGE_SIZE, rq->buff.map_dir);
- if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
- page_pool_recycle_direct(rq->page_pool, dma_info->page);
- dma_info->page = NULL;
+ /* Non-XSK always uses PAGE_SIZE. */
+ addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
+ if (unlikely(dma_mapping_error(rq->pdev, addr))) {
+ page_pool_recycle_direct(rq->page_pool, au->page);
+ au->page = NULL;
return -ENOMEM;
}
+ page_pool_set_dma_addr(au->page, addr);
return 0;
}
-static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
{
- if (rq->xsk_pool)
- return mlx5e_xsk_page_alloc_pool(rq, dma_info);
- else
- return mlx5e_page_alloc_pool(rq, dma_info);
-}
+ dma_addr_t dma_addr = page_pool_get_dma_addr(page);
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
-{
- dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
+ dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ page_pool_set_dma_addr(page, 0);
}
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info,
- bool recycle)
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
{
if (likely(recycle)) {
- if (mlx5e_rx_cache_put(rq, dma_info))
+ if (mlx5e_rx_cache_put(rq, page))
return;
- mlx5e_page_dma_unmap(rq, dma_info);
- page_pool_recycle_direct(rq->page_pool, dma_info->page);
+ mlx5e_page_dma_unmap(rq, page);
+ page_pool_recycle_direct(rq->page_pool, page);
} else {
- mlx5e_page_dma_unmap(rq, dma_info);
- page_pool_release_page(rq->page_pool, dma_info->page);
- put_page(dma_info->page);
+ mlx5e_page_dma_unmap(rq, page);
+ page_pool_release_page(rq->page_pool, page);
+ put_page(page);
}
}
-static inline void mlx5e_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info,
- bool recycle)
-{
- if (rq->xsk_pool)
- /* The `recycle` parameter is ignored, and the page is always
- * put into the Reuse Ring, because there is no way to return
- * the page to the userspace when the interface goes down.
- */
- xsk_buff_free(dma_info->xsk);
- else
- mlx5e_page_release_dynamic(rq, dma_info, recycle);
-}
-
static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *frag)
{
int err = 0;
if (!frag->offset)
- /* On first frag (offset == 0), replenish page (dma_info actually).
- * Other frags that point to the same dma_info (with a different
+ /* On first frag (offset == 0), replenish page (alloc_unit actually).
+ * Other frags that point to the same alloc_unit (with a different
* offset) should just use the new one without replenishing again
* by themselves.
*/
- err = mlx5e_page_alloc(rq, frag->di);
+ err = mlx5e_page_alloc_pool(rq, frag->au);
return err;
}
@@ -355,7 +338,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
bool recycle)
{
if (frag->last_in_page)
- mlx5e_page_release(rq, frag->di, recycle);
+ mlx5e_page_release_dynamic(rq, frag->au->page, recycle);
}
static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
@@ -371,12 +354,16 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
int i;
for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
+ dma_addr_t addr;
+ u16 headroom;
+
err = mlx5e_get_rx_frag(rq, frag);
if (unlikely(err))
goto free_frags;
- wqe->data[i].addr = cpu_to_be64(frag->di->addr +
- frag->offset + rq->buff.headroom);
+ headroom = i == 0 ? rq->buff.headroom : 0;
+ addr = page_pool_get_dma_addr(frag->au->page);
+ wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom);
}
return 0;
@@ -394,6 +381,15 @@ static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
{
int i;
+ if (rq->xsk_pool) {
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ xsk_buff_free(wi->au->xsk);
+ return;
+ }
+
for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
mlx5e_put_rx_frag(rq, wi, recycle);
}
@@ -405,84 +401,78 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
mlx5e_free_rx_wqe(rq, wi, false);
}
-static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
+static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
- int err;
int i;
- if (rq->xsk_pool) {
- int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
-
- /* Check in advance that we have enough frames, instead of
- * allocating one-by-one, failing and moving frames to the
- * Reuse Ring.
- */
- if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, pages_desired)))
- return -ENOMEM;
- }
-
for (i = 0; i < wqe_bulk; i++) {
- struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_rx_wqe_cyc *wqe;
- err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i);
- if (unlikely(err))
- goto free_wqes;
- }
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
- return 0;
-
-free_wqes:
- while (--i >= 0)
- mlx5e_dealloc_rx_wqe(rq, ix + i);
+ if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j)))
+ break;
+ }
- return err;
+ return i;
}
static inline void
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
- struct mlx5e_dma_info *di, u32 frag_offset, u32 len,
+ union mlx5e_alloc_unit *au, u32 frag_offset, u32 len,
unsigned int truesize)
{
- dma_sync_single_for_cpu(rq->pdev,
- di->addr + frag_offset,
- len, DMA_FROM_DEVICE);
- page_ref_inc(di->page);
+ dma_addr_t addr = page_pool_get_dma_addr(au->page);
+
+ dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
+ rq->buff.map_dir);
+ page_ref_inc(au->page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- di->page, frag_offset, len, truesize);
+ au->page, frag_offset, len, truesize);
}
static inline void
-mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
- struct mlx5e_dma_info *dma_info,
+mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct page *page, dma_addr_t addr,
int offset_from, int dma_offset, u32 headlen)
{
- const void *from = page_address(dma_info->page) + offset_from;
+ const void *from = page_address(page) + offset_from;
/* Aligning len to sizeof(long) optimizes memcpy performance */
unsigned int len = ALIGN(headlen, sizeof(long));
- dma_sync_single_for_cpu(pdev, dma_info->addr + dma_offset, len,
- DMA_FROM_DEVICE);
+ dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len,
+ rq->buff.map_dir);
skb_copy_to_linear_data(skb, from, len);
}
static void
mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
{
+ union mlx5e_alloc_unit *alloc_units = wi->alloc_units;
bool no_xdp_xmit;
- struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
int i;
/* A common case for AF_XDP. */
- if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE))
+ if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe))
return;
- no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap,
- MLX5_MPWRQ_PAGES_PER_WQE);
+ no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
- for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
- if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
- mlx5e_page_release(rq, &dma_info[i], recycle);
+ if (rq->xsk_pool) {
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ xsk_buff_free(alloc_units[i].xsk);
+ } else {
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle);
+ }
}
static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
@@ -567,11 +557,13 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
if (!(header_offset & (PAGE_SIZE - 1))) {
- err = mlx5e_page_alloc(rq, dma_info);
+ union mlx5e_alloc_unit au;
+
+ err = mlx5e_page_alloc_pool(rq, &au);
if (unlikely(err))
goto err_unmap;
- addr = dma_info->addr;
- page = dma_info->page;
+ page = dma_info->page = au.page;
+ addr = dma_info->addr = page_pool_get_dma_addr(au.page);
} else {
dma_info->addr = addr + header_offset;
dma_info->page = page;
@@ -604,7 +596,7 @@ err_unmap:
dma_info = &shampo->info[--index];
if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
- mlx5e_page_release(rq, dma_info, true);
+ mlx5e_page_release_dynamic(rq, dma_info->page, true);
}
}
rq->stats->buff_alloc_err++;
@@ -652,57 +644,55 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
- struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
- struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ union mlx5e_alloc_unit *au = &wi->alloc_units[0];
struct mlx5e_icosq *sq = rq->icosq;
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
+ u32 offset; /* 17-bit value with MTT. */
u16 pi;
int err;
int i;
- /* Check in advance that we have enough frames, instead of allocating
- * one-by-one, failing and moving frames to the Reuse Ring.
- */
- if (rq->xsk_pool &&
- unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) {
- err = -ENOMEM;
- goto err;
- }
-
if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
err = mlx5e_alloc_rx_hd_mpwqe(rq);
if (unlikely(err))
goto err;
}
- pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS);
+ pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs);
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
- for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
- err = mlx5e_page_alloc(rq, dma_info);
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
+ dma_addr_t addr;
+
+ err = mlx5e_page_alloc_pool(rq, au);
if (unlikely(err))
goto err_unmap;
- umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+ addr = page_pool_get_dma_addr(au->page);
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
}
- bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
wi->consumed_strides = 0;
umr_wqe->ctrl.opmod_idx_opcode =
cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
MLX5_OPCODE_UMR);
- umr_wqe->uctrl.xlt_offset =
- cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix)));
+
+ offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
- .num_wqebbs = MLX5E_UMR_WQEBBS,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
.umr.rq = rq,
};
- sq->pc += MLX5E_UMR_WQEBBS;
+ sq->pc += rq->mpwqe.umr_wqebbs;
sq->doorbell_cseg = &umr_wqe->ctrl;
@@ -710,8 +700,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
err_unmap:
while (--i >= 0) {
- dma_info--;
- mlx5e_page_release(rq, dma_info, true);
+ au--;
+ mlx5e_page_release_dynamic(rq, au->page, true);
}
err:
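
The new xlt_offset computation replaces the old MLX5_ALIGNED_MTTS_OCTW helper with explicit arithmetic. A runnable sketch under stated assumptions: an octword is 16 bytes, and sizeof(struct mlx5_mtt) is assumed to be 8 (one __be64 ptag), so the offset reduces to ix * mtts_per_wqe / 2.

    #include <stdio.h>

    int main(void)
    {
    	unsigned int octword = 16;      /* MLX5_OCTWORD */
    	unsigned int mtt_size = 8;      /* assumed sizeof(struct mlx5_mtt) */
    	unsigned int mtts_per_wqe = 64; /* made-up example value */
    	unsigned int ix = 5;            /* WQE index */

    	printf("xlt_offset = %u octwords\n",
    	       ix * mtts_per_wqe * mtt_size / octword); /* 160 */
    	return 0;
    }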
@@ -745,7 +735,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
if (hd_info->page != deleted_page) {
deleted_page = hd_info->page;
- mlx5e_page_release(rq, hd_info, false);
+ mlx5e_page_release_dynamic(rq, hd_info->page, false);
}
}
@@ -760,7 +750,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
- struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
/* Don't recycle, this function is called on rq/netdev close */
mlx5e_free_rx_mpwqe(rq, wi, false);
}
@@ -768,38 +758,51 @@ static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
- u8 wqe_bulk;
- int err;
+ int wqe_bulk, count;
+ bool busy = false;
+ u16 head;
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return false;
- wqe_bulk = rq->wqe.info.wqe_bulk;
-
- if (mlx5_wq_cyc_missing(wq) < wqe_bulk)
+ if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk)
return false;
if (rq->page_pool)
page_pool_nid_changed(rq->page_pool, numa_mem_id());
- do {
- u16 head = mlx5_wq_cyc_get_head(wq);
+ wqe_bulk = mlx5_wq_cyc_missing(wq);
+ head = mlx5_wq_cyc_get_head(wq);
- err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
- if (unlikely(err)) {
- rq->stats->buff_alloc_err++;
- break;
- }
+ /* Don't allow any newly allocated WQEs to share a page with old WQEs
+ * that aren't completed yet; stop the bulk early at a page boundary
+ * instead.
+ */
+ wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;
- mlx5_wq_cyc_push_n(wq, wqe_bulk);
- } while (mlx5_wq_cyc_missing(wq) >= wqe_bulk);
+ if (!rq->xsk_pool)
+ count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
+ else if (likely(!rq->xsk_pool->dma_need_sync))
+ count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
+ else
+ /* If dma_need_sync is true, xsk_buff_alloc_batch performs the
+ * same sync check internally and falls back to returning a
+ * single frame per call, so calling xsk_buff_alloc in a loop
+ * is more efficient.
+ */
+ count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk);
+
+ mlx5_wq_cyc_push_n(wq, count);
+ if (unlikely(count != wqe_bulk)) {
+ rq->stats->buff_alloc_err++;
+ busy = true;
+ }
/* ensure wqes are visible to device before updating doorbell record */
dma_wmb();
mlx5_wq_cyc_update_db_record(wq);
- return !!err;
+ return busy;
}
void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
@@ -958,8 +961,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
rq->stats->congst_umr++;
-#define UMR_WQE_BULK (2)
- if (likely(missing < UMR_WQE_BULK))
+ if (likely(missing < rq->mpwqe.min_wqe_bulk))
return false;
if (rq->page_pool)
@@ -968,7 +970,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
head = rq->mpwqe.actual_wq_head;
i = missing;
do {
- alloc_err = mlx5e_alloc_rx_mpwqe(rq, head);
+ alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
+ mlx5e_alloc_rx_mpwqe(rq, head);
if (unlikely(alloc_err))
break;
@@ -1115,7 +1118,7 @@ static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr
static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct tcphdr *skb_tcp_hd)
{
- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
struct tcphdr *last_tcp_hd;
void *last_hd_addr;
@@ -1347,7 +1350,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
}
/* True when explicitly set via priv flag, or XDP prog is loaded */
- if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
+ get_cqe_tls_offload(cqe))
goto csum_unnecessary;
/* CQE csum doesn't cover padding octets in short ethernet
@@ -1408,11 +1412,15 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
skb->mac_len = ETH_HLEN;
- mlx5e_tls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
+ if (unlikely(get_cqe_tls_offload(cqe)))
+ mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);
+ if (unlikely(mlx5e_macsec_is_rx_flow(cqe)))
+ mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);
+
if (lro_num_seg > 1) {
mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
@@ -1487,7 +1495,7 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
static inline
struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
u32 frag_size, u16 headroom,
- u32 cqe_bcnt)
+ u32 cqe_bcnt, u32 metasize)
{
struct sk_buff *skb = build_skb(va, frag_size);
@@ -1499,6 +1507,9 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
skb_reserve(skb, headroom);
skb_put(skb, cqe_bcnt);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
return skb;
}
@@ -1506,86 +1517,161 @@ static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
u32 len, struct xdp_buff *xdp)
{
xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
- xdp_prepare_buff(xdp, va, headroom, len, false);
+ xdp_prepare_buff(xdp, va, headroom, len, true);
}
static struct sk_buff *
-mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
{
- struct mlx5e_dma_info *di = wi->di;
+ union mlx5e_alloc_unit *au = wi->au;
u16 rx_headroom = rq->buff.headroom;
- struct xdp_buff xdp;
+ struct bpf_prog *prog;
struct sk_buff *skb;
+ u32 metasize = 0;
void *va, *data;
+ dma_addr_t addr;
u32 frag_size;
- va = page_address(di->page) + wi->offset;
+ va = page_address(au->page) + wi->offset;
data = va + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
- dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
- frag_size, DMA_FROM_DEVICE);
- net_prefetchw(va); /* xdp_frame data area */
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
+ frag_size, rq->buff.map_dir);
net_prefetch(data);
- mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
- if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
- return NULL; /* page/packet was consumed by XDP */
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ if (mlx5e_xdp_handle(rq, au->page, prog, &xdp))
+ return NULL; /* page/packet was consumed by XDP */
- rx_headroom = xdp.data - xdp.data_hard_start;
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ cqe_bcnt = xdp.data_end - xdp.data;
+ }
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
- skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
if (unlikely(!skb))
return NULL;
/* queue up for recycling/reuse */
- page_ref_inc(di->page);
+ page_ref_inc(au->page);
return skb;
}
static struct sk_buff *
-mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
{
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
struct mlx5e_wqe_frag_info *head_wi = wi;
- u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt);
- u16 frag_headlen = headlen;
- u16 byte_cnt = cqe_bcnt - headlen;
+ union mlx5e_alloc_unit *au = wi->au;
+ u16 rx_headroom = rq->buff.headroom;
+ struct skb_shared_info *sinfo;
+ u32 frag_consumed_bytes;
+ struct bpf_prog *prog;
+ struct xdp_buff xdp;
struct sk_buff *skb;
+ dma_addr_t addr;
+ u32 truesize;
+ void *va;
- /* XDP is not supported in this configuration, as incoming packets
- * might spread among multiple pages.
- */
- skb = napi_alloc_skb(rq->cq.napi,
- ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
- if (unlikely(!skb)) {
- rq->stats->buff_alloc_err++;
- return NULL;
- }
+ va = page_address(au->page) + wi->offset;
+ frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
- net_prefetchw(skb->data);
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
+ rq->buff.frame0_sz, rq->buff.map_dir);
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetch(va + rx_headroom);
+
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp);
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
+ truesize = 0;
+
+ cqe_bcnt -= frag_consumed_bytes;
+ frag_info++;
+ wi++;
- while (byte_cnt) {
- u16 frag_consumed_bytes =
- min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt);
+ while (cqe_bcnt) {
+ skb_frag_t *frag;
- mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen,
- frag_consumed_bytes, frag_info->frag_stride);
- byte_cnt -= frag_consumed_bytes;
- frag_headlen = 0;
+ au = wi->au;
+
+ frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
+ frag_consumed_bytes, rq->buff.map_dir);
+
+ if (!xdp_buff_has_frags(&xdp)) {
+ /* Init on the first fragment to avoid cold cache access
+ * when possible.
+ */
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(&xdp);
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, au->page);
+ skb_frag_off_set(frag, wi->offset);
+ skb_frag_size_set(frag, frag_consumed_bytes);
+
+ if (page_is_pfmemalloc(au->page))
+ xdp_buff_set_frag_pfmemalloc(&xdp);
+
+ sinfo->xdp_frags_size += frag_consumed_bytes;
+ truesize += frag_info->frag_stride;
+
+ cqe_bcnt -= frag_consumed_bytes;
frag_info++;
wi++;
}
- /* copy header */
- mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset,
- headlen);
- /* skb linear part was allocated with headlen and aligned to long */
- skb->tail += headlen;
- skb->len += headlen;
+ au = head_wi->au;
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
+ if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ int i;
+
+ for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
+ mlx5e_put_rx_frag(rq, &head_wi[i], true);
+ }
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz,
+ xdp.data - xdp.data_hard_start,
+ xdp.data_end - xdp.data,
+ xdp.data - xdp.data_meta);
+ if (unlikely(!skb))
+ return NULL;
+
+ page_ref_inc(au->page);
+
+ if (unlikely(xdp_buff_has_frags(&xdp))) {
+ int i;
+
+ /* build_skb resets sinfo->nr_frags, so recompute the frag count. */
+ xdp_update_skb_shared_info(skb, wi - head_wi - 1,
+ sinfo->xdp_frags_size, truesize,
+ xdp_buff_is_frag_pfmemalloc(&xdp));
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ page_ref_inc(skb_frag_page(frag));
+ }
+ }
return skb;
}
@@ -1602,6 +1688,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
}
+static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ trigger_report(rq, cqe);
+ rq->stats->wqe_err++;
+}
+
static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -1615,15 +1707,15 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
- skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe,
mlx5e_skb_from_cqe_linear,
mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
+ mlx5e_xsk_skb_from_cqe_linear,
+ rq, wi, cqe_bcnt);
if (!skb) {
/* probably for XDP */
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
@@ -1669,14 +1761,14 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
mlx5e_skb_from_cqe_linear,
mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
+ rq, wi, cqe_bcnt);
if (!skb) {
/* probably for XDP */
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
@@ -1705,11 +1797,11 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
{
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
- struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
- u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
- u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
+ u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
struct mlx5e_rx_wqe_ll *wqe;
struct mlx5_wq_ll *wq;
struct sk_buff *skb;
@@ -1718,8 +1810,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -1761,12 +1852,13 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
#endif
static void
-mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- u32 data_bcnt, u32 data_offset)
+mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
+ union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset)
{
net_prefetchw(skb->data);
while (data_bcnt) {
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
unsigned int truesize;
@@ -1775,12 +1867,12 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_i
else
truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
- mlx5e_add_skb_frag(rq, skb, di, data_offset,
+ mlx5e_add_skb_frag(rq, skb, au, data_offset,
pg_consumed_bytes, truesize);
data_bcnt -= pg_consumed_bytes;
data_offset = 0;
- di++;
+ au++;
}
}
@@ -1788,12 +1880,13 @@ static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
+ union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
- struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
u32 frag_offset = head_offset + headlen;
u32 byte_cnt = cqe_bcnt - headlen;
- struct mlx5e_dma_info *head_di = di;
+ union mlx5e_alloc_unit *head_au = au;
struct sk_buff *skb;
+ dma_addr_t addr;
skb = napi_alloc_skb(rq->cq.napi,
ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
@@ -1804,14 +1897,17 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
net_prefetchw(skb->data);
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
if (unlikely(frag_offset >= PAGE_SIZE)) {
- di++;
+ au++;
frag_offset -= PAGE_SIZE;
}
- mlx5e_fill_skb_data(skb, rq, di, byte_cnt, frag_offset);
+ mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
/* copy header */
- mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, head_offset, headlen);
+ addr = page_pool_get_dma_addr(head_au->page);
+ mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
+ head_offset, head_offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
@@ -1823,12 +1919,13 @@ static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
- struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
u16 rx_headroom = rq->buff.headroom;
- u32 cqe_bcnt32 = cqe_bcnt;
- struct xdp_buff xdp;
+ struct bpf_prog *prog;
struct sk_buff *skb;
+ u32 metasize = 0;
void *va, *data;
+ dma_addr_t addr;
u32 frag_size;
/* Check packet size. Note LRO doesn't use linear SKB */
@@ -1837,35 +1934,43 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return NULL;
}
- va = page_address(di->page) + head_offset;
+ va = page_address(au->page) + head_offset;
data = va + rx_headroom;
- frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
- dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
- frag_size, DMA_FROM_DEVICE);
- net_prefetchw(va); /* xdp_frame data area */
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
+ frag_size, rq->buff.map_dir);
net_prefetch(data);
- mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
- if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
- if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
- __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
- return NULL; /* page/packet was consumed by XDP */
- }
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
- rx_headroom = xdp.data - xdp.data_hard_start;
- frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
- skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ cqe_bcnt = xdp.data_end - xdp.data;
+ }
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
if (unlikely(!skb))
return NULL;
/* queue up for recycling/reuse */
- page_ref_inc(di->page);
+ page_ref_inc(au->page);
return skb;
}
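With this rework the XDP program runs first and may move the packet's head, metadata and tail, so rx_headroom, metasize and cqe_bcnt are re-derived from the final xdp_buff pointers. A standalone sketch of that pointer arithmetic; struct fake_xdp and all offsets are stand-ins for struct xdp_buff, not the real layout:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct fake_xdp {
    void *data_hard_start;
    void *data_meta;
    void *data;
    void *data_end;
};

int main(void)
{
    char buf[2048];
    struct fake_xdp xdp = {
        .data_hard_start = buf,
        .data_meta = buf + 248,        /* program reserved 8 meta bytes */
        .data = buf + 256,
        .data_end = buf + 256 + 1400,
    };
    /* Recompute the three quantities exactly as the hunk above does. */
    size_t rx_headroom = (char *)xdp.data - (char *)xdp.data_hard_start;
    size_t metasize = (char *)xdp.data - (char *)xdp.data_meta;
    size_t len = (char *)xdp.data_end - (char *)xdp.data;

    assert(rx_headroom == 256 && metasize == 8 && len == 1400);
    printf("headroom=%zu meta=%zu len=%zu\n", rx_headroom, metasize, len);
    return 0;
}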
-static void
+static struct sk_buff *
mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5_cqe64 *cqe, u16 header_index)
{
@@ -1883,13 +1988,13 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
/* build SKB around header */
- dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE);
+ dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
prefetchw(hdr);
prefetch(data);
- skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size);
+ skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);
if (unlikely(!skb))
- return;
+ return NULL;
/* queue up for recycling/reuse */
page_ref_inc(head->page);
@@ -1901,20 +2006,18 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
ALIGN(head_size, sizeof(long)));
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
- return;
+ return NULL;
}
prefetchw(skb->data);
- mlx5e_copy_skb_header(rq->pdev, skb, head,
+ mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
head_offset + rx_headroom,
rx_headroom, head_size);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += head_size;
skb->len += head_size;
}
- rq->hw_gro_data->skb = skb;
- NAPI_GRO_CB(skb)->count = 1;
- skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size;
+ return skb;
}
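mlx5e_skb_from_cqe_shampo() now returns the skb instead of stashing it in rq->hw_gro_data, and the GRO session bookkeeping (segment count, gso_size) moves to the caller, as the shampo handler hunks further down show. That bookkeeping in miniature, with toy types and illustrative sizes:

#include <stdint.h>
#include <stdio.h>

struct gro_session {
    uint32_t count;
    uint32_t gso_size;
};

/* First CQE of a session: count = 1 and gso_size derived from the payload
 * length; later matching CQEs only bump the count. Illustrative only. */
static void session_add(struct gro_session *s, uint32_t cqe_bcnt,
                        uint32_t head_size)
{
    if (!s->count) {
        s->count = 1;
        s->gso_size = cqe_bcnt - head_size; /* payload only, no header */
    } else {
        s->count++;
    }
}

int main(void)
{
    struct gro_session s = { 0 };

    session_add(&s, 1514, 66);
    session_add(&s, 1514, 66);
    printf("count=%u gso_size=%u\n", s.count, s.gso_size); /* 2, 1448 */
    return 0;
}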
static void
@@ -1948,7 +2051,7 @@ mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
{
int nr_frags = skb_shinfo(skb)->nr_frags;
- return PAGE_SIZE * nr_frags + data_bcnt <= GSO_MAX_SIZE;
+ return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
}
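The check above is deliberately conservative: it assumes every existing fragment occupies a full page and only then tests the new byte count against the GRO cap (now GRO_LEGACY_MAX_SIZE rather than GSO_MAX_SIZE). A standalone sketch with assumed constants:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SZ 4096u
#define GRO_MAX 65536u /* stand-in for GRO_LEGACY_MAX_SIZE */

/* Worst-case space test: every fragment counted as a full page. */
static bool has_enough_space(unsigned int nr_frags, unsigned int data_bcnt)
{
    return PAGE_SZ * nr_frags + data_bcnt <= GRO_MAX;
}

int main(void)
{
    printf("%d\n", has_enough_space(15, 1448)); /* 1: 61440 + 1448 fits */
    printf("%d\n", has_enough_space(16, 1448)); /* 0: would exceed cap */
    return 0;
}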
static void
@@ -1959,7 +2062,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
- mlx5e_page_release(rq, &shampo->info[header_index], true);
+ mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true);
}
bitmap_clear(shampo->bitmap, header_index, 1);
}
@@ -1967,28 +2070,28 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ u16 head_size = cqe->shampo.header_size;
struct sk_buff **skb = &rq->hw_gro_data->skb;
bool flush = cqe->shampo.flush;
bool match = cqe->shampo.match;
struct mlx5e_rq_stats *stats = rq->stats;
struct mlx5e_rx_wqe_ll *wqe;
- struct mlx5e_dma_info *di;
+ union mlx5e_alloc_unit *au;
struct mlx5e_mpw_info *wi;
struct mlx5_wq_ll *wq;
- wi = &rq->mpwqe.info[wqe_id];
+ wi = mlx5e_get_mpw_info(rq, wqe_id);
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -2006,9 +2109,16 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
}
if (!*skb) {
- mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+ if (likely(head_size))
+ *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+ else
+ *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
+ page_idx);
if (unlikely(!*skb))
goto free_hd_entry;
+
+ NAPI_GRO_CB(*skb)->count = 1;
+ skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
} else {
NAPI_GRO_CB(*skb)->count++;
if (NAPI_GRO_CB(*skb)->count == 2 &&
@@ -2022,8 +2132,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
}
}
- di = &wi->umr.dma_info[page_idx];
- mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
+ if (likely(head_size)) {
+ au = &wi->alloc_units[page_idx];
+ mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset);
+ }
mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
if (flush)
@@ -2044,11 +2156,11 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
{
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
- struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
- u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
- u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
+ u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
struct mlx5e_rx_wqe_ll *wqe;
struct mlx5_wq_ll *wq;
struct sk_buff *skb;
@@ -2057,8 +2169,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -2072,9 +2183,10 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
- skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
+ skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq,
mlx5e_skb_from_cqe_mpwrq_linear,
mlx5e_skb_from_cqe_mpwrq_nonlinear,
+ mlx5e_xsk_skb_from_cqe_mpwrq_linear,
rq, wi, cqe_bcnt, head_offset, page_idx);
if (!skb)
goto mpwrq_cqe_out;
@@ -2188,7 +2300,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
priv = mlx5i_epriv(netdev);
tstamp = &priv->tstamp;
- stats = &priv->channel_stats[rq->ix].rq;
+ stats = rq->stats;
flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
g = (flags_rqpn >> 28) & 3;
@@ -2263,7 +2375,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
mlx5e_skb_from_cqe_linear,
mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
+ rq, wi, cqe_bcnt);
if (!skb)
goto wq_free_wqe;
@@ -2285,46 +2397,6 @@ const struct mlx5e_rx_handlers mlx5i_rx_handlers = {
};
#endif /* CONFIG_MLX5_CORE_IPOIB */
-#ifdef CONFIG_MLX5_EN_IPSEC
-
-static void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
-{
- struct mlx5_wq_cyc *wq = &rq->wqe.wq;
- struct mlx5e_wqe_frag_info *wi;
- struct sk_buff *skb;
- u32 cqe_bcnt;
- u16 ci;
-
- ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
- wi = get_frag(rq, ci);
- cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
-
- if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- rq->stats->wqe_err++;
- goto wq_free_wqe;
- }
-
- skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
- mlx5e_skb_from_cqe_linear,
- mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
- if (unlikely(!skb)) /* a DROP, save the page-reuse checks */
- goto wq_free_wqe;
-
- skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt);
- if (unlikely(!skb))
- goto wq_free_wqe;
-
- mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
- napi_gro_receive(rq->cq.napi, skb);
-
-wq_free_wqe:
- mlx5e_free_rx_wqe(rq, wi, true);
- mlx5_wq_cyc_pop(wq);
-}
-
-#endif /* CONFIG_MLX5_EN_IPSEC */
-
int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk)
{
struct net_device *netdev = rq->netdev;
@@ -2341,10 +2413,6 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
- if (mlx5_fpga_is_ipsec_device(mdev)) {
- netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
- return -EINVAL;
- }
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
if (!rq->handle_rx_cqe) {
@@ -2363,19 +2431,12 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
default: /* MLX5_WQ_TYPE_CYCLIC */
rq->wqe.skb_from_cqe = xsk ?
mlx5e_xsk_skb_from_cqe_linear :
- mlx5e_rx_is_linear_skb(params, NULL) ?
+ mlx5e_rx_is_linear_skb(mdev, params, NULL) ?
mlx5e_skb_from_cqe_linear :
mlx5e_skb_from_cqe_nonlinear;
rq->post_wqes = mlx5e_post_rx_wqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
-
-#ifdef CONFIG_MLX5_EN_IPSEC
- if ((mlx5_fpga_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) &&
- priv->ipsec)
- rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
- else
-#endif
- rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
if (!rq->handle_rx_cqe) {
netdev_err(netdev, "RX handler of RQ is not set\n");
return -EINVAL;
@@ -2406,7 +2467,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
goto free_wqe;
}
- skb = mlx5e_skb_from_cqe_nonlinear(rq, cqe, wi, cqe_bcnt);
+ skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt);
if (!skb)
goto free_wqe;
@@ -2424,7 +2485,7 @@ free_wqe:
void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params)
{
- rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params, NULL) ?
+ rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ?
mlx5e_skb_from_cqe_linear :
mlx5e_skb_from_cqe_nonlinear;
rq->post_wqes = mlx5e_post_rx_wqes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 8c9163d2c646..08a75654f5f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -334,6 +334,7 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
netdev_info(ndev, "\t[%d] %s start..\n", i, st.name);
buf[count] = st.st_func(priv);
netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]);
+ count++;
}
mutex_unlock(&priv->state_lock);
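The one-line en_selftest.c fix above restores per-test reporting: without the increment, every test writes its result into the same buf slot and only the last one survives. The bug class in miniature, with illustrative values:

#include <stdio.h>

int main(void)
{
    long long buf[3] = { 0 };
    int count = 0;

    for (int i = 0; i < 3; i++) {
        buf[count] = i + 100; /* run test i, store its result */
        count++;              /* the missing line: advance the slot */
    }
    for (int i = 0; i < 3; i++)
        printf("test %d -> %lld\n", i, buf[i]);
    return 0;
}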
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 2a9bfc3ffa2e..03c1841970f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -32,9 +32,14 @@
#include "lib/mlx5.h"
#include "en.h"
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
#include "en_accel/en_accel.h"
#include "en/ptp.h"
+#include "en/port.h"
+
+#ifdef CONFIG_PAGE_POOL_STATS
+#include <net/page_pool.h>
+#endif
static unsigned int stats_grps_num(struct mlx5e_priv *priv)
{
@@ -182,6 +187,19 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
+#ifdef CONFIG_PAGE_POOL_STATS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) },
+#endif
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) },
@@ -348,6 +366,19 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
s->rx_congst_umr += rq_stats->congst_umr;
s->rx_arfs_err += rq_stats->arfs_err;
s->rx_recover += rq_stats->recover;
+#ifdef CONFIG_PAGE_POOL_STATS
+ s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast;
+ s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow;
+ s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty;
+ s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill;
+ s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive;
+ s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order;
+ s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached;
+ s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full;
+ s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring;
+ s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full;
+ s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref;
+#endif
#ifdef CONFIG_MLX5_EN_TLS
s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets;
s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes;
@@ -443,8 +474,8 @@ static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv,
int i;
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
- max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
- stats = READ_ONCE(priv->htb.qos_sq_stats);
+ max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ stats = READ_ONCE(priv->htb_qos_sq_stats);
for (i = 0; i < max_qos_sqs; i++) {
mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i]));
@@ -454,6 +485,35 @@ static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv,
}
}
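The htb_max_qos_sqs reads above keep the smp_load_acquire pairing with the smp_store_release in mlx5e_open_qos_sq: the writer fills a stats slot before releasing the new count, so any reader that acquires the count sees fully initialized slots. A userspace analogue with C11 atomics; the names and single-threaded driver are illustrative:

#include <stdatomic.h>
#include <stdio.h>

static long *qos_stats[64];
static atomic_uint max_qos_sqs;
static long value = 42;

static void publish_one(void)
{
    unsigned int n = atomic_load_explicit(&max_qos_sqs, memory_order_relaxed);

    qos_stats[n] = &value;                     /* initialize the slot first */
    atomic_store_explicit(&max_qos_sqs, n + 1,
                          memory_order_release); /* then publish the count */
}

static void read_all(void)
{
    unsigned int n = atomic_load_explicit(&max_qos_sqs,
                                          memory_order_acquire);

    for (unsigned int i = 0; i < n; i++)
        printf("sq %u -> %ld\n", i, *qos_stats[i]);
}

int main(void)
{
    publish_one();
    read_all();
    return 0;
}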
+#ifdef CONFIG_PAGE_POOL_STATS
+static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c)
+{
+ struct mlx5e_rq_stats *rq_stats = c->rq.stats;
+ struct page_pool *pool = c->rq.page_pool;
+ struct page_pool_stats stats = { 0 };
+
+ if (!page_pool_get_stats(pool, &stats))
+ return;
+
+ rq_stats->pp_alloc_fast = stats.alloc_stats.fast;
+ rq_stats->pp_alloc_slow = stats.alloc_stats.slow;
+ rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order;
+ rq_stats->pp_alloc_empty = stats.alloc_stats.empty;
+ rq_stats->pp_alloc_waive = stats.alloc_stats.waive;
+ rq_stats->pp_alloc_refill = stats.alloc_stats.refill;
+
+ rq_stats->pp_recycle_cached = stats.recycle_stats.cached;
+ rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full;
+ rq_stats->pp_recycle_ring = stats.recycle_stats.ring;
+ rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full;
+ rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt;
+}
+#else
+static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c)
+{
+}
+#endif
+
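The #else branch above compiles an empty stub when CONFIG_PAGE_POOL_STATS is off, so the call site in the sw stats update stays unconditional. The pattern in miniature; FEATURE_STATS stands in for the Kconfig symbol:

#include <stdio.h>

#define FEATURE_STATS 1

#if FEATURE_STATS
static void update_pool_stats(int channel)
{
    printf("channel %d: refreshed page pool counters\n", channel);
}
#else
static void update_pool_stats(int channel)
{
    (void)channel; /* no-op when the feature is compiled out */
}
#endif

int main(void)
{
    for (int i = 0; i < 2; i++)
        update_pool_stats(i); /* caller needs no #ifdef */
    return 0;
}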
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -461,9 +521,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
memset(s, 0, sizeof(*s));
+ for (i = 0; i < priv->channels.num; i++) /* for active channels only */
+ mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]);
+
for (i = 0; i < priv->stats_nch; i++) {
struct mlx5e_channel_stats *channel_stats =
- &priv->channel_stats[i];
+ priv->channel_stats[i];
+
int j;
mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq);
@@ -577,17 +641,26 @@ static const struct counter_desc vnic_env_stats_dev_oob_desc[] = {
VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) },
};
+static const struct counter_desc vnic_env_stats_drop_desc[] = {
+ { "rx_oversize_pkts_buffer",
+ VNIC_ENV_OFF(vport_env.eth_wqe_too_small) },
+};
+
#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \
(MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \
ARRAY_SIZE(vnic_env_stats_steer_desc) : 0)
#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \
(MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \
ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0)
+#define NUM_VNIC_ENV_DROP_COUNTERS(dev) \
+ (MLX5_CAP_GEN(dev, eth_wqe_too_small) ? \
+ ARRAY_SIZE(vnic_env_stats_drop_desc) : 0)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env)
{
return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) +
- NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev);
+ NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev) +
+ NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env)
@@ -601,6 +674,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env)
for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
vnic_env_stats_dev_oob_desc[i].format);
+
+ for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_drop_desc[i].format);
+
return idx;
}
@@ -615,6 +693,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env)
for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out,
vnic_env_stats_dev_oob_desc, i);
+
+ for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++)
+ data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_drop_desc, i);
+
return idx;
}
@@ -624,7 +707,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env)
u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
struct mlx5_core_dev *mdev = priv->mdev;
- if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ if (!mlx5e_stats_grp_vnic_env_num_stats(priv))
return;
MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
@@ -1158,16 +1241,100 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
-void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
- struct ethtool_fec_stats *fec_stats)
+static int fec_num_lanes(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return 0;
+
+ return MLX5_GET(pmlp_reg, out, width);
+}
+
+static int fec_active_mode(struct mlx5_core_dev *mdev)
+{
+ unsigned long fec_active_long;
+ u32 fec_active;
+
+ if (mlx5e_get_fec_mode(mdev, &fec_active, NULL))
+ return MLX5E_FEC_NOFEC;
+
+ fec_active_long = fec_active;
+ return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE);
+}
+
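fec_active_mode() above maps the one-hot active-FEC mask to a mode index by finding the first set bit. A userspace analogue using POSIX ffs(), which is 1-based, hence the -1; the enum values are stand-ins for the driver's FEC modes:

#include <stdio.h>
#include <strings.h>

enum { FEC_NOFEC, FEC_FIRECODE, FEC_RS_528_514 };

static int active_mode(unsigned int mask)
{
    if (!mask)
        return FEC_NOFEC;
    return ffs((int)mask) - 1;
}

int main(void)
{
    printf("mode=%d\n", active_mode(1u << FEC_RS_528_514)); /* 2 */
    printf("mode=%d\n", active_mode(0));                    /* 0, no FEC */
    return 0;
}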
+#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \
+ fec_stats->corrected_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_corrected_blocks_lane##idx); \
+ fec_stats->uncorrectable_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_uncorrectable_blocks_lane##idx); \
+})
+
+static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats,
+ u32 *ppcnt, u8 lanes)
+{
+ if (lanes > 3) { /* 4 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(3);
+ MLX5E_STATS_SET_FEC_BLOCK(2);
+ }
+ if (lanes > 1) /* 2 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(1);
+ if (lanes > 0) /* 1 lane */
+ MLX5E_STATS_SET_FEC_BLOCK(0);
+}
+
+static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt)
+{
+ fec_stats->corrected_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_corrected_blocks);
+ fec_stats->uncorrectable_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_uncorrectable_blocks);
+}
+
+static void fec_set_block_stats(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
{
- u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
struct mlx5_core_dev *mdev = priv->mdev;
- u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int mode = fec_active_mode(mdev);
- if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
+ if (mode == MLX5E_FEC_NOFEC)
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+ switch (mode) {
+ case MLX5E_FEC_RS_528_514:
+ case MLX5E_FEC_RS_544_514:
+ case MLX5E_FEC_LLRS_272_257_1:
+ fec_set_rs_stats(fec_stats, out);
return;
+ case MLX5E_FEC_FIRECODE:
+ fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev));
+ }
+}
+
+static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
MLX5_SET(ppcnt_reg, in, local_port, 1);
MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
@@ -1181,6 +1348,16 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
phy_corrected_bits);
}
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group))
+ return;
+
+ fec_set_corrected_bits_total(priv, fec_stats);
+ fec_set_block_stats(priv, fec_stats);
+}
+
#define PPORT_ETH_EXT_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
@@ -1742,17 +1919,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; }
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls)
{
- return mlx5e_tls_get_count(priv);
+ return mlx5e_ktls_get_count(priv);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls)
{
- return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+ return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls)
{
- return idx + mlx5e_tls_get_stats(priv, data + idx);
+ return idx + mlx5e_ktls_get_stats(priv, data + idx);
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; }
@@ -1792,6 +1969,19 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
+#ifdef CONFIG_PAGE_POOL_STATS
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) },
+#endif
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) },
@@ -1929,6 +2119,8 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, err_cqe) },
{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) },
{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) },
};
static const struct counter_desc ptp_rq_stats_desc[] = {
@@ -2013,13 +2205,13 @@ static const struct counter_desc qos_sq_stats_desc[] = {
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
{
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
- return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs);
+ return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb_max_qos_sqs);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos)
{
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
- u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
+ u16 max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
int i, qid;
for (qid = 0; qid < max_qos_sqs; qid++)
@@ -2037,8 +2229,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos)
int i, qid;
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
- max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
- stats = READ_ONCE(priv->htb.qos_sq_stats);
+ max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ stats = READ_ONCE(priv->htb_qos_sq_stats);
for (qid = 0; qid < max_qos_sqs; qid++) {
struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]);
@@ -2076,7 +2268,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp)
for (i = 0; i < NUM_PTP_CH_STATS; i++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
- ptp_ch_stats_desc[i].format);
+ "%s", ptp_ch_stats_desc[i].format);
if (priv->tx_ptp_opened) {
for (tc = 0; tc < priv->max_opened_tc; tc++)
@@ -2197,21 +2389,21 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels)
for (i = 0; i < max_nch; i++)
for (j = 0; j < NUM_CH_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch,
ch_stats_desc, j);
for (i = 0; i < max_nch; i++) {
for (j = 0; j < NUM_RQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq,
rq_stats_desc, j);
for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq,
xskrq_stats_desc, j);
for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq,
rq_xdpsq_stats_desc, j);
}
@@ -2219,17 +2411,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels)
for (i = 0; i < max_nch; i++)
for (j = 0; j < NUM_SQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc],
sq_stats_desc, j);
for (i = 0; i < max_nch; i++) {
for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq,
xsksq_stats_desc, j);
for (j = 0; j < NUM_XDPSQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq,
xdpsq_stats_desc, j);
}
@@ -2253,7 +2445,7 @@ MLX5E_DEFINE_STATS_GRP(channels, 0);
MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0);
MLX5E_DEFINE_STATS_GRP(eth_ext, 0);
static MLX5E_DEFINE_STATS_GRP(tls, 0);
-static MLX5E_DEFINE_STATS_GRP(ptp, 0);
+MLX5E_DEFINE_STATS_GRP(ptp, 0);
static MLX5E_DEFINE_STATS_GRP(qos, 0);
/* The stats groups order is opposite to the update_stats() call order */
@@ -2272,13 +2464,15 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
&MLX5E_STATS_GRP(pme),
#ifdef CONFIG_MLX5_EN_IPSEC
&MLX5E_STATS_GRP(ipsec_sw),
- &MLX5E_STATS_GRP(ipsec_hw),
#endif
&MLX5E_STATS_GRP(tls),
&MLX5E_STATS_GRP(channels),
&MLX5E_STATS_GRP(per_port_buff_congest),
&MLX5E_STATS_GRP(ptp),
&MLX5E_STATS_GRP(qos),
+#ifdef CONFIG_MLX5_EN_MACSEC
+ &MLX5E_STATS_GRP(macsec_hw),
+#endif
};
unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 2c1ed5b81be6..9f781085be47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -205,7 +205,19 @@ struct mlx5e_sw_stats {
u64 ch_aff_change;
u64 ch_force_irq;
u64 ch_eq_rearm;
-
+#ifdef CONFIG_PAGE_POOL_STATS
+ u64 rx_pp_alloc_fast;
+ u64 rx_pp_alloc_slow;
+ u64 rx_pp_alloc_slow_high_order;
+ u64 rx_pp_alloc_empty;
+ u64 rx_pp_alloc_refill;
+ u64 rx_pp_alloc_waive;
+ u64 rx_pp_recycle_cached;
+ u64 rx_pp_recycle_cache_full;
+ u64 rx_pp_recycle_ring;
+ u64 rx_pp_recycle_ring_full;
+ u64 rx_pp_recycle_released_ref;
+#endif
#ifdef CONFIG_MLX5_EN_TLS
u64 tx_tls_encrypted_packets;
u64 tx_tls_encrypted_bytes;
@@ -261,6 +273,10 @@ struct mlx5e_qcounter_stats {
u32 rx_if_down_packets;
};
+#define VNIC_ENV_GET(vnic_env_stats, c) \
+ MLX5_GET(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \
+ vport_env.c)
+
struct mlx5e_vnic_env_stats {
__be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
};
@@ -352,6 +368,19 @@ struct mlx5e_rq_stats {
u64 congst_umr;
u64 arfs_err;
u64 recover;
+#ifdef CONFIG_PAGE_POOL_STATS
+ u64 pp_alloc_fast;
+ u64 pp_alloc_slow;
+ u64 pp_alloc_slow_high_order;
+ u64 pp_alloc_empty;
+ u64 pp_alloc_refill;
+ u64 pp_alloc_waive;
+ u64 pp_recycle_cached;
+ u64 pp_recycle_cache_full;
+ u64 pp_recycle_ring;
+ u64 pp_recycle_ring_full;
+ u64 pp_recycle_released_ref;
+#endif
#ifdef CONFIG_MLX5_EN_TLS
u64 tls_decrypted_packets;
u64 tls_decrypted_bytes;
@@ -428,6 +457,8 @@ struct mlx5e_ptp_cq_stats {
u64 err_cqe;
u64 abort;
u64 abort_abs_diff_ns;
+ u64 resync_cqe;
+ u64 resync_event;
};
struct mlx5e_stats {
@@ -457,7 +488,8 @@ extern MLX5E_DECLARE_STATS_GRP(per_prio);
extern MLX5E_DECLARE_STATS_GRP(pme);
extern MLX5E_DECLARE_STATS_GRP(channels);
extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest);
-extern MLX5E_DECLARE_STATS_GRP(ipsec_hw);
extern MLX5E_DECLARE_STATS_GRP(ipsec_sw);
+extern MLX5E_DECLARE_STATS_GRP(ptp);
+extern MLX5E_DECLARE_STATS_GRP(macsec_hw);
#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3d45f4ae80c0..5a6aa61ec82a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -39,10 +39,6 @@
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
-#include <linux/if_macvlan.h>
-#include <net/tc_act/tc_pedit.h>
-#include <net/tc_act/tc_csum.h>
-#include <net/psample.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
@@ -62,6 +58,8 @@
#include "en/mod_hdr.h"
#include "en/tc_tun_encap.h"
#include "en/tc/sample.h"
+#include "en/tc/act/act.h"
+#include "en/tc/post_meter.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
@@ -70,12 +68,33 @@
#include "lag/lag.h"
#include "lag/mp.h"
-#define nic_chains(priv) ((priv)->fs.tc.chains)
-#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
-
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
+struct mlx5e_tc_table {
+ /* Protects the dynamic assignment of the t parameter
+ * which is the nic tc root table.
+ */
+ struct mutex t_lock;
+ struct mlx5e_priv *priv;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_table *miss_t;
+ struct mlx5_fs_chains *chains;
+ struct mlx5e_post_act *post_act;
+
+ struct rhashtable ht;
+
+ struct mod_hdr_tbl mod_hdr;
+ struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
+ DECLARE_HASHTABLE(hairpin_tbl, 8);
+
+ struct notifier_block netdevice_nb;
+ struct netdev_net_notifier netdevice_nn;
+
+ struct mlx5_tc_ct_priv *ct;
+ struct mapping_ctx *mapping;
+};
+
struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
[CHAIN_TO_REG] = {
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
@@ -110,8 +129,27 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
.mlen = 16,
},
[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
+ [PACKET_COLOR_TO_REG] = packet_color_to_reg,
};
+struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
+{
+ struct mlx5e_tc_table *tc;
+
+ tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
+ return tc ? tc : ERR_PTR(-ENOMEM);
+}
+
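mlx5e_tc_table_alloc() above uses the kernel's ERR_PTR convention: on failure the returned pointer itself encodes a negative errno, so callers distinguish a valid table from an error without a separate NULL check. A self-contained sketch of the convention; MAX_ERRNO and the helpers mirror the kernel's, simplified:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static void *err_ptr(long err) { return (void *)err; }
static int is_err(const void *p)
{
    return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}
static long ptr_err(const void *p) { return (long)p; }

struct tc_table { int dummy; };

static struct tc_table *tc_table_alloc(int fail)
{
    struct tc_table *tc = fail ? NULL : calloc(1, sizeof(*tc));

    return tc ? tc : err_ptr(-ENOMEM); /* same shape as the hunk above */
}

int main(void)
{
    struct tc_table *tc = tc_table_alloc(1);

    if (is_err(tc))
        printf("alloc failed: %ld\n", ptr_err(tc)); /* -ENOMEM, -12 on Linux */
    else
        free(tc);
    return 0;
}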
+void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
+{
+ kvfree(tc);
+}
+
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
+{
+ return tc->chains;
+}
+
/* To avoid a false lock dependency warning, set the tc_ht lock
 * class different from the lock class of the ht being used: when deleting the
 * last flow from a group and then deleting the group, we get into del_sw_flow_group(),
 * which calls rhashtable_destroy on fg->ftes_hash.
static struct lock_class_key tc_ht_lock_key;
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
+static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
@@ -209,12 +248,9 @@ mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
char *modact;
int err;
- err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
- if (err)
- return err;
-
- modact = mod_hdr_acts->actions +
- (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
+ modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
/* Firmware has a 5-bit length field and 0 means 32 bits */
if (mlen == 32)
@@ -248,9 +284,34 @@ mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
return NULL;
}
+struct mlx5e_flow_meters *
+mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_priv *priv;
+
+ if (is_mdev_switchdev_mode(dev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ priv = netdev_priv(uplink_rpriv->netdev);
+ if (!uplink_priv->flow_meters)
+ uplink_priv->flow_meters =
+ mlx5e_flow_meters_init(priv,
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
+ if (!IS_ERR(uplink_priv->flow_meters))
+ return uplink_priv->flow_meters;
+ }
+
+ return NULL;
+}
+
static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
@@ -262,7 +323,7 @@ get_ct_priv(struct mlx5e_priv *priv)
return uplink_priv->ct_priv;
}
- return priv->fs.tc.ct;
+ return tc->ct;
}
static struct mlx5e_tc_psample *
@@ -282,6 +343,24 @@ get_sample_priv(struct mlx5e_priv *priv)
return NULL;
}
+static struct mlx5e_post_act *
+get_post_action(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->post_act;
+ }
+
+ return tc->post_act;
+}
+
struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
@@ -304,13 +383,121 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
if (is_mdev_switchdev_mode(priv->mdev)) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
-
return;
}
mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}
+static bool
+is_flow_meter_action(struct mlx5_flow_attr *attr)
+{
+ return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
+ (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER));
+}
+
+static int
+mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_post_act *post_act = get_post_action(priv);
+ struct mlx5e_post_meter_priv *post_meter;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
+ if (IS_ERR(meter)) {
+ mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
+ return PTR_ERR(meter);
+ }
+
+ ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
+ post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter,
+ meter->red_counter);
+ if (IS_ERR(post_meter)) {
+ mlx5_core_err(priv->mdev, "Failed to init post meter\n");
+ goto err_meter_init;
+ }
+
+ attr->meter_attr.meter = meter;
+ attr->meter_attr.post_meter = post_meter;
+ attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+
+err_meter_init:
+ mlx5e_tc_meter_put(meter);
+ return PTR_ERR(post_meter);
+}
+
+static void
+mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr)
+{
+ mlx5e_post_meter_cleanup(attr->meter_attr.post_meter);
+ mlx5e_tc_meter_put(attr->meter_attr.meter);
+}
+
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
+ &attr->parse_attr->mod_hdr_acts;
+
+ return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
+ spec, attr,
+ mod_hdr_acts);
+ }
+
+ if (!is_mdev_switchdev_mode(priv->mdev))
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
+ return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
+
+ if (is_flow_meter_action(attr)) {
+ err = mlx5e_tc_add_flow_meter(priv, attr);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+}
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
+ return;
+ }
+
+ if (!is_mdev_switchdev_mode(priv->mdev)) {
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
+ return;
+ }
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
+ mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
+ return;
+ }
+
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+
+ if (attr->meter_attr.meter)
+ mlx5e_tc_del_flow_meter(attr);
+}
+
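mlx5e_tc_rule_offload()/mlx5e_tc_rule_unoffload() above centralize what used to be open-coded branching at each call site: CT first, then the non-switchdev NIC path, then sampling, then the plain eswitch rule (with the optional meter attach). The dispatch shape in a toy sketch; the flags and return strings are stand-ins:

#include <stdio.h>

enum { FLAG_CT = 1 << 0, FLAG_SAMPLE = 1 << 1 };

struct attr { unsigned int flags; int switchdev; };

/* Same branch order as mlx5e_tc_rule_offload() above. */
static const char *rule_offload(const struct attr *a)
{
    if (a->flags & FLAG_CT)
        return "ct offload";
    if (!a->switchdev)
        return "nic rule";
    if (a->flags & FLAG_SAMPLE)
        return "sample offload";
    return "eswitch rule";
}

int main(void)
{
    struct attr a = { .flags = FLAG_SAMPLE, .switchdev = 1 };

    printf("%s\n", rule_offload(&a)); /* sample offload */
    return 0;
}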
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
@@ -333,7 +520,7 @@ void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
char *modact;
- modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);
+ modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
/* Firmware has a 5-bit length field and 0 means 32 bits */
if (mlen == 32)
@@ -403,7 +590,7 @@ bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
return flow_flag_test(flow, ESWITCH);
}
-static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, FT);
}
@@ -413,7 +600,7 @@ bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
return flow_flag_test(flow, OFFLOADED);
}
-static int get_flow_name_space(struct mlx5e_tc_flow *flow)
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
{
return mlx5e_is_eswitch_flow(flow) ?
MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
@@ -422,11 +609,12 @@ static int get_flow_name_space(struct mlx5e_tc_flow *flow)
static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
+ return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
&esw->offloads.mod_hdr :
- &priv->fs.tc.mod_hdr;
+ &tc->mod_hdr;
}
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
@@ -437,7 +625,7 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
struct mlx5e_mod_hdr_handle *mh;
mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
- get_flow_name_space(flow),
+ mlx5e_get_flow_namespace(flow),
&parse_attr->mod_hdr_acts);
if (IS_ERR(mh))
return PTR_ERR(mh);
@@ -625,6 +813,7 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
struct mlx5e_priv *priv = hp->func_priv;
struct ttc_params ttc_params;
+ struct mlx5_ttc_table *ttc;
int err;
err = mlx5e_hairpin_create_indirect_rqt(hp);
@@ -642,9 +831,10 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
goto err_create_ttc_table;
}
+ ttc = mlx5e_fs_get_ttc(priv->fs, false);
netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
hp->num_channels,
- mlx5_get_ttc_flow_table(priv->fs.ttc)->id);
+ mlx5_get_ttc_flow_table(ttc)->id);
return 0;
@@ -731,10 +921,11 @@ static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
u16 peer_vhca_id, u8 prio)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5e_hairpin_entry *hpe;
u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
- hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
+ hash_for_each_possible(tc->hairpin_tbl, hpe,
hairpin_hlist, hash_key) {
if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
refcount_inc(&hpe->refcnt);
@@ -748,11 +939,12 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
struct mlx5e_hairpin_entry *hpe)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
/* no more hairpin flows for us, release the hairpin pair */
- if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
+ if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
return;
hash_del(&hpe->hairpin_hlist);
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
if (!IS_ERR_OR_NULL(hpe->hp)) {
netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
@@ -808,6 +1000,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
int peer_ifindex = parse_attr->mirred_ifindex[0];
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
@@ -836,10 +1029,10 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
if (err)
return err;
- mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_lock(&tc->hairpin_tbl_lock);
hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
if (hpe) {
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
wait_for_completion(&hpe->res_ready);
if (IS_ERR(hpe->hp)) {
@@ -851,7 +1044,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
if (!hpe) {
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
return -ENOMEM;
}
@@ -863,9 +1056,9 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
refcount_set(&hpe->refcnt, 1);
init_completion(&hpe->res_ready);
- hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
+ hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
hash_hairpin_info(peer_id, match_prio));
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
params.log_data_size = 16;
params.log_data_size = min_t(u8, params.log_data_size,
@@ -941,10 +1134,11 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
struct mlx5_flow_context *flow_context = &spec->flow_context;
- struct mlx5_fs_chains *nic_chains = nic_chains(priv);
+ struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
- struct mlx5e_tc_table *tc = &priv->fs.tc;
struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_fs_chains *nic_chains;
struct mlx5_flow_act flow_act = {
.action = attr->action,
.flags = FLOW_ACT_NO_APPEND,
@@ -953,6 +1147,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
struct mlx5_flow_table *ft;
int dest_ix = 0;
+ nic_chains = mlx5e_nic_chains(tc);
flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
flow_context->flow_tag = nic_attr->flow_tag;
@@ -977,7 +1172,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
if (IS_ERR(dest[dest_ix].ft))
return ERR_CAST(dest[dest_ix].ft);
} else {
- dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
+ dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
}
dest_ix++;
}
@@ -1005,7 +1200,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
mutex_unlock(&tc->t_lock);
netdev_err(priv->netdev,
"Failed to create tc offload table\n");
- rule = ERR_CAST(priv->fs.tc.t);
+ rule = ERR_CAST(tc->t);
goto err_ft_get;
}
}
@@ -1048,6 +1243,21 @@ err_ft_get:
}
static int
+alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
+ struct mlx5_flow_attr *attr)
+
+{
+ struct mlx5_fc *counter;
+
+ counter = mlx5_fc_create(counter_dev, true);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
+
+ attr->counter = counter;
+ return 0;
+}
+
+static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
@@ -1055,7 +1265,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_core_dev *dev = priv->mdev;
- struct mlx5_fc *counter;
int err;
parse_attr = attr->parse_attr;
@@ -1067,22 +1276,20 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(dev, true);
- if (IS_ERR(counter))
- return PTR_ERR(counter);
-
- attr->counter = counter;
+ err = alloc_flow_attr_counter(dev, attr);
+ if (err)
+ return err;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
if (err)
return err;
}
- if (flow_flag_test(flow, CT))
- flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
attr, &parse_attr->mod_hdr_acts);
else
flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
@@ -1095,8 +1302,10 @@ void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
struct mlx5_flow_handle *rule,
struct mlx5_flow_attr *attr)
{
- struct mlx5_fs_chains *nic_chains = nic_chains(priv);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_fs_chains *nic_chains;
+ nic_chains = mlx5e_nic_chains(tc);
mlx5_del_flow_rules(rule);
if (attr->chain || attr->prio)
@@ -1111,37 +1320,39 @@ void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_flow_attr *attr = flow->attr;
- struct mlx5e_tc_table *tc = &priv->fs.tc;
flow_flag_clear(flow, OFFLOADED);
- if (flow_flag_test(flow, CT))
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
else if (!IS_ERR_OR_NULL(flow->rule[0]))
mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
/* Remove root table if no rules are left to avoid
* extra steering hops.
*/
- mutex_lock(&priv->fs.tc.t_lock);
+ mutex_lock(&tc->t_lock);
if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
!IS_ERR_OR_NULL(tc->t)) {
- mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
- priv->fs.tc.t = NULL;
+ mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
+ tc->t = NULL;
}
- mutex_unlock(&priv->fs.tc.t_lock);
-
- kvfree(attr->parse_attr);
+ mutex_unlock(&tc->t_lock);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
- mlx5_fc_destroy(priv->mdev, attr->counter);
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(priv->mdev, attr->counter);
if (flow_flag_test(flow, HAIRPIN))
mlx5e_hairpin_flow_del(priv, flow);
+ free_flow_post_acts(flow);
+
+ kvfree(attr->parse_attr);
kfree(flow->attr);
}
@@ -1151,40 +1362,27 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr)
{
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
struct mlx5_flow_handle *rule;
- if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
- if (flow_flag_test(flow, CT)) {
- mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
-
- rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
- flow, spec, attr,
- mod_hdr_acts);
- } else if (flow_flag_test(flow, SAMPLE)) {
- rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
- mlx5e_tc_get_flow_tun_id(flow));
- } else {
- rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
- }
+ rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
if (IS_ERR(rule))
return rule;
if (attr->esw_attr->split_count) {
flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
- if (IS_ERR(flow->rule[1])) {
- if (flow_flag_test(flow, CT))
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
- else
- mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
- return flow->rule[1];
- }
+ if (IS_ERR(flow->rule[1]))
+ goto err_rule1;
}
return rule;
+
+err_rule1:
+ mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
+ return flow->rule[1];
}
void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
@@ -1193,24 +1391,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
{
flow_flag_clear(flow, OFFLOADED);
- if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
- goto offload_rule_0;
-
- if (flow_flag_test(flow, CT)) {
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
- return;
- }
-
- if (flow_flag_test(flow, SAMPLE)) {
- mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
- return;
- }
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
+ return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
if (attr->esw_attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
-offload_rule_0:
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}
struct mlx5_flow_handle *
@@ -1218,8 +1405,13 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec)
{
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct mlx5e_mod_hdr_handle *mh = NULL;
struct mlx5_flow_attr *slow_attr;
struct mlx5_flow_handle *rule;
+ bool fwd_and_modify_cap;
+ u32 chain_mapping = 0;
+ int err;
slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
if (!slow_attr)
@@ -1228,15 +1420,58 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->esw_attr->split_count = 0;
- slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
+ if (!fwd_and_modify_cap)
+ goto skip_restore;
+
+ err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
+ if (err)
+ goto err_get_chain;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ CHAIN_TO_REG, chain_mapping);
+ if (err)
+ goto err_reg_set;
+
+ mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
+ MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
+ if (IS_ERR(mh)) {
+ err = PTR_ERR(mh);
+ goto err_attach;
+ }
+
+ slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);
+skip_restore:
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
- if (!IS_ERR(rule))
- flow_flag_set(flow, SLOW);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto err_offload;
+ }
+
+ flow->slow_mh = mh;
+ flow->chain_mapping = chain_mapping;
+ flow_flag_set(flow, SLOW);
+ mlx5e_mod_hdr_dealloc(&mod_acts);
kfree(slow_attr);
return rule;
+
+err_offload:
+ if (fwd_and_modify_cap)
+ mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
+err_attach:
+err_reg_set:
+ if (fwd_and_modify_cap)
+ mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
+err_get_chain:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ kfree(slow_attr);
+ return ERR_PTR(err);
}
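The error handling added above follows the layered-goto unwind idiom: each acquisition gets a label, and a failure jumps to the label that releases everything taken so far, in reverse order; note how the fwd_and_modify_cap steps are skipped symmetrically on both paths. The idiom reduced to a sketch with stand-in steps:

#include <stdio.h>

static int acquire(const char *what) { printf("acquire %s\n", what); return 0; }
static void release(const char *what) { printf("release %s\n", what); }

static int setup(int fail_at)
{
    int err;

    err = fail_at == 1 ? -1 : acquire("chain mapping");
    if (err)
        goto err_get_chain;
    err = fail_at == 2 ? -1 : acquire("mod hdr");
    if (err)
        goto err_attach;
    err = fail_at == 3 ? -1 : acquire("rule");
    if (err)
        goto err_offload;
    return 0;

err_offload:
    release("mod hdr");       /* undo step 2 */
err_attach:
    release("chain mapping"); /* undo step 1 */
err_get_chain:
    return err;
}

int main(void)
{
    return setup(3) ? 1 : 0; /* fails at step 3, unwinds steps 2 and 1 */
}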
void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
@@ -1253,8 +1488,18 @@ void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->esw_attr->split_count = 0;
- slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+ if (flow->slow_mh) {
+ slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ slow_attr->modify_hdr = mlx5e_mod_hdr_get(flow->slow_mh);
+ }
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
+ if (flow->slow_mh) {
+ mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), flow->slow_mh);
+ mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
+ flow->chain_mapping = 0;
+ flow->slow_mh = NULL;
+ }
flow_flag_clear(flow, SLOW);
kfree(slow_attr);
}
@@ -1308,8 +1553,6 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
mutex_unlock(&uplink_priv->unready_flows_lock);
}
-static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
-
bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
struct mlx5_core_dev *out_mdev, *route_mdev;
@@ -1320,11 +1563,14 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
route_priv = netdev_priv(route_dev);
route_mdev = route_priv->mdev;
- if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
- route_mdev->coredev_type != MLX5_COREDEV_VF)
+ if (out_mdev->coredev_type != MLX5_COREDEV_PF)
return false;
- return same_hw_devs(out_priv, route_priv);
+ if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
+ route_mdev->coredev_type != MLX5_COREDEV_SF)
+ return false;
+
+ return mlx5e_same_hw_devs(out_priv, route_priv);
}
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
@@ -1364,26 +1610,120 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
}
int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
{
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
struct mlx5_modify_hdr *mod_hdr;
mod_hdr = mlx5_modify_header_alloc(priv->mdev,
- get_flow_name_space(flow),
+ mlx5e_get_flow_namespace(flow),
mod_hdr_acts->num_actions,
mod_hdr_acts->actions);
if (IS_ERR(mod_hdr))
return PTR_ERR(mod_hdr);
- WARN_ON(flow->attr->modify_hdr);
- flow->attr->modify_hdr = mod_hdr;
+ WARN_ON(attr->modify_hdr);
+ attr->modify_hdr = mod_hdr;
return 0;
}
static int
+set_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack,
+ bool *encap_valid,
+ bool *vf_tun)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct net_device *encap_dev = NULL;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *out_priv;
+ int out_index;
+ int err = 0;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return 0;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+ *encap_valid = true;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ struct net_device *out_dev;
+ int mirred_ifindex;
+
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
+ out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+ if (!out_dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+ err = -ENODEV;
+ goto out;
+ }
+ err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
+ extack, &encap_dev, encap_valid);
+ dev_put(out_dev);
+ if (err)
+ goto out;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ out_priv = netdev_priv(encap_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[out_index].rep = rpriv->rep;
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
+ }
+
+ if (*vf_tun && esw_attr->out_count > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+static void
+clean_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ bool *vf_tun)
+{
+ struct mlx5_esw_flow_attr *esw_attr;
+ int out_index;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ mlx5e_detach_encap(priv, flow, attr, out_index);
+ kfree(attr->parse_attr->tun_info[out_index]);
+ }
+}
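
set_encap_dests() and clean_encap_dests() are deliberately symmetric: every out_index the setup path attaches is detached on teardown, and both derive vf_tun from the same dest flags. A minimal sketch of that paired setup/teardown shape, with hypothetical attach/detach helpers in place of mlx5e_attach_encap()/mlx5e_detach_encap():

#include <stdbool.h>
#include <stdio.h>

#define MAX_DESTS  2
#define DEST_ENCAP 0x1

struct dest { unsigned int flags; bool attached; };

/* Hypothetical stand-ins for the encap attach/detach calls. */
static int attach_one(struct dest *d)  { d->attached = true;  return 0; }
static void detach_one(struct dest *d) { d->attached = false; }

static int set_dests(struct dest *dests, int n)
{
	int i, err;

	for (i = 0; i < n; i++) {
		if (!(dests[i].flags & DEST_ENCAP))
			continue;
		err = attach_one(&dests[i]);
		if (err)
			return err;
	}
	return 0;
}

static void clean_dests(struct dest *dests, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (dests[i].flags & DEST_ENCAP)
			detach_one(&dests[i]);
}

int main(void)
{
	struct dest dests[MAX_DESTS] = { { DEST_ENCAP, false }, { 0, false } };

	if (!set_dests(dests, MAX_DESTS))
		clean_dests(dests, MAX_DESTS);
	printf("dest0 attached after teardown: %d\n", dests[0].attached);
	return 0;
}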
+
+static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
@@ -1391,15 +1731,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
- bool vf_tun = false, encap_valid = true;
- struct net_device *encap_dev = NULL;
struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5e_priv *out_priv;
- struct mlx5_fc *counter;
+ bool vf_tun, encap_valid;
u32 max_prio, max_chain;
int err = 0;
- int out_index;
parse_attr = attr->parse_attr;
esw_attr = attr->esw_attr;
@@ -1430,7 +1765,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_out;
- if (!attr->chain && esw_attr->int_port) {
+ if (!attr->chain && esw_attr->int_port &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
/* If decap route device is internal port, change the
* source vport value in reg_c0 back to uplink just in
* case the rule performs goto chain > 0. If we have a miss
@@ -1445,7 +1781,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
metadata);
if (err)
- return err;
+ goto err_out;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
}
@@ -1461,13 +1799,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
if (attr->chain) {
NL_SET_ERR_MSG_MOD(extack,
"Internal port rule is only supported on chain 0");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto err_out;
}
if (attr->dest_chain) {
NL_SET_ERR_MSG_MOD(extack,
"Internal port rule offload doesn't support goto action");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto err_out;
}
int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
@@ -1475,56 +1815,25 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
flow_flag_test(flow, EGRESS) ?
MLX5E_TC_INT_PORT_EGRESS :
MLX5E_TC_INT_PORT_INGRESS);
- if (IS_ERR(int_port))
- return PTR_ERR(int_port);
-
- esw_attr->int_port = int_port;
- }
-
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
- struct net_device *out_dev;
- int mirred_ifindex;
-
- if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
- continue;
-
- mirred_ifindex = parse_attr->mirred_ifindex[out_index];
- out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
- if (!out_dev) {
- NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
- err = -ENODEV;
+ if (IS_ERR(int_port)) {
+ err = PTR_ERR(int_port);
goto err_out;
}
- err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
- extack, &encap_dev, &encap_valid);
- dev_put(out_dev);
- if (err)
- goto err_out;
- if (esw_attr->dests[out_index].flags &
- MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
- !esw_attr->dest_int_port)
- vf_tun = true;
- out_priv = netdev_priv(encap_dev);
- rpriv = out_priv->ppriv;
- esw_attr->dests[out_index].rep = rpriv->rep;
- esw_attr->dests[out_index].mdev = out_priv->mdev;
+ esw_attr->int_port = int_port;
}
- if (vf_tun && esw_attr->out_count > 1) {
- NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
- err = -EOPNOTSUPP;
+ err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
+ if (err)
goto err_out;
- }
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err)
goto err_out;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
- !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
if (vf_tun) {
- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
if (err)
goto err_out;
} else {
@@ -1535,20 +1844,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(esw_attr->counter_dev, true);
- if (IS_ERR(counter)) {
- err = PTR_ERR(counter);
+ err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
+ if (err)
goto err_out;
- }
-
- attr->counter = counter;
}
/* we get here if one of the following takes place:
* (1) there's no error
* (2) there's an encap action and we don't have valid neigh
*/
- if (!encap_valid)
+ if (!encap_valid || flow_flag_test(flow, SLOW))
flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
else
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
@@ -1585,8 +1890,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
- bool vf_tun = false;
- int out_index;
+ bool vf_tun;
esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
@@ -1610,29 +1914,17 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow->decap_route)
mlx5e_detach_decap_route(priv, flow);
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
- if (esw_attr->dests[out_index].flags &
- MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
- !esw_attr->dest_int_port)
- vf_tun = true;
- if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
- mlx5e_detach_encap(priv, flow, out_index);
- kfree(attr->parse_attr->tun_info[out_index]);
- }
- }
+ clean_encap_dests(priv, flow, attr, &vf_tun);
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts);
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
if (vf_tun && attr->modify_hdr)
mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
else
mlx5e_detach_mod_hdr(priv, flow);
}
- kfree(attr->sample_attr);
- kvfree(attr->parse_attr);
- kvfree(attr->esw_attr->rx_tun_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
@@ -1646,12 +1938,22 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
+ free_flow_post_acts(flow);
+
+ if (flow->attr->lag.count)
+ mlx5_lag_del_mpesw_rule(esw->dev);
+
+ kvfree(attr->esw_attr->rx_tun_attr);
+ kvfree(attr->parse_attr);
kfree(flow->attr);
}
struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
- return flow->attr->counter;
+ struct mlx5_flow_attr *attr;
+
+ attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
+ return attr->counter;
}
/* Iterate over tmp_list of flows attached to flow_list head. */
@@ -1863,7 +2165,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
- flow->tunnel_id = value;
+ flow->attr->tunnel_id = value;
return 0;
err_set:
@@ -1877,8 +2179,8 @@ err_enc_opts:
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
{
- u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
- u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
+ u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
+ u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
struct mlx5_eswitch *esw;
@@ -1894,11 +2196,6 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
enc_opts_id);
}
-u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
-{
- return flow->tunnel_id;
-}
-
void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
struct flow_match_basic *match, bool outer,
void *headers_c, void *headers_v)
@@ -1948,6 +2245,111 @@ u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
return ip_version;
}
+/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
+ * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
+ * +---------+----------------------------------------+
+ * |Arriving | Arriving Outer Header |
+ * | Inner +---------+---------+---------+----------+
+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
+ * +---------+---------+---------+---------+----------+
+ * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
+ * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
+ * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
+ * | CE | CE | CE | CE | CE |
+ * +---------+---------+---------+---------+----------+
+ *
+ * TC matches on the inner header after decapsulation on the tunnel device, but HW
+ * offload matches the inner ip_ecn value before the hardware decap action.
+ *
+ * Cells marked with * are changed from the original inner packet ip_ecn value during
+ * decap, so matching those values on inner ip_ecn before decap will fail.
+ *
+ * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
+ * except for outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE, and
+ * thus we can drop the inner ip_ecn = CE match.
+ */
+
+static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f,
+ bool *match_inner_ecn)
+{
+ u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ip match;
+
+ *match_inner_ecn = true;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ flow_rule_match_enc_ip(rule, &match);
+ outer_ecn_key = match.key->tos & INET_ECN_MASK;
+ outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+ flow_rule_match_ip(rule, &match);
+ inner_ecn_key = match.key->tos & INET_ECN_MASK;
+ inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
+ }
+
+ if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
+ NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!outer_ecn_mask) {
+ if (!inner_ecn_mask)
+ return 0;
+
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev,
+ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev,
+ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!inner_ecn_mask)
+ return 0;
+
+ /* Both inner and outer have full mask on ecn */
+
+ if (outer_ecn_key == INET_ECN_ECT_1) {
+ /* inner ecn might change by DECAP action */
+
+ NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
+ netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (outer_ecn_key != INET_ECN_CE)
+ return 0;
+
+ if (inner_ecn_key != INET_ECN_CE) {
+ /* Can't happen in software, as packet ecn will be changed to CE after decap */
+ NL_SET_ERR_MSG_MOD(extack,
+ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
+ netdev_warn(priv->netdev,
+ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in any case,
+ * drop match on inner ecn
+ */
+ *match_inner_ecn = false;
+
+ return 0;
+}
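
With both masks fully set, the function above reduces to a small truth table over the two ECN fields. A standalone sketch that mirrors its accept/reject/drop-inner-match outcomes (ECN codepoint values as in include/net/inet_ecn.h; the partial-mask rejections are assumed to have been handled already):

#include <stdio.h>

/* Codepoint values as in include/net/inet_ecn.h */
enum { ECN_NOT_ECT = 0, ECN_ECT_1 = 1, ECN_ECT_0 = 2, ECN_CE = 3 };

/* Returns 0 if offloadable, -1 if not; *match_inner says whether the
 * inner ECN match may be kept.  Assumes both masks are full (0x3).
 */
static int verify_tunnel_ecn(int outer_key, int inner_key, int *match_inner)
{
	*match_inner = 1;

	if (outer_key == ECN_ECT_1)
		return -1;		/* decap may rewrite the inner ECN */
	if (outer_key != ECN_CE)
		return 0;		/* Not-ECT/ECT(0): inner unchanged */
	if (inner_key != ECN_CE)
		return -1;		/* can't happen: decap forces inner to CE */
	*match_inner = 0;		/* CE/CE: drop the inner match */
	return 0;
}

int main(void)
{
	int keep, outer, inner;

	for (outer = 0; outer < 4; outer++)
		for (inner = 0; inner < 4; inner++)
			printf("outer=%d inner=%d -> %s keep_inner=%d\n",
			       outer, inner,
			       verify_tunnel_ecn(outer, inner, &keep) ?
			       "reject" : "offload", keep);
	return 0;
}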
+
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
@@ -2053,16 +2455,14 @@ static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
outer_headers);
}
-static void *get_match_headers_value(u32 flags,
- struct mlx5_flow_spec *spec)
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
{
return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
get_match_inner_headers_value(spec) :
get_match_outer_headers_value(spec);
}
-static void *get_match_headers_criteria(u32 flags,
- struct mlx5_flow_spec *spec)
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
{
return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
get_match_inner_headers_criteria(spec) :
@@ -2143,6 +2543,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector *dissector = rule->match.dissector;
enum fs_flow_table_type fs_type;
+ bool match_inner_ecn = true;
u16 addr_type = 0;
u8 ip_proto = 0;
u8 *match_level;
@@ -2196,6 +2597,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
headers_c = get_match_inner_headers_criteria(spec);
headers_v = get_match_inner_headers_value(spec);
}
+
+ err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
+ if (err)
+ return err;
}
err = mlx5e_flower_parse_meta(filter_dev, f);
@@ -2257,6 +2662,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
match.key->vlan_priority);
*match_level = MLX5_MATCH_L2;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
+ match.mask->vlan_eth_type &&
+ MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
+ ft_field_support.outer_second_vid,
+ fs_type)) {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_cvlan_tag, 1);
+ spec->match_criteria_enable |=
+ MLX5_MATCH_MISC_PARAMETERS;
+ }
}
} else if (*match_level != MLX5_MATCH_NONE) {
/* cvlan_tag enabled in match criteria and
@@ -2419,10 +2835,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
struct flow_match_ip match;
flow_rule_match_ip(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
- match.mask->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
- match.key->tos & 0x3);
+ if (match_inner_ecn) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
+ }
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
match.mask->tos >> 2);
@@ -2607,55 +3025,6 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
return err;
}
-struct pedit_headers {
- struct ethhdr eth;
- struct vlan_hdr vlan;
- struct iphdr ip4;
- struct ipv6hdr ip6;
- struct tcphdr tcp;
- struct udphdr udp;
-};
-
-struct pedit_headers_action {
- struct pedit_headers vals;
- struct pedit_headers masks;
- u32 pedits;
-};
-
-static int pedit_header_offsets[] = {
- [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
- [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
- [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
- [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
- [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
-};
-
-#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
-
-static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
- struct pedit_headers_action *hdrs,
- struct netlink_ext_ack *extack)
-{
- u32 *curr_pmask, *curr_pval;
-
- curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
- curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
-
- if (*curr_pmask & mask) { /* disallow acting twice on the same location */
- NL_SET_ERR_MSG_MOD(extack,
- "curr_pmask and new mask same. Acting twice on same location");
- goto out_err;
- }
-
- *curr_pmask |= mask;
- *curr_pval |= (val & mask);
-
- return 0;
-
-out_err:
- return -EOPNOTSUPP;
-}
-
struct mlx5_fields {
u8 field;
u8 field_bsize;
@@ -2759,34 +3128,32 @@ static unsigned long mask_to_le(unsigned long mask, int size)
return mask;
}
+
static int offload_pedit_fields(struct mlx5e_priv *priv,
int namespace,
- struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow_parse_attr *parse_attr,
u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
- int i, action_size, first, last, next_z;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
void *headers_c, *headers_v, *action, *vals_p;
u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
struct mlx5e_tc_mod_hdr_acts *mod_acts;
- struct mlx5_fields *f;
unsigned long mask, field_mask;
- int err;
+ int i, first, last, next_z;
+ struct mlx5_fields *f;
u8 cmd;
mod_acts = &parse_attr->mod_hdr_acts;
- headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
- headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
+ headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
set_masks = &hdrs[0].masks;
add_masks = &hdrs[1].masks;
set_vals = &hdrs[0].vals;
add_vals = &hdrs[1].vals;
- action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
-
for (i = 0; i < ARRAY_SIZE(fields); i++) {
bool skip;
@@ -2854,18 +3221,16 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
- if (err) {
+ action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
+ if (IS_ERR(action)) {
NL_SET_ERR_MSG_MOD(extack,
"too many pedit actions, can't offload");
mlx5_core_warn(priv->mdev,
"mlx5: parsed %d pedit actions, can't do more\n",
mod_acts->num_actions);
- return err;
+ return PTR_ERR(action);
}
- action = mod_acts->actions +
- (mod_acts->num_actions * action_size);
MLX5_SET(set_action_in, action, action_type, cmd);
MLX5_SET(set_action_in, action, field, f->field);
@@ -2895,205 +3260,51 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
return 0;
}
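
The heart of offload_pedit_fields() (unchanged by this hunk) is turning each software mask into a hardware offset/length pair: it locates the first and last set bits and rejects masks with a zero bit in between. A minimal sketch of that extraction for a 32-bit field, assuming GCC/Clang builtins:

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Returns 0 and fills offset/length when mask is one contiguous run of
 * set bits; returns -1 for masks with a hole, which the driver rejects.
 */
static int mask_to_range(unsigned int mask, int *offset, int *length)
{
	int first, last, next_z;

	if (!mask)
		return -1;
	if (mask == ~0u) {			/* full-field rewrite */
		*offset = 0;
		*length = 32;
		return 0;
	}
	first = ffs(mask);			/* 1-based lowest set bit */
	last = 32 - __builtin_clz(mask);	/* 1-based highest set bit */
	next_z = ffs(~(mask >> (first - 1))) + first - 1; /* first zero above the run */
	if (next_z <= last)
		return -1;			/* hole inside the mask */
	*offset = first - 1;
	*length = last - first + 1;
	return 0;
}

int main(void)
{
	int off, len;

	if (!mask_to_range(0x0ff0u, &off, &len))
		printf("0x0ff0: offset=%d length=%d\n", off, len); /* 4, 8 */
	if (mask_to_range(0x0f0fu, &off, &len))
		puts("0x0f0f rejected: non-contiguous");
	return 0;
}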
-static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
- int namespace)
-{
- if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
- return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
- else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
- return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
-}
-
-int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
- int namespace,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
-{
- int action_size, new_num_actions, max_hw_actions;
- size_t new_sz, old_sz;
- void *ret;
-
- if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
- return 0;
-
- action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
-
- max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
- namespace);
- new_num_actions = min(max_hw_actions,
- mod_hdr_acts->actions ?
- mod_hdr_acts->max_actions * 2 : 1);
- if (mod_hdr_acts->max_actions == new_num_actions)
- return -ENOSPC;
-
- new_sz = action_size * new_num_actions;
- old_sz = mod_hdr_acts->max_actions * action_size;
- ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
- if (!ret)
- return -ENOMEM;
-
- memset(ret + old_sz, 0, new_sz - old_sz);
- mod_hdr_acts->actions = ret;
- mod_hdr_acts->max_actions = new_num_actions;
-
- return 0;
-}
-
-void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
-{
- kfree(mod_hdr_acts->actions);
- mod_hdr_acts->actions = NULL;
- mod_hdr_acts->num_actions = 0;
- mod_hdr_acts->max_actions = 0;
-}
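
The removed alloc_mod_hdr_actions() grew the action array by doubling its capacity up to the namespace's max_modify_header_actions limit; that logic now lives behind mlx5e_mod_hdr_alloc() in en/mod_hdr.c. A standalone sketch of the same grow-by-doubling pattern, with illustrative sizes in place of the firmware caps:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ACTION_SZ   8	/* stand-in for MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) */
#define MAX_ACTIONS 16	/* stand-in for max_modify_header_actions */

struct mod_acts {
	void *actions;
	int num_actions;
	int max_actions;
};

/* Ensure room for one more action, doubling capacity up to the cap;
 * returns the next free slot, or NULL on cap/allocation failure.
 */
static void *action_alloc(struct mod_acts *m)
{
	if (m->num_actions == m->max_actions) {
		int new_max = m->actions ? m->max_actions * 2 : 1;
		void *p;

		if (new_max > MAX_ACTIONS)
			new_max = MAX_ACTIONS;
		if (new_max == m->max_actions)
			return NULL;		/* cap reached (-ENOSPC) */
		p = realloc(m->actions, (size_t)new_max * ACTION_SZ);
		if (!p)
			return NULL;		/* -ENOMEM */
		memset((char *)p + m->max_actions * ACTION_SZ, 0,
		       (size_t)(new_max - m->max_actions) * ACTION_SZ);
		m->actions = p;
		m->max_actions = new_max;
	}
	return (char *)m->actions + m->num_actions++ * ACTION_SZ;
}

int main(void)
{
	struct mod_acts m = { 0 };
	int n = 0;

	while (action_alloc(&m))
		n++;
	printf("allocated %d actions before hitting the cap\n", n);
	free(m.actions);
	return 0;
}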
-
static const struct pedit_headers zero_masks = {};
-static int
-parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
- const struct flow_action_entry *act, int namespace,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- struct netlink_ext_ack *extack)
-{
- u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
- int err = -EOPNOTSUPP;
- u32 mask, val, offset;
- u8 htype;
-
- htype = act->mangle.htype;
- err = -EOPNOTSUPP; /* can't be all optimistic */
-
- if (htype == FLOW_ACT_MANGLE_UNSPEC) {
- NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
- goto out_err;
- }
-
- if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
- NL_SET_ERR_MSG_MOD(extack,
- "The pedit offload action is not supported");
- goto out_err;
- }
-
- mask = act->mangle.mask;
- val = act->mangle.val;
- offset = act->mangle.offset;
-
- err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack);
- if (err)
- goto out_err;
-
- hdrs[cmd].pedits++;
-
- return 0;
-out_err:
- return err;
-}
-
-static int
-parse_pedit_to_reformat(const struct flow_action_entry *act,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct netlink_ext_ack *extack)
+static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
{
- u32 mask, val, offset;
- u32 *p;
-
- if (act->id != FLOW_ACTION_MANGLE) {
- NL_SET_ERR_MSG_MOD(extack, "Unsupported action id");
- return -EOPNOTSUPP;
- }
+ struct pedit_headers *cmd_masks;
+ u8 cmd;
- if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
- NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
- return -EOPNOTSUPP;
+ for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
+ cmd_masks = &parse_attr->hdrs[cmd].masks;
+ if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+ NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
+ netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
+ print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
+ 16, 1, cmd_masks, sizeof(zero_masks), true);
+ return -EOPNOTSUPP;
+ }
}
- mask = ~act->mangle.mask;
- val = act->mangle.val;
- offset = act->mangle.offset;
- p = (u32 *)&parse_attr->eth;
- *(p + (offset >> 2)) |= (val & mask);
-
return 0;
}
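
verify_offload_pedit_fields() relies on the common idiom of comparing against a static all-zero object: offload_pedit_fields() clears every mask bit it consumes, so any byte left set names a field the hardware cannot rewrite. A minimal sketch of that idiom:

#include <stdio.h>
#include <string.h>

struct hdr_masks { unsigned char eth[14]; unsigned char ip4[20]; };

static const struct hdr_masks zero_masks;	/* static => all zeroes */

static int verify_consumed(const struct hdr_masks *m)
{
	/* Anything the offload pass left behind is an unsupported field. */
	return memcmp(m, &zero_masks, sizeof(zero_masks)) ? -1 : 0;
}

int main(void)
{
	struct hdr_masks m = { 0 };

	printf("clean:    %d\n", verify_consumed(&m));	/* 0 */
	m.ip4[8] = 0xff;				/* leftover mask byte */
	printf("leftover: %d\n", verify_consumed(&m));	/* -1 */
	return 0;
}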
-static int parse_tc_pedit_action(struct mlx5e_priv *priv,
- const struct flow_action_entry *act, int namespace,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
-{
- if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
- return parse_pedit_to_reformat(act, parse_attr, extack);
-
- return parse_pedit_to_modify_hdr(priv, act, namespace,
- parse_attr, hdrs, extack);
-}
-
static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
u32 *action_flags,
struct netlink_ext_ack *extack)
{
- struct pedit_headers *cmd_masks;
int err;
- u8 cmd;
- err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
- action_flags, extack);
- if (err < 0)
+ err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
+ if (err)
goto out_dealloc_parsed_actions;
- for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
- cmd_masks = &hdrs[cmd].masks;
- if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
- NL_SET_ERR_MSG_MOD(extack,
- "attempt to offload an unsupported field");
- netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
- print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
- 16, 1, cmd_masks, sizeof(zero_masks), true);
- err = -EOPNOTSUPP;
- goto out_dealloc_parsed_actions;
- }
- }
+ err = verify_offload_pedit_fields(priv, parse_attr, extack);
+ if (err)
+ goto out_dealloc_parsed_actions;
return 0;
out_dealloc_parsed_actions:
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
return err;
}
-static bool csum_offload_supported(struct mlx5e_priv *priv,
- u32 action,
- u32 update_flags,
- struct netlink_ext_ack *extack)
-{
- u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
- TCA_CSUM_UPDATE_FLAG_UDP;
-
- /* The HW recalcs checksums only if re-writing headers */
- if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
- NL_SET_ERR_MSG_MOD(extack,
- "TC csum action is only offloaded with pedit");
- netdev_warn(priv->netdev,
- "TC csum action is only offloaded with pedit\n");
- return false;
- }
-
- if (update_flags & ~prot_flags) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload TC csum action for some header/s");
- netdev_warn(priv->netdev,
- "can't offload TC csum action for some header/s - flags %#x\n",
- update_flags);
- return false;
- }
-
- return true;
-}
-
struct ip_ttl_word {
__u8 ttl;
__u8 protocol;
@@ -3216,8 +3427,8 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv,
u8 ip_proto;
int i;
- headers_c = get_match_headers_criteria(actions, spec);
- headers_v = get_match_headers_value(actions, spec);
+ headers_c = mlx5e_get_match_headers_criteria(actions, spec);
+ headers_v = mlx5e_get_match_headers_value(actions, spec);
ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
/* for non-IP we only re-write MACs, so we're okay */
@@ -3291,11 +3502,11 @@ actions_match_supported_fdb(struct mlx5e_priv *priv,
static bool
actions_match_supported(struct mlx5e_priv *priv,
struct flow_action *flow_action,
+ u32 actions,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
- u32 actions = flow->attr->action;
bool ct_flow, ct_clear;
ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
@@ -3307,6 +3518,30 @@ actions_match_supported(struct mlx5e_priv *priv,
return false;
}
+ if (!(~actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
+ return false;
+ }
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
+ return false;
+ }
+
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
!modify_header_match_supported(priv, &parse_attr->spec, flow_action,
actions, ct_flow, ct_clear, extack))
@@ -3324,7 +3559,7 @@ static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv
return priv->mdev == peer_priv->mdev;
}
-static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
struct mlx5_core_dev *fmdev, *pmdev;
u64 fsystem_guid, psystem_guid;
@@ -3338,139 +3573,14 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
return (fsystem_guid == psystem_guid);
}
-static bool same_vf_reps(struct mlx5e_priv *priv,
- struct net_device *out_dev)
-{
- return mlx5e_eswitch_vf_rep(priv->netdev) &&
- priv->netdev == out_dev;
-}
-
-static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
- const struct flow_action_entry *act,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- u32 *action, struct netlink_ext_ack *extack)
-{
- u16 mask16 = VLAN_VID_MASK;
- u16 val16 = act->vlan.vid & VLAN_VID_MASK;
- const struct flow_action_entry pedit_act = {
- .id = FLOW_ACTION_MANGLE,
- .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
- .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
- .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
- .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
- };
- u8 match_prio_mask, match_prio_val;
- void *headers_c, *headers_v;
- int err;
-
- headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
- headers_v = get_match_headers_value(*action, &parse_attr->spec);
-
- if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
- MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
- NL_SET_ERR_MSG_MOD(extack,
- "VLAN rewrite action must have VLAN protocol match");
- return -EOPNOTSUPP;
- }
-
- match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
- match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
- if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Changing VLAN prio is not supported");
- return -EOPNOTSUPP;
- }
-
- err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
- *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-
- return err;
-}
-
-static int
-add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- u32 *action, struct netlink_ext_ack *extack)
-{
- const struct flow_action_entry prio_tag_act = {
- .vlan.vid = 0,
- .vlan.prio =
- MLX5_GET(fte_match_set_lyr_2_4,
- get_match_headers_value(*action,
- &parse_attr->spec),
- first_prio) &
- MLX5_GET(fte_match_set_lyr_2_4,
- get_match_headers_criteria(*action,
- &parse_attr->spec),
- first_prio),
- };
-
- return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
- &prio_tag_act, parse_attr, hdrs, action,
- extack);
-}
-
-static int validate_goto_chain(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- const struct flow_action_entry *act,
- u32 actions,
- struct netlink_ext_ack *extack)
-{
- bool is_esw = mlx5e_is_eswitch_flow(flow);
- struct mlx5_flow_attr *attr = flow->attr;
- bool ft_flow = mlx5e_is_ft_flow(flow);
- u32 dest_chain = act->chain_index;
- struct mlx5_fs_chains *chains;
- struct mlx5_eswitch *esw;
- u32 reformat_and_fwd;
- u32 max_chain;
-
- esw = priv->mdev->priv.eswitch;
- chains = is_esw ? esw_chains(esw) : nic_chains(priv);
- max_chain = mlx5_chains_get_chain_range(chains);
- reformat_and_fwd = is_esw ?
- MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
- MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
-
- if (ft_flow) {
- NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
- return -EOPNOTSUPP;
- }
-
- if (!mlx5_chains_backwards_supported(chains) &&
- dest_chain <= attr->chain) {
- NL_SET_ERR_MSG_MOD(extack,
- "Goto lower numbered chain isn't supported");
- return -EOPNOTSUPP;
- }
-
- if (dest_chain > max_chain) {
- NL_SET_ERR_MSG_MOD(extack,
- "Requested destination chain is out of supported range");
- return -EOPNOTSUPP;
- }
-
- if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
- MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
- !reformat_and_fwd) {
- NL_SET_ERR_MSG_MOD(extack,
- "Goto chain is not allowed if action has reformat or decap");
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
static int
actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr,
- struct pedit_headers_action *hdrs,
struct netlink_ext_ack *extack)
{
struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
enum mlx5_flow_namespace_type ns_type;
int err;
@@ -3478,19 +3588,18 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
!hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
return 0;
- ns_type = get_flow_name_space(flow);
+ ns_type = mlx5e_get_flow_namespace(flow);
- err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs,
- &attr->action, extack);
+ err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
if (err)
return err;
- /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
if (parse_attr->mod_hdr_acts.num_actions > 0)
return 0;
+ /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
return 0;
@@ -3502,295 +3611,372 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
return 0;
}
-static int
-parse_tc_nic_actions(struct mlx5e_priv *priv,
- struct flow_action *flow_action,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+static struct mlx5_flow_attr*
+mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
{
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
- struct pedit_headers_action hdrs[2] = {};
- const struct flow_action_entry *act;
- struct mlx5_nic_flow_attr *nic_attr;
- u32 action = 0;
- int err, i;
+ u32 attr_sz = ns_to_attr_sz(ns_type);
+ struct mlx5_flow_attr *attr2;
- if (!flow_action_has_entries(flow_action)) {
- NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
- return -EINVAL;
+ attr2 = mlx5_alloc_flow_attr(ns_type);
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
+ if (!attr2 || !parse_attr) {
+ kvfree(parse_attr);
+ kfree(attr2);
+ return NULL;
}
- if (!flow_action_hw_stats_check(flow_action, extack,
- FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
- NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
- return -EOPNOTSUPP;
+ memcpy(attr2, attr, attr_sz);
+ INIT_LIST_HEAD(&attr2->list);
+ parse_attr->filter_dev = attr->parse_attr->filter_dev;
+ attr2->action = 0;
+ attr2->flags = 0;
+ attr2->parse_attr = parse_attr;
+ attr2->dest_chain = 0;
+ attr2->dest_ft = NULL;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ attr2->esw_attr->out_count = 0;
+ attr2->esw_attr->split_count = 0;
}
- nic_attr = attr->nic_attr;
- nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- parse_attr = attr->parse_attr;
+ return attr2;
+}
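
The clone helper above copies the whole attr and then resets the fields that are owned by a single post-act stage. A sketch of that copy-then-reset shape with a cut-down, hypothetical attr:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct flow_attr {
	unsigned int action;	/* per-stage, must not be inherited */
	unsigned int flags;	/* per-stage */
	int dest_chain;		/* per-stage */
	int chain;		/* shared match context, deliberately kept */
};

/* Clone for the next stage: copy everything, then reset per-stage state. */
static struct flow_attr *clone_for_post_act(const struct flow_attr *attr)
{
	struct flow_attr *a2 = malloc(sizeof(*a2));

	if (!a2)
		return NULL;
	memcpy(a2, attr, sizeof(*a2));
	a2->action = 0;
	a2->flags = 0;
	a2->dest_chain = 0;
	return a2;
}

int main(void)
{
	struct flow_attr attr = { .action = 0x3, .flags = 0x1, .chain = 5 };
	struct flow_attr *a2 = clone_for_post_act(&attr);

	if (a2)
		printf("chain kept: %d, action reset: %u\n", a2->chain, a2->action);
	free(a2);
	return 0;
}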
- flow_action_for_each(i, act, flow_action) {
- switch (act->id) {
- case FLOW_ACTION_ACCEPT:
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- case FLOW_ACTION_DROP:
- action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
+static struct mlx5_core_dev *
+get_flow_counter_dev(struct mlx5e_tc_flow *flow)
+{
+ return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
+}
+
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_attr *attr;
+ int i;
+
+ list_for_each_entry(attr, &flow->attrs, list) {
+ esw_attr = attr->esw_attr;
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
+ return attr;
+ }
+ }
+
+ return NULL;
+}
+
+void
+mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr;
+
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
break;
- case FLOW_ACTION_MANGLE:
- case FLOW_ACTION_ADD:
- err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
- parse_attr, hdrs, NULL, extack);
- if (err)
- return err;
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
+ }
+}
+
+static void
+free_flow_post_acts(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr, *tmp;
+ bool vf_tun;
+
+ list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
break;
- case FLOW_ACTION_VLAN_MANGLE:
- err = add_vlan_rewrite_action(priv,
- MLX5_FLOW_NAMESPACE_KERNEL,
- act, parse_attr, hdrs,
- &action, extack);
- if (err)
- return err;
+ if (attr->post_act_handle)
+ mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
+
+ clean_encap_dests(flow->priv, flow, attr, &vf_tun);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(counter_dev, attr->counter);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
+ if (attr->modify_hdr)
+ mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
+ }
+
+ list_del(&attr->list);
+ kvfree(attr->parse_attr);
+ kfree(attr);
+ }
+}
+
+int
+mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr;
+ int err = 0;
+
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
break;
- case FLOW_ACTION_CSUM:
- if (csum_offload_supported(priv, action,
- act->csum_flags,
- extack))
- break;
- return -EOPNOTSUPP;
- case FLOW_ACTION_REDIRECT: {
- struct net_device *peer_dev = act->dev;
-
- if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
- same_hw_devs(priv, netdev_priv(peer_dev))) {
- parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
- flow_flag_set(flow, HAIRPIN);
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "device is not on same HW, can't offload");
- netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
- peer_dev->name);
- return -EOPNOTSUPP;
- }
- }
+ err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
+ if (err)
break;
- case FLOW_ACTION_MARK: {
- u32 mark = act->mark;
+ }
- if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
- NL_SET_ERR_MSG_MOD(extack,
- "Bad flow mark - only 16 bit is supported");
- return -EOPNOTSUPP;
- }
+ return err;
+}
- nic_attr->flow_tag = mark;
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- }
- break;
- case FLOW_ACTION_GOTO:
- err = validate_goto_chain(priv, flow, act, action,
- extack);
+/* TC filter rule HW translation:
+ *
+ * +---------------------+
+ * + ft prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * |
+ * | if multi table action
+ * |
+ * v
+ * +---------------------+
+ * + post act ft |<----.
+ * + match fte id | | split on multi table action
+ * + do actions |-----'
+ * +---------------------+
+ * |
+ * |
+ * v
+ * Do rest of the actions after last multi table action.
+ */
+static int
+alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr, *next_attr = NULL;
+ struct mlx5e_post_act_handle *handle;
+ bool vf_tun, encap_valid = true;
+ int err;
+
+ /* The attrs list was built with list_add(), so it is walked in reverse
+ * parse order: the first entry is the last attribute.
+ */
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (!next_attr) {
+ /* Set counter action on last post act rule. */
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ } else {
+ err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
if (err)
- return err;
+ goto out_free;
+ }
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- attr->dest_chain = act->chain_index;
+ /* Don't add post_act rule for first attr (last in the list).
+ * It's being handled by the caller.
+ */
+ if (list_is_last(&attr->list, &flow->attrs))
break;
- case FLOW_ACTION_CT:
- err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
- &parse_attr->mod_hdr_acts,
- act, extack);
+
+ err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun);
+ if (err)
+ goto out_free;
+
+ if (!encap_valid)
+ flow_flag_set(flow, SLOW);
+
+ err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
+ if (err)
+ goto out_free;
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
if (err)
- return err;
+ goto out_free;
+ }
- flow_flag_set(flow, CT);
- break;
- default:
- NL_SET_ERR_MSG_MOD(extack,
- "The offload action is not supported in NIC action");
- return -EOPNOTSUPP;
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
+ if (err)
+ goto out_free;
}
- }
- attr->action = action;
+ handle = mlx5e_tc_post_act_add(post_act, attr);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_free;
+ }
- if (attr->dest_chain && parse_attr->mirred_ifindex[0]) {
- NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
- return -EOPNOTSUPP;
+ attr->post_act_handle = handle;
+ next_attr = attr;
}
- err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
- if (err)
- return err;
+ if (flow_flag_test(flow, SLOW))
+ goto out;
- if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
- return -EOPNOTSUPP;
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err)
+ goto out_free;
+out:
return 0;
+
+out_free:
+ free_flow_post_acts(flow);
+ return err;
}
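
Because parse_tc_actions() builds flow->attrs with list_add(), the walk above visits attributes in reverse parse order and wires each one to the post-act handle of its successor, leaving the counter on the terminal stage. A sketch of that reverse-order wiring with a plain singly linked list:

#include <stdio.h>
#include <stdlib.h>

struct attr {
	int id;
	struct attr *jump_to;	/* stand-in for the post-act handle chaining */
	struct attr *next;
};

/* Prepend, as list_add() does: the head is the last-parsed attribute. */
static struct attr *push(struct attr *head, int id)
{
	struct attr *a = calloc(1, sizeof(*a));

	if (!a)
		exit(1);
	a->id = id;
	a->next = head;
	return a;
}

int main(void)
{
	struct attr *head = NULL, *a, *next_attr = NULL;
	int id;

	for (id = 0; id < 3; id++)	/* parse order: 0, 1, 2 */
		head = push(head, id);

	/* Walk in reverse parse order; each attr jumps to the attr parsed
	 * after it, and the first one visited is the terminal stage.
	 */
	for (a = head; a; a = a->next) {
		a->jump_to = next_attr;
		next_attr = a;
	}

	for (a = head; a; a = a->next)
		printf("attr %d -> %s\n", a->id,
		       a->jump_to ? "next stage" : "counter (terminal)");
	return 0;
}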
-static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
- struct net_device *peer_netdev)
+static int
+parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action)
{
- struct mlx5e_priv *peer_priv;
-
- peer_priv = netdev_priv(peer_netdev);
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_action flow_action_reorder;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5_flow_attr *attr = flow->attr;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_priv *priv = flow->priv;
+ struct flow_action_entry *act, **_act;
+ struct mlx5e_tc_act *tc_act;
+ int err, i;
- return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
- mlx5e_eswitch_vf_rep(priv->netdev) &&
- mlx5e_eswitch_vf_rep(peer_netdev) &&
- same_hw_devs(priv, peer_priv));
-}
+ flow_action_reorder.num_entries = flow_action->num_entries;
+ flow_action_reorder.entries = kcalloc(flow_action->num_entries,
+ sizeof(*flow_action_reorder.entries), GFP_KERNEL);
+ if (!flow_action_reorder.entries)
+ return -ENOMEM;
-static int parse_tc_vlan_action(struct mlx5e_priv *priv,
- const struct flow_action_entry *act,
- struct mlx5_esw_flow_attr *attr,
- u32 *action,
- struct netlink_ext_ack *extack)
-{
- u8 vlan_idx = attr->total_vlan;
+ mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
- if (vlan_idx >= MLX5_FS_VLAN_DEPTH) {
- NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported");
- return -EOPNOTSUPP;
- }
+ ns_type = mlx5e_get_flow_namespace(flow);
+ list_add(&attr->list, &flow->attrs);
- switch (act->id) {
- case FLOW_ACTION_VLAN_POP:
- if (vlan_idx) {
- if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
- MLX5_FS_VLAN_DEPTH)) {
- NL_SET_ERR_MSG_MOD(extack,
- "vlan pop action is not supported");
- return -EOPNOTSUPP;
- }
+ flow_action_for_each(i, _act, &flow_action_reorder) {
+ act = *_act;
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act) {
+ NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
+ err = -EOPNOTSUPP;
+ goto out_free;
+ }
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
- } else {
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ if (!tc_act->can_offload(parse_state, act, i, attr)) {
+ err = -EOPNOTSUPP;
+ goto out_free;
}
- break;
- case FLOW_ACTION_VLAN_PUSH:
- attr->vlan_vid[vlan_idx] = act->vlan.vid;
- attr->vlan_prio[vlan_idx] = act->vlan.prio;
- attr->vlan_proto[vlan_idx] = act->vlan.proto;
- if (!attr->vlan_proto[vlan_idx])
- attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
-
- if (vlan_idx) {
- if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
- MLX5_FS_VLAN_DEPTH)) {
- NL_SET_ERR_MSG_MOD(extack,
- "vlan push action is not supported for vlan depth > 1");
- return -EOPNOTSUPP;
- }
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
- } else {
- if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
- (act->vlan.proto != htons(ETH_P_8021Q) ||
- act->vlan.prio)) {
- NL_SET_ERR_MSG_MOD(extack,
- "vlan push action is not supported");
- return -EOPNOTSUPP;
+ err = tc_act->parse_action(parse_state, act, priv, attr);
+ if (err)
+ goto out_free;
+
+ parse_state->actions |= attr->action;
+
+ /* Split attr for multi table act if not the last act. */
+ if (tc_act->is_multi_table_act &&
+ tc_act->is_multi_table_act(priv, act, attr) &&
+ i < flow_action_reorder.num_entries - 1) {
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ if (err)
+ goto out_free;
+
+ attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto out_free;
}
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ list_add(&attr->list, &flow->attrs);
}
- break;
- default:
- NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN");
- return -EINVAL;
}
- attr->total_vlan = vlan_idx + 1;
+ kfree(flow_action_reorder.entries);
+
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ if (err)
+ goto out_free_post_acts;
+
+ err = alloc_flow_post_acts(flow, extack);
+ if (err)
+ goto out_free_post_acts;
return 0;
+
+out_free:
+ kfree(flow_action_reorder.entries);
+out_free_post_acts:
+ free_flow_post_acts(flow);
+
+ return err;
}
-static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
- struct net_device *out_dev)
+static int
+flow_action_supported(struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
{
- struct net_device *fdb_out_dev = out_dev;
- struct net_device *uplink_upper;
+ if (!flow_action_has_entries(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
+ return -EINVAL;
+ }
- rcu_read_lock();
- uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
- if (uplink_upper && netif_is_lag_master(uplink_upper) &&
- uplink_upper == out_dev) {
- fdb_out_dev = uplink_dev;
- } else if (netif_is_lag_master(out_dev)) {
- fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
- if (fdb_out_dev &&
- (!mlx5e_eswitch_rep(fdb_out_dev) ||
- !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
- fdb_out_dev = NULL;
+ if (!flow_action_hw_stats_check(flow_action, extack,
+ FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
+ return -EOPNOTSUPP;
}
- rcu_read_unlock();
- return fdb_out_dev;
+
+ return 0;
}
-static int add_vlan_push_action(struct mlx5e_priv *priv,
- struct mlx5_flow_attr *attr,
- struct net_device **out_dev,
- u32 *action,
- struct netlink_ext_ack *extack)
+static int
+parse_tc_nic_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
- struct net_device *vlan_dev = *out_dev;
- struct flow_action_entry vlan_act = {
- .id = FLOW_ACTION_VLAN_PUSH,
- .vlan.vid = vlan_dev_vlan_id(vlan_dev),
- .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
- .vlan.prio = 0,
- };
+ struct mlx5e_tc_act_parse_state *parse_state;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
int err;
- err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack);
+ err = flow_action_supported(flow_action, extack);
if (err)
return err;
- rcu_read_lock();
- *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
- rcu_read_unlock();
- if (!*out_dev)
- return -ENODEV;
+ attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ parse_attr = attr->parse_attr;
+ parse_state = &parse_attr->parse_state;
+ mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
+ parse_state->ct_priv = get_ct_priv(priv);
+
+ err = parse_tc_actions(parse_state, flow_action);
+ if (err)
+ return err;
- if (is_vlan_dev(*out_dev))
- err = add_vlan_push_action(priv, attr, out_dev, action, extack);
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
+ if (err)
+ return err;
- return err;
+ if (!actions_match_supported(priv, flow_action, parse_state->actions,
+ parse_attr, flow, extack))
+ return -EOPNOTSUPP;
+
+ return 0;
}
-static int add_vlan_pop_action(struct mlx5e_priv *priv,
- struct mlx5_flow_attr *attr,
- u32 *action,
- struct netlink_ext_ack *extack)
+static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
{
- struct flow_action_entry vlan_act = {
- .id = FLOW_ACTION_VLAN_POP,
- };
- int nest_level, err = 0;
+ struct mlx5e_priv *peer_priv;
- nest_level = attr->parse_attr->filter_dev->lower_level -
- priv->netdev->lower_level;
- while (nest_level--) {
- err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack);
- if (err)
- return err;
- }
+ peer_priv = netdev_priv(peer_netdev);
- return err;
+ return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
+ mlx5e_eswitch_vf_rep(priv->netdev) &&
+ mlx5e_eswitch_vf_rep(peer_netdev) &&
+ mlx5e_same_hw_devs(priv, peer_priv));
}
static bool same_hw_reps(struct mlx5e_priv *priv,
@@ -3802,7 +3988,7 @@ static bool same_hw_reps(struct mlx5e_priv *priv,
return mlx5e_eswitch_rep(priv->netdev) &&
mlx5e_eswitch_rep(peer_netdev) &&
- same_hw_devs(priv, peer_priv);
+ mlx5e_same_hw_devs(priv, peer_priv);
}
static bool is_lag_dev(struct mlx5e_priv *priv,
@@ -3813,12 +3999,25 @@ static bool is_lag_dev(struct mlx5e_priv *priv,
same_hw_reps(priv, peer_netdev));
}
+static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ if (same_hw_reps(priv, out_dev) &&
+ MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ return true;
+
+ return false;
+}
+
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
struct net_device *out_dev)
{
if (is_merged_eswitch_vfs(priv, out_dev))
return true;
+ if (is_multiport_eligible(priv, out_dev))
+ return true;
+
if (is_lag_dev(priv, out_dev))
return true;
@@ -3826,66 +4025,6 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
same_port_devs(priv, netdev_priv(out_dev));
}
-static bool is_duplicated_output_device(struct net_device *dev,
- struct net_device *out_dev,
- int *ifindexes, int if_count,
- struct netlink_ext_ack *extack)
-{
- int i;
-
- for (i = 0; i < if_count; i++) {
- if (ifindexes[i] == out_dev->ifindex) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't duplicate output to same device");
- netdev_err(dev, "can't duplicate output to same device: %s\n",
- out_dev->name);
- return true;
- }
- }
-
- return false;
-}
-
-static int verify_uplink_forwarding(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct net_device *out_dev,
- struct netlink_ext_ack *extack)
-{
- struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_rep_priv *rep_priv;
-
- /* Forwarding non encapsulated traffic between
- * uplink ports is allowed only if
- * termination_table_raw_traffic cap is set.
- *
- * Input vport was stored attr->in_rep.
- * In LAG case, *priv* is the private data of
- * uplink which may be not the input vport.
- */
- rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
-
- if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
- mlx5e_eswitch_uplink_rep(out_dev)))
- return 0;
-
- if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
- termination_table_raw_traffic)) {
- NL_SET_ERR_MSG_MOD(extack,
- "devices are both uplink, can't offload forwarding");
- pr_err("devices %s %s are both uplink, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
- return -EOPNOTSUPP;
- } else if (out_dev != rep_priv->netdev) {
- NL_SET_ERR_MSG_MOD(extack,
- "devices are not the same uplink, can't offload forwarding");
- pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
- return -EOPNOTSUPP;
- }
- return 0;
-}
-
int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr,
int ifindex,
@@ -3925,442 +4064,57 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
return 0;
}
-static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
- struct flow_action *flow_action,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+static int
+parse_tc_fdb_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
- struct pedit_headers_action hdrs[2] = {};
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_act_parse_state *parse_state;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5e_sample_attr sample_attr = {};
- const struct ip_tunnel_info *info = NULL;
struct mlx5_flow_attr *attr = flow->attr;
- int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
- bool ft_flow = mlx5e_is_ft_flow(flow);
- const struct flow_action_entry *act;
struct mlx5_esw_flow_attr *esw_attr;
- bool encap = false, decap = false;
- u32 action = attr->action;
- int err, i, if_count = 0;
- bool ptype_host = false;
- bool mpls_push = false;
-
- if (!flow_action_has_entries(flow_action)) {
- NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
- return -EINVAL;
- }
+ struct net_device *filter_dev;
+ int err;
- if (!flow_action_hw_stats_check(flow_action, extack,
- FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
- NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
- return -EOPNOTSUPP;
- }
+ err = flow_action_supported(flow_action, extack);
+ if (err)
+ return err;
esw_attr = attr->esw_attr;
parse_attr = attr->parse_attr;
+ filter_dev = parse_attr->filter_dev;
+ parse_state = &parse_attr->parse_state;
+ mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
+ parse_state->ct_priv = get_ct_priv(priv);
- flow_action_for_each(i, act, flow_action) {
- switch (act->id) {
- case FLOW_ACTION_ACCEPT:
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT;
- break;
- case FLOW_ACTION_PTYPE:
- if (act->ptype != PACKET_HOST) {
- NL_SET_ERR_MSG_MOD(extack,
- "skbedit ptype is only supported with type host");
- return -EOPNOTSUPP;
- }
-
- ptype_host = true;
- break;
- case FLOW_ACTION_DROP:
- action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- case FLOW_ACTION_TRAP:
- if (!flow_offload_has_one_action(flow_action)) {
- NL_SET_ERR_MSG_MOD(extack,
- "action trap is supported as a sole action only");
- return -EOPNOTSUPP;
- }
- action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT);
- attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
- break;
- case FLOW_ACTION_MPLS_PUSH:
- if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
- reformat_l2_to_l3_tunnel) ||
- act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
- NL_SET_ERR_MSG_MOD(extack,
- "mpls push is supported only for mpls_uc protocol");
- return -EOPNOTSUPP;
- }
- mpls_push = true;
- break;
- case FLOW_ACTION_MPLS_POP:
- /* we only support mpls pop if it is the first action
- * and the filter net device is bareudp. Subsequent
- * actions can be pedit and the last can be mirred
- * egress redirect.
- */
- if (i) {
- NL_SET_ERR_MSG_MOD(extack,
- "mpls pop supported only as first action");
- return -EOPNOTSUPP;
- }
- if (!netif_is_bareudp(parse_attr->filter_dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "mpls pop supported only on bareudp devices");
- return -EOPNOTSUPP;
- }
-
- parse_attr->eth.h_proto = act->mpls_pop.proto;
- action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- flow_flag_set(flow, L3_TO_L2_DECAP);
- break;
- case FLOW_ACTION_MANGLE:
- case FLOW_ACTION_ADD:
- err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
- parse_attr, hdrs, flow, extack);
- if (err)
- return err;
-
- if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- esw_attr->split_count = esw_attr->out_count;
- }
- break;
- case FLOW_ACTION_CSUM:
- if (csum_offload_supported(priv, action,
- act->csum_flags, extack))
- break;
-
- return -EOPNOTSUPP;
- case FLOW_ACTION_REDIRECT_INGRESS: {
- struct net_device *out_dev;
-
- out_dev = act->dev;
- if (!out_dev)
- return -EOPNOTSUPP;
-
- if (!netif_is_ovs_master(out_dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "redirect to ingress is supported only for OVS internal ports");
- return -EOPNOTSUPP;
- }
-
- if (netif_is_ovs_master(parse_attr->filter_dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "redirect to ingress is not supported from internal port");
- return -EOPNOTSUPP;
- }
-
- if (!ptype_host) {
- NL_SET_ERR_MSG_MOD(extack,
- "redirect to int port ingress requires ptype=host action");
- return -EOPNOTSUPP;
- }
-
- if (esw_attr->out_count) {
- NL_SET_ERR_MSG_MOD(extack,
- "redirect to int port ingress is supported only as single destination");
- return -EOPNOTSUPP;
- }
-
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
-
- err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
- MLX5E_TC_INT_PORT_INGRESS,
- &action, esw_attr->out_count);
- if (err)
- return err;
-
- esw_attr->out_count++;
-
- break;
- }
- case FLOW_ACTION_REDIRECT:
- case FLOW_ACTION_MIRRED: {
- struct mlx5e_priv *out_priv;
- struct net_device *out_dev;
-
- out_dev = act->dev;
- if (!out_dev) {
- /* out_dev is NULL when filters with
- * non-existing mirred device are replayed to
- * the driver.
- */
- return -EINVAL;
- }
-
- if (mpls_push && !netif_is_bareudp(out_dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "mpls is supported only through a bareudp device");
- return -EOPNOTSUPP;
- }
-
- if (ft_flow && out_dev == priv->netdev) {
- /* Ignore forward to self rules generated
- * by adding both mlx5 devs to the flow table
- * block on a normal nft offload setup.
- */
- return -EOPNOTSUPP;
- }
-
- if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't support more output ports, can't offload forwarding");
- netdev_warn(priv->netdev,
- "can't support more than %d output ports, can't offload forwarding\n",
- esw_attr->out_count);
- return -EOPNOTSUPP;
- }
-
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- if (encap) {
- parse_attr->mirred_ifindex[esw_attr->out_count] =
- out_dev->ifindex;
- parse_attr->tun_info[esw_attr->out_count] =
- mlx5e_dup_tun_info(info);
- if (!parse_attr->tun_info[esw_attr->out_count])
- return -ENOMEM;
- encap = false;
- esw_attr->dests[esw_attr->out_count].flags |=
- MLX5_ESW_DEST_ENCAP;
- esw_attr->out_count++;
- /* attr->dests[].rep is resolved when we
- * handle encap
- */
- } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
-
- if (is_duplicated_output_device(priv->netdev,
- out_dev,
- ifindexes,
- if_count,
- extack))
- return -EOPNOTSUPP;
-
- ifindexes[if_count] = out_dev->ifindex;
- if_count++;
-
- out_dev = get_fdb_out_dev(uplink_dev, out_dev);
- if (!out_dev)
- return -ENODEV;
-
- if (is_vlan_dev(out_dev)) {
- err = add_vlan_push_action(priv, attr,
- &out_dev,
- &action, extack);
- if (err)
- return err;
- }
-
- if (is_vlan_dev(parse_attr->filter_dev)) {
- err = add_vlan_pop_action(priv, attr,
- &action, extack);
- if (err)
- return err;
- }
-
- if (netif_is_macvlan(out_dev))
- out_dev = macvlan_dev_real_dev(out_dev);
-
- err = verify_uplink_forwarding(priv, flow, out_dev, extack);
- if (err)
- return err;
-
- if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "devices are not on same switch HW, can't offload forwarding");
- return -EOPNOTSUPP;
- }
-
- if (same_vf_reps(priv, out_dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't forward from a VF to itself");
- return -EOPNOTSUPP;
- }
-
- out_priv = netdev_priv(out_dev);
- rpriv = out_priv->ppriv;
- esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
- esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
- esw_attr->out_count++;
- } else if (netif_is_ovs_master(out_dev)) {
- err = mlx5e_set_fwd_to_int_port_actions(priv, attr,
- out_dev->ifindex,
- MLX5E_TC_INT_PORT_EGRESS,
- &action,
- esw_attr->out_count);
- if (err)
- return err;
-
- esw_attr->out_count++;
- } else if (parse_attr->filter_dev != priv->netdev) {
- /* All mlx5 devices are called to configure
- * high level device filters. Therefore, the
- * *attempt* to install a filter on invalid
- * eswitch should not trigger an explicit error
- */
- return -EINVAL;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "devices are not on same switch HW, can't offload forwarding");
- netdev_warn(priv->netdev,
- "devices %s %s not on same switch HW, can't offload forwarding\n",
- priv->netdev->name,
- out_dev->name);
- return -EOPNOTSUPP;
- }
- }
- break;
- case FLOW_ACTION_TUNNEL_ENCAP:
- info = act->tunnel;
- if (info) {
- encap = true;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "Zero tunnel attributes is not supported");
- return -EOPNOTSUPP;
- }
-
- break;
- case FLOW_ACTION_VLAN_PUSH:
- case FLOW_ACTION_VLAN_POP:
- if (act->id == FLOW_ACTION_VLAN_PUSH &&
- (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
- /* Replace vlan pop+push with vlan modify */
- action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
- err = add_vlan_rewrite_action(priv,
- MLX5_FLOW_NAMESPACE_FDB,
- act, parse_attr, hdrs,
- &action, extack);
- } else {
- err = parse_tc_vlan_action(priv, act, esw_attr, &action, extack);
- }
- if (err)
- return err;
-
- esw_attr->split_count = esw_attr->out_count;
- break;
- case FLOW_ACTION_VLAN_MANGLE:
- err = add_vlan_rewrite_action(priv,
- MLX5_FLOW_NAMESPACE_FDB,
- act, parse_attr, hdrs,
- &action, extack);
- if (err)
- return err;
-
- esw_attr->split_count = esw_attr->out_count;
- break;
- case FLOW_ACTION_TUNNEL_DECAP:
- decap = true;
- break;
- case FLOW_ACTION_GOTO:
- err = validate_goto_chain(priv, flow, act, action,
- extack);
- if (err)
- return err;
-
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- attr->dest_chain = act->chain_index;
- break;
- case FLOW_ACTION_CT:
- if (flow_flag_test(flow, SAMPLE)) {
- NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
- return -EOPNOTSUPP;
- }
- err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
- &parse_attr->mod_hdr_acts,
- act, extack);
- if (err)
- return err;
-
- flow_flag_set(flow, CT);
- esw_attr->split_count = esw_attr->out_count;
- break;
- case FLOW_ACTION_SAMPLE:
- if (flow_flag_test(flow, CT)) {
- NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
- return -EOPNOTSUPP;
- }
- sample_attr.rate = act->sample.rate;
- sample_attr.group_num = act->sample.psample_group->group_num;
- if (act->sample.truncate)
- sample_attr.trunc_size = act->sample.trunc_size;
- flow_flag_set(flow, SAMPLE);
- break;
- default:
- NL_SET_ERR_MSG_MOD(extack,
- "The offload action is not supported in FDB action");
- return -EOPNOTSUPP;
- }
- }
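+ /* Dispatch each action to the common mlx5e_tc_act parsers, which update
+ * attr and parse_state as they go.
+ */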
+ err = parse_tc_actions(parse_state, flow_action);
+ if (err)
+ return err;
/* Forward to/from internal port can only have 1 dest */
- if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) &&
+ if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
esw_attr->out_count > 1) {
NL_SET_ERR_MSG_MOD(extack,
"Rules with internal port can have only one destination");
return -EOPNOTSUPP;
}
- /* always set IP version for indirect table handling */
- attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
-
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
- action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
- /* For prio tag mode, replace vlan pop with rewrite vlan prio
- * tag rewrite.
- */
- action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
- err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
- &action, extack);
- if (err)
- return err;
+ /* Forward from tunnel/internal port to internal port is not supported */
+ if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
+ esw_attr->dest_int_port) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Forwarding from tunnel/internal port to internal port is not supported");
+ return -EOPNOTSUPP;
}
- attr->action = action;
-
- err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
if (err)
return err;
- if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
- return -EOPNOTSUPP;
-
- if (attr->dest_chain && decap) {
- /* It can be supported if we'll create a mapping for
- * the tunnel device only (without tunnel), and set
- * this tunnel id with this decap flow.
- *
- * On restore (miss), we'll just set this saved tunnel
- * device.
- */
-
- NL_SET_ERR_MSG(extack, "Decap with goto isn't supported");
- netdev_warn(priv->netdev, "Decap with goto isn't supported");
+ if (!actions_match_supported(priv, flow_action, parse_state->actions,
+ parse_attr, flow, extack))
return -EOPNOTSUPP;
- }
-
- /* Allocate sample attribute only when there is a sample action and
- * no errors after parsing.
- */
- if (flow_flag_test(flow, SAMPLE)) {
- attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL);
- if (!attr->sample_attr)
- return -ENOMEM;
- *attr->sample_attr = sample_attr;
- }
return 0;
}
@@ -4394,14 +4148,14 @@ static const struct rhashtable_params tc_ht_params = {
static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
unsigned long flags)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5e_rep_priv *rpriv;
if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- return &uplink_rpriv->uplink_priv.tc_ht;
+ rpriv = priv->ppriv;
+ return &rpriv->tc_ht;
} else /* NIC offload */
- return &priv->fs.tc.ht;
+ return &tc->ht;
}
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
@@ -4434,7 +4188,12 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
sizeof(struct mlx5_nic_flow_attr);
struct mlx5_flow_attr *attr;
- return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
+ if (!attr)
+ return attr;
+
+ INIT_LIST_HEAD(&attr->list);
+ return attr;
}
static int
@@ -4458,7 +4217,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
flow->cookie = f->cookie;
flow->priv = priv;
- attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
+ attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
if (!attr)
goto err_free;
@@ -4468,6 +4227,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
INIT_LIST_HEAD(&flow->encaps[out_index].list);
INIT_LIST_HEAD(&flow->hairpin);
INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
+ INIT_LIST_HEAD(&flow->attrs);
refcount_set(&flow->refcnt, 1);
init_completion(&flow->init_done);
init_completion(&flow->del_hw_done);
@@ -4525,6 +4285,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_core_dev *in_mdev)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
@@ -4553,21 +4314,33 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
+ /* always set IP version for indirect table handling */
+ flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
+
err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
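+
+ /* Actions counted for the LAG mode decision (attr->lag.count) require a
+ * multi-port eswitch rule alongside the FDB offload.
+ */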
+ if (flow->attr->lag.count) {
+ err = mlx5_lag_add_mpesw_rule(esw->dev);
+ if (err)
+ goto err_free;
+ }
+
err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
complete_all(&flow->init_done);
if (err) {
if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
- goto err_free;
+ goto err_lag;
add_unready_flow(flow);
}
return flow;
+err_lag:
+ if (flow->attr->lag.count)
+ mlx5_lag_del_mpesw_rule(esw->dev);
err_free:
mlx5e_flow_put(priv, flow);
out:
@@ -4714,7 +4487,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
err_free:
flow_flag_set(flow, FAILED);
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
return err;
@@ -4766,7 +4539,7 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
int err = 0;
if (!mlx5_esw_hold(priv->mdev))
- return -EAGAIN;
+ return -EBUSY;
mlx5_esw_get(priv->mdev);
@@ -4965,6 +4738,33 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
return err;
}
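+
+/* Common checks that a TC police action can be offloaded: the exceed action
+ * must be drop, a conform-ok action must be the last entry, and peakrate,
+ * avrate and overhead are not supported.
+ */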
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when exceed action is not drop");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
+ !flow_action_is_last_entry(action, act)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is ok, but action is not last");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.peakrate_bytes_ps ||
+ act->police.avrate || act->police.overhead) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when peakrate/avrate/overhead is configured");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct netlink_ext_ack *extack)
@@ -4992,10 +4792,16 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_POLICE:
- if (act->police.rate_pkt_ps) {
- NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+ if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is not continue");
return -EOPNOTSUPP;
}
+
+ err = mlx5e_policer_validate(flow_action, act, extack);
+ if (err)
+ return err;
+
err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
if (err)
return err;
@@ -5014,14 +4820,8 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
struct tc_cls_matchall_offload *ma)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = ma->common.extack;
- if (!mlx5_esw_qos_enabled(esw)) {
- NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
- return -EOPNOTSUPP;
- }
-
if (ma->common.prio != 1) {
NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
return -EINVAL;
@@ -5057,22 +4857,23 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
struct mlx5e_priv *peer_priv)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
struct mlx5e_hairpin_entry *hpe, *tmp;
LIST_HEAD(init_wait_list);
u16 peer_vhca_id;
int bkt;
- if (!same_hw_devs(priv, peer_priv))
+ if (!mlx5e_same_hw_devs(priv, peer_priv))
return;
peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
- mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
- hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
+ mutex_lock(&tc->hairpin_tbl_lock);
+ hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
if (refcount_inc_not_zero(&hpe->refcnt))
list_add(&hpe->dead_peer_wait_list, &init_wait_list);
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
wait_for_completion(&hpe->res_ready);
@@ -5087,7 +4888,6 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- struct mlx5e_flow_steering *fs;
struct mlx5e_priv *peer_priv;
struct mlx5e_tc_table *tc;
struct mlx5e_priv *priv;
@@ -5098,8 +4898,7 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
- fs = container_of(tc, struct mlx5e_flow_steering, tc);
- priv = container_of(fs, struct mlx5e_priv, fs);
+ priv = tc->priv;
peer_priv = netdev_priv(ndev);
if (priv == peer_priv ||
!(priv->netdev->features & NETIF_F_HW_TC))
@@ -5126,9 +4925,39 @@ static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
return tc_tbl_size;
}
+static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_flow_table **ft = &tc->miss_t;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
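+ /* A single-entry, auto-grouped table that serves as the default miss
+ * destination for the NIC TC chains.
+ */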
+ ft_attr.max_fte = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_TC_MISS_LEVEL;
+ ft_attr.prio = 0;
+ ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
+
+ *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(*ft)) {
+ err = PTR_ERR(*ft);
+ netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
+ }
+
+ return err;
+}
+
+static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+
+ mlx5_destroy_flow_table(tc->miss_t);
+}
+
int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
- struct mlx5e_tc_table *tc = &priv->fs.tc;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_core_dev *dev = priv->mdev;
struct mapping_ctx *chains_mapping;
struct mlx5_chains_attr attr = {};
@@ -5139,6 +4968,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
mutex_init(&tc->t_lock);
mutex_init(&tc->hairpin_tbl_lock);
hash_init(tc->hairpin_tbl);
+ tc->priv = priv;
err = rhashtable_init(&tc->ht, &tc_ht_params);
if (err)
@@ -5158,23 +4988,27 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
}
tc->mapping = chains_mapping;
+ err = mlx5e_tc_nic_create_miss_table(priv);
+ if (err)
+ goto err_chains;
+
if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
- attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
+ attr.default_ft = tc->miss_t;
attr.mapping = chains_mapping;
tc->chains = mlx5_chains_create(dev, &attr);
if (IS_ERR(tc->chains)) {
err = PTR_ERR(tc->chains);
- goto err_chains;
+ goto err_miss;
}
tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
- tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
+ tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
@@ -5193,6 +5027,8 @@ err_reg:
mlx5_tc_ct_clean(tc->ct);
mlx5e_tc_post_act_destroy(tc->post_act);
mlx5_chains_destroy(tc->chains);
+err_miss:
+ mlx5e_tc_nic_destroy_miss_table(priv);
err_chains:
mapping_destroy(chains_mapping);
err_mapping:
@@ -5211,7 +5047,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
- struct mlx5e_tc_table *tc = &priv->fs.tc;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
if (tc->netdevice_nb.notifier_call)
unregister_netdevice_notifier_dev_net(priv->netdev,
@@ -5233,12 +5069,30 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
mlx5e_tc_post_act_destroy(tc->post_act);
mapping_destroy(tc->mapping);
mlx5_chains_destroy(tc->chains);
+ mlx5e_tc_nic_destroy_miss_table(priv);
+}
+
+int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
+{
+ int err;
+
+ err = rhashtable_init(tc_ht, &tc_ht_params);
+ if (err)
+ return err;
+
+ lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
+
+ return 0;
}
-int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
+{
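+ /* Frees the table and deletes any flows still in it. */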
+ rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+}
+
+int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
- struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *rpriv;
struct mapping_ctx *mapping;
struct mlx5_eswitch *esw;
@@ -5246,7 +5100,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
u64 mapping_id;
int err = 0;
- uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
priv = netdev_priv(rpriv->netdev);
esw = priv->mdev->priv.eswitch;
@@ -5286,12 +5139,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
}
uplink_priv->tunnel_enc_opts_mapping = mapping;
- err = rhashtable_init(tc_ht, &tc_ht_params);
- if (err)
- goto err_ht_init;
-
- lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
-
uplink_priv->encap = mlx5e_tc_tun_init(priv);
if (IS_ERR(uplink_priv->encap)) {
err = PTR_ERR(uplink_priv->encap);
@@ -5301,8 +5148,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
return 0;
err_register_fib_notifier:
- rhashtable_destroy(tc_ht);
-err_ht_init:
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
mapping_destroy(uplink_priv->tunnel_mapping);
@@ -5316,13 +5161,8 @@ err_tun_mapping:
return err;
}
-void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
+void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
{
- struct mlx5_rep_uplink_priv *uplink_priv;
-
- uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
-
- rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
mlx5e_tc_tun_cleanup(uplink_priv->encap);
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
@@ -5331,6 +5171,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}
@@ -5411,13 +5252,13 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 chain = 0, chain_tag, reg_b, zone_restore_id;
struct mlx5e_priv *priv = netdev_priv(skb->dev);
- struct mlx5e_tc_table *tc = &priv->fs.tc;
struct mlx5_mapped_obj mapped_obj;
struct tc_skb_ext *tc_skb_ext;
+ struct mlx5e_tc_table *tc;
int err;
reg_b = be32_to_cpu(cqe->ft_metadata);
-
+ tc = mlx5e_fs_get_tc(priv->fs);
chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
@@ -5436,7 +5277,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
tc_skb_ext->chain = chain;
- zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
+ zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
ESW_ZONE_ID_MASK;
if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index fdb222793027..48241317a535 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -39,6 +39,7 @@
#include "en/tc_ct.h"
#include "en/tc_tun.h"
#include "en/tc/int_port.h"
+#include "en/tc/meter.h"
#include "en_rep.h"
#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
@@ -53,7 +54,7 @@
ESW_FLOW_ATTR_SZ :\
NIC_FLOW_ATTR_SZ)
-
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
struct mlx5e_tc_update_priv {
@@ -71,7 +72,8 @@ struct mlx5_flow_attr {
struct mlx5_fc *counter;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_ct_attr ct_attr;
- struct mlx5e_sample_attr *sample_attr;
+ struct mlx5e_sample_attr sample_attr;
+ struct mlx5e_meter_attr meter_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
u32 chain;
u16 prio;
@@ -82,13 +84,41 @@ struct mlx5_flow_attr {
u8 outer_match_level;
u8 ip_version;
u8 tun_ip_version;
+ int tunnel_id; /* mapped tunnel id */
u32 flags;
+ u32 exe_aso_type;
+ struct list_head list;
+ struct mlx5e_post_act_handle *post_act_handle;
+ struct {
+ /* Indicates whether the parsed flow should be counted when making the
+ * LAG mode decision.
+ */
+ bool count;
+ } lag;
+ /* keep this union last */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
};
};
+enum {
+ MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0),
+ MLX5_ATTR_FLAG_SLOW_PATH = BIT(1),
+ MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2),
+ MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3),
+ MLX5_ATTR_FLAG_SAMPLE = BIT(4),
+ MLX5_ATTR_FLAG_ACCEPT = BIT(5),
+ MLX5_ATTR_FLAG_CT = BIT(6),
+};
+
+/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
+static inline bool
+mlx5e_tc_attr_flags_skip(u32 attr_flags)
+{
+ return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT);
+}
+
struct mlx5_rx_tun_attr {
u16 decap_vport;
union {
@@ -149,9 +179,11 @@ enum {
#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)
-int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
-void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
-bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow);
+int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv);
+void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv);
+
+int mlx5e_tc_ht_init(struct rhashtable *tc_ht);
+void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht);
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct flow_cls_offload *f, unsigned long flags);
@@ -201,6 +233,7 @@ enum mlx5e_tc_attr_to_reg {
FTEID_TO_REG,
NIC_CHAIN_TO_REG,
NIC_ZONE_RESTORE_TO_REG,
+ PACKET_COLOR_TO_REG,
};
struct mlx5e_tc_attr_to_reg_mapping {
@@ -213,6 +246,10 @@ struct mlx5e_tc_attr_to_reg_mapping {
extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[];
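+/* Shorthands for the mod-header field offset, bit length and value mask of a
+ * mapped register.
+ */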
+#define MLX5_REG_MAPPING_MOFFSET(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].moffset)
+#define MLX5_REG_MAPPING_MBITS(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].mlen)
+#define MLX5_REG_MAPPING_MASK(reg_id) (GENMASK(mlx5e_tc_attr_to_reg_mappings[reg_id].mlen - 1, 0))
+
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
struct net_device *out_dev);
@@ -244,16 +281,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
u32 data);
int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow);
-
-int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
- int namespace,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
-void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
-
-struct mlx5e_tc_flow;
-u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
struct flow_match_basic *match, bool outer,
@@ -295,6 +324,8 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
#else /* CONFIG_MLX5_CLS_ACT */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int mlx5e_tc_ht_init(struct rhashtable *tc_ht) { return 0; }
+static inline void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) {}
static inline int
mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{ return -EOPNOTSUPP; }
@@ -326,6 +357,8 @@ mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
#endif
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+struct mlx5e_tc_table *mlx5e_tc_table_alloc(void);
+void mlx5e_tc_table_free(struct mlx5e_tc_table *tc);
static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
@@ -346,6 +379,8 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb);
#else /* CONFIG_MLX5_CLS_ACT */
+static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; }
+static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {}
static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
{ return false; }
static inline bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 7fd33b356cc8..f7897ddb29c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -39,7 +39,9 @@
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/macsec.h"
#include "en/ptp.h"
+#include <net/ipv6.h>
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
@@ -53,117 +55,6 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
}
}
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
-{
- int dscp_cp = 0;
-
- if (skb->protocol == htons(ETH_P_IP))
- dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
- else if (skb->protocol == htons(ETH_P_IPV6))
- dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
-
- return priv->dcbx_dp.dscp2prio[dscp_cp];
-}
-#endif
-
-static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- int up = 0;
-
- if (!netdev_get_num_tc(dev))
- goto return_txq;
-
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
- up = mlx5e_get_dscp_up(priv, skb);
- else
-#endif
- if (skb_vlan_tag_present(skb))
- up = skb_vlan_tag_get_prio(skb);
-
-return_txq:
- return priv->port_ptp_tc2realtxq[up];
-}
-
-static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
- u16 htb_maj_id)
-{
- u16 classid;
-
- if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id)
- classid = TC_H_MIN(skb->priority);
- else
- classid = READ_ONCE(priv->htb.defcls);
-
- if (!classid)
- return 0;
-
- return mlx5e_get_txq_by_classid(priv, classid);
-}
-
-u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- int num_tc_x_num_ch;
- int txq_ix;
- int up = 0;
- int ch_ix;
-
- /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
- num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
- if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) {
- struct mlx5e_ptp *ptp_channel;
-
- /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
- u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id);
-
- if (unlikely(htb_maj_id)) {
- txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id);
- if (txq_ix > 0)
- return txq_ix;
- }
-
- ptp_channel = READ_ONCE(priv->channels.ptp);
- if (unlikely(ptp_channel &&
- test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
- mlx5e_use_ptpsq(skb)))
- return mlx5e_select_ptpsq(dev, skb);
-
- txq_ix = netdev_pick_tx(dev, skb, NULL);
- /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
- * If they are selected, switch to regular queues.
- * Driver to select these queues only at mlx5e_select_ptpsq()
- * and mlx5e_select_htb_queue().
- */
- if (unlikely(txq_ix >= num_tc_x_num_ch))
- txq_ix %= num_tc_x_num_ch;
- } else {
- txq_ix = netdev_pick_tx(dev, skb, NULL);
- }
-
- if (!netdev_get_num_tc(dev))
- return txq_ix;
-
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
- up = mlx5e_get_dscp_up(priv, skb);
- else
-#endif
- if (skb_vlan_tag_present(skb))
- up = skb_vlan_tag_get_prio(skb);
-
- /* Normalize any picked txq_ix to [0, num_channels),
- * So we can return a txq_ix that matches the channel and
- * packet UP.
- */
- ch_ix = priv->txq2sq[txq_ix]->ch_ix;
-
- return priv->channel_tc2realtxq[ch_ix][up];
-}
-
static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
@@ -202,16 +93,26 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
return min_t(u16, hlen, skb_headlen(skb));
}
+#define MLX5_UNSAFE_MEMCPY_DISCLAIMER \
+ "This copy has been bounds-checked earlier in " \
+ "mlx5i_sq_calc_wqe_attr() and intentionally " \
+ "crosses a flex array boundary. Since it is " \
+ "performance sensitive, splitting the copy is " \
+ "undesirable."
+
static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
{
struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
int cpy1_sz = 2 * ETH_ALEN;
int cpy2_sz = ihs - cpy1_sz;
- memcpy(vhdr, skb->data, cpy1_sz);
+ memcpy(&vhdr->addrs, skb->data, cpy1_sz);
vhdr->h_vlan_proto = skb->vlan_proto;
vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
- memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
+ unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto,
+ skb->data + cpy1_sz,
+ cpy2_sz,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
}
static inline void
@@ -241,23 +142,32 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
sq->stats->csum_none++;
}
+/* Returns the number of header bytes that we plan
+ * to inline later in the transmit descriptor
+ */
static inline u16
-mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
{
struct mlx5e_sq_stats *stats = sq->stats;
u16 ihs;
+ *hopbyhop = 0;
if (skb->encapsulation) {
- ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
+ ihs = skb_inner_tcp_all_headers(skb);
stats->tso_inner_packets++;
stats->tso_inner_bytes += skb->len - ihs;
} else {
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
- else
- ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ } else {
+ ihs = skb_tcp_all_headers(skb);
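+ /* IPv6 BIG TCP: the Hop-by-Hop jumbo header is not inlined; it is
+ * stripped from the inlined headers below.
+ */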
+ if (ipv6_has_hopopt_jumbo(skb)) {
+ *hopbyhop = sizeof(struct hop_jumbo_hdr);
+ ihs -= sizeof(struct hop_jumbo_hdr);
+ }
+ }
stats->tso_packets++;
- stats->tso_bytes += skb->len - ihs;
+ stats->tso_bytes += skb->len - ihs - *hopbyhop;
}
return ihs;
@@ -319,6 +229,7 @@ struct mlx5e_tx_attr {
__be16 mss;
u16 insz;
u8 opcode;
+ u8 hopbyhop;
};
struct mlx5e_tx_wqe_attr {
@@ -355,14 +266,16 @@ static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_sq_stats *stats = sq->stats;
if (skb_is_gso(skb)) {
- u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb);
+ int hopbyhop;
+ u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop);
*attr = (struct mlx5e_tx_attr) {
.opcode = MLX5_OPCODE_LSO,
.mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
.ihs = ihs,
.num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
- .headlen = skb_headlen(skb) - ihs,
+ .headlen = skb_headlen(skb) - ihs - hopbyhop,
+ .hopbyhop = hopbyhop,
};
stats->packets += skb_shinfo(skb)->gso_segs;
@@ -392,6 +305,8 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at
u16 ds_cnt_inl = 0;
u16 ds_cnt_ids = 0;
+ /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */
+
if (attr->insz)
ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
MLX5_SEND_WQE_DS);
@@ -404,6 +319,9 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at
inl += VLAN_HLEN;
ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS))
+ netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl,
+ (u16)MLX5E_MAX_TX_INLINE_DS);
ds_cnt += ds_cnt_inl;
}
@@ -429,6 +347,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
}
}
+static void mlx5e_tx_flush(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ /* Must not be called when an MPWQE session is active but empty. */
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
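+ /* Post a NOP at the current producer index so the doorbell below rings
+ * on a valid WQE after the offending packet was dropped.
+ */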
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+
+ wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
const struct mlx5e_tx_attr *attr,
@@ -459,6 +397,11 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
if (unlikely(sq->ptpsq)) {
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
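+ /* If the PTP skb FIFO has filled up, stop the txq; it is woken again
+ * from mlx5e_poll_tx_cq() once the FIFO has room.
+ */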
+ if (!netif_tx_queue_stopped(sq->txq) &&
+ !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
skb_get(skb);
}
@@ -476,7 +419,8 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg;
struct mlx5_wqe_data_seg *dseg;
struct mlx5e_tx_wqe_info *wi;
-
+ u16 ihs = attr->ihs;
+ struct ipv6hdr *h6;
struct mlx5e_sq_stats *stats = sq->stats;
int num_dma;
@@ -490,15 +434,40 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
eseg->mss = attr->mss;
- if (attr->ihs) {
- if (skb_vlan_tag_present(skb)) {
- eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN);
- mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
+ if (ihs) {
+ u8 *start = eseg->inline_hdr.start;
+
+ if (unlikely(attr->hopbyhop)) {
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ if (skb_vlan_tag_present(skb)) {
+ mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6));
+ ihs += VLAN_HLEN;
+ h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr));
+ } else {
+ unsafe_memcpy(start, skb->data,
+ ETH_HLEN + sizeof(*h6),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ h6 = (struct ipv6hdr *)(start + ETH_HLEN);
+ }
+ h6->nexthdr = IPPROTO_TCP;
+ /* Copy the TCP header after the IPv6 one */
+ memcpy(h6 + 1,
+ skb->data + ETH_HLEN + sizeof(*h6) +
+ sizeof(struct hop_jumbo_hdr),
+ tcp_hdrlen(skb));
+ /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
+ } else if (skb_vlan_tag_present(skb)) {
+ mlx5e_insert_vlan(start, skb, ihs);
+ ihs += VLAN_HLEN;
stats->added_vlan_packets++;
} else {
- eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs);
- memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ attr->ihs,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
}
+ eseg->inline_hdr.sz |= cpu_to_be16(ihs);
dseg += wqe_attr->ds_cnt_inl;
} else if (skb_vlan_tag_present(skb)) {
eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
@@ -509,7 +478,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
dseg += wqe_attr->ds_cnt_ids;
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop,
attr->headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
@@ -521,12 +490,13 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
- !attr->insz;
+ !attr->insz && !mlx5e_macsec_skb_is_offload(skb);
}
static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
@@ -544,7 +514,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe *wqe;
u16 pi;
- pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
@@ -622,6 +592,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5e_xmit_data txd;
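+
+ /* Map the packet before the MPWQE session is started or extended, so a
+ * DMA mapping failure cannot leave the session in an inconsistent state.
+ */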
+ txd.data = skb->data;
+ txd.len = skb->len;
+
+ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+ goto err_unmap;
+
if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
mlx5e_tx_mpwqe_session_start(sq, eseg);
} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
@@ -631,21 +608,12 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
sq->stats->xmit_more += xmit_more;
- txd.data = skb->data;
- txd.len = skb->len;
-
- txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
- goto err_unmap;
mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
-
mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
-
mlx5e_tx_mpwqe_add_dseg(sq, &txd);
-
mlx5e_tx_skb_update_hwts_flags(skb);
- if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
+ if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
cseg = mlx5e_tx_mpwqe_session_complete(sq);
@@ -664,6 +632,7 @@ err_unmap:
mlx5e_dma_unmap_wqe_err(sq, 1);
sq->stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
@@ -673,12 +642,22 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
mlx5e_tx_mpwqe_session_complete(sq);
}
+static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
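+ /* Stamp PTP packets with the skb FIFO producer counter so the completion
+ * CQE can be matched back to the right skb.
+ */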
+ if (ptpsq->ts_cqe_ctr_mask && unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ eseg->flow_table_metadata = cpu_to_be32(ptpsq->skb_fifo_pc &
+ ptpsq->ts_cqe_ctr_mask);
+}
+
static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
struct mlx5_wqe_eth_seg *eseg, u16 ihs)
{
mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
+ if (unlikely(sq->ptpsq))
+ mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg);
}
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -691,8 +670,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
struct mlx5e_txqsq *sq;
u16 pi;
+ /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
+ * queue being changed is disabled, and smp_wmb guarantees that the
+ * changes are visible before mlx5e_xmit tries to read from txq2sq. It
+ * guarantees that the value of txq2sq[qid] doesn't change while
+ * mlx5e_xmit is running on queue number qid. smp_wmb is paired with
+ * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
+ */
sq = priv->txq2sq[skb_get_queue_mapping(skb)];
if (unlikely(!sq)) {
+ /* Two cases when sq can be NULL:
+ * 1. The HTB node is registered, and mlx5e_select_queue
+ * selected its queue ID, but the SQ itself is not yet created.
+ * 2. HTB SQ creation failed. Similar to the previous case, but
+ * the SQ won't be created.
+ */
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
@@ -886,6 +878,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
if (netif_tx_queue_stopped(sq->txq) &&
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+ mlx5e_ptpsq_fifo_has_room(sq) &&
!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netif_tx_wake_queue(sq->txq);
stats->wake++;
@@ -1016,12 +1009,34 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
eseg->mss = attr.mss;
if (attr.ihs) {
- memcpy(eseg->inline_hdr.start, skb->data, attr.ihs);
+ if (unlikely(attr.hopbyhop)) {
+ struct ipv6hdr *h6;
+
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ ETH_HLEN + sizeof(*h6),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN);
+ h6->nexthdr = IPPROTO_TCP;
+ /* Copy the TCP header after the IPv6 one */
+ unsafe_memcpy(h6 + 1,
+ skb->data + ETH_HLEN + sizeof(*h6) +
+ sizeof(struct hop_jumbo_hdr),
+ tcp_hdrlen(skb),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
+ } else {
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ attr.ihs,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ }
eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
dseg += wqe_attr.ds_cnt_inl;
}
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs,
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop,
attr.headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
@@ -1033,5 +1048,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 833be29170a1..9a458a5d9853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -31,6 +31,7 @@
*/
#include <linux/irq.h>
+#include <net/xdp_sock_drv.h>
#include "en.h"
#include "en/txrx.h"
#include "en/xdp.h"
@@ -86,26 +87,36 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq)
{
+ bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool);
bool busy_xsk = false, xsk_rx_alloc_err;
- /* Handle the race between the application querying need_wakeup and the
- * driver setting it:
- * 1. Update need_wakeup both before and after the TX. If it goes to
- * "yes", it can only happen with the first update.
- * 2. If the application queried need_wakeup before we set it, the
- * packets will be transmitted anyway, even w/o a wakeup.
- * 3. Give a chance to clear need_wakeup after new packets were queued
- * for TX.
+ /* If SQ is empty, there are no TX completions to trigger NAPI, so set
+ * need_wakeup. Do it before queuing packets for TX to avoid a race
+ * condition with userspace.
*/
- mlx5e_xsk_update_tx_wakeup(xsksq);
+ if (need_wakeup && xsksq->pc == xsksq->cc)
+ xsk_set_tx_need_wakeup(xsksq->xsk_pool);
busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
- mlx5e_xsk_update_tx_wakeup(xsksq);
+ /* If we queued some packets for TX, no need for wakeup anymore. */
+ if (need_wakeup && xsksq->pc != xsksq->cc)
+ xsk_clear_tx_need_wakeup(xsksq->xsk_pool);
+ /* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it
+ * before refilling to avoid a race condition with userspace.
+ */
+ if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq))
+ xsk_set_rx_need_wakeup(xskrq->xsk_pool);
xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes,
mlx5e_post_rx_mpwqes,
mlx5e_post_rx_wqes,
xskrq);
- busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err);
+ /* Ask for wakeup if WQ is not full after refill. */
+ if (!need_wakeup)
+ busy_xsk |= xsk_rx_alloc_err;
+ else if (xsk_rx_alloc_err)
+ xsk_set_rx_need_wakeup(xskrq->xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(xskrq->xsk_pool);
return busy_xsk;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 792e0d6aa861..a0242dc15741 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -5,7 +5,6 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eq.h>
@@ -19,6 +18,7 @@
#include "lib/clock.h"
#include "diag/fw_tracer.h"
#include "mlx5_irq.h"
+#include "devlink.h"
enum {
MLX5_EQE_OWNER_INIT_VAL = 0x1,
@@ -58,6 +58,8 @@ struct mlx5_eq_table {
struct mutex lock; /* sync async eqs creations */
int num_comp_eqs;
struct mlx5_irq_table *irq_table;
+ struct mlx5_irq **comp_irqs;
+ struct mlx5_irq *ctrl_irq;
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
#endif
@@ -265,8 +267,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
struct mlx5_priv *priv = &dev->priv;
- u16 vecidx = param->irq_index;
__be64 *pas;
+ u16 vecidx;
void *eqc;
int inlen;
u32 *in;
@@ -288,20 +290,16 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
init_eq_buf(eq);
- eq->irq = mlx5_irq_request(dev, vecidx, param->affinity);
- if (IS_ERR(eq->irq)) {
- err = PTR_ERR(eq->irq);
- goto err_buf;
- }
-
+ eq->irq = param->irq;
vecidx = mlx5_irq_get_index(eq->irq);
+
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
- goto err_irq;
+ goto err_buf;
}
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
@@ -345,8 +343,6 @@ err_eq:
err_in:
kvfree(in);
-err_irq:
- mlx5_irq_release(eq->irq);
err_buf:
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
@@ -400,7 +396,6 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
- mlx5_irq_release(eq->irq);
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
@@ -443,7 +438,8 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
struct mlx5_eq_table *eq_table;
int i;
- eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+ eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL,
+ dev->priv.numa_node);
if (!eq_table)
return -ENOMEM;
@@ -579,6 +575,9 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
if (MLX5_CAP_GEN_MAX(dev, vhca_state))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE);
+ if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE);
+
mask[0] = async_event_mask;
if (MLX5_CAP_GEN(dev, event_cap))
@@ -593,11 +592,8 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
eq->irq_nb.notifier_call = mlx5_eq_async_int;
spin_lock_init(&eq->lock);
- if (!zalloc_cpumask_var(&param->affinity, GFP_KERNEL))
- return -ENOMEM;
err = create_async_eq(dev, &eq->core, param);
- free_cpumask_var(param->affinity);
if (err) {
mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
return err;
@@ -622,17 +618,38 @@ static void cleanup_async_eq(struct mlx5_core_dev *dev,
name, err);
}
+static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_NUM_ASYNC_EQE;
+}
+
static int create_async_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_eq_param param = {};
int err;
+ /* All the async_eqs are using single IRQ, request one IRQ and share its
+ * index among all the async_eqs of this device.
+ */
+ table->ctrl_irq = mlx5_ctrl_irq_request(dev);
+ if (IS_ERR(table->ctrl_irq))
+ return PTR_ERR(table->ctrl_irq);
+
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = MLX5_NUM_CMD_EQE,
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
};
@@ -645,8 +662,8 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
- .nent = MLX5_NUM_ASYNC_EQE,
+ .irq = table->ctrl_irq,
+ .nent = async_eq_depth_devlink_param_get(dev),
};
gather_async_events_mask(dev, param.mask);
@@ -655,7 +672,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
goto err2;
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = /* TODO: sriov max_vf + */ 1,
.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
};
@@ -674,6 +691,7 @@ err2:
err1:
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
return err;
}
@@ -688,6 +706,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
}
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
@@ -712,15 +731,14 @@ struct mlx5_eq *
mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq_param *param)
{
- struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+ struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL,
+ dev->priv.numa_node);
int err;
- if (!cpumask_available(param->affinity))
- return ERR_PTR(-EINVAL);
-
if (!eq)
return ERR_PTR(-ENOMEM);
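+
+ /* Generic EQs share the control IRQ requested in create_async_eqs(). */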
+ param->irq = dev->priv.eq_table->ctrl_irq;
err = create_async_eq(dev, eq, param);
if (err) {
kvfree(eq);
@@ -780,6 +798,54 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ if (mlx5_core_is_sf(dev))
+ mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+ else
+ mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
+ kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs = table->num_comp_eqs;
+ u16 *cpus;
+ int ret;
+ int i;
+
+ ncomp_eqs = table->num_comp_eqs;
+ table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+ if (!table->comp_irqs)
+ return -ENOMEM;
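+
+ /* SFs take IRQs from the affinity-managed pool; PFs/VFs request one
+ * vector per completion EQ, spread across NUMA-local CPUs.
+ */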
+ if (mlx5_core_is_sf(dev)) {
+ ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+ }
+
+ cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
+ if (!cpus) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
+ for (i = 0; i < ncomp_eqs; i++)
+ cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ kfree(cpus);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+
+free_irqs:
+ kfree(table->comp_irqs);
+ return ret;
+}
+
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -794,6 +860,22 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
tasklet_disable(&eq->tasklet_ctx.task);
kfree(eq);
}
+ comp_irqs_release(dev);
+}
+
+static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_COMP_EQ_SIZE;
}
static int create_comp_eqs(struct mlx5_core_dev *dev)
@@ -805,14 +887,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
int err;
int i;
+ ncomp_eqs = comp_irqs_request(dev);
+ if (ncomp_eqs < 0)
+ return ncomp_eqs;
INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_eqs = table->num_comp_eqs;
- nent = MLX5_COMP_EQ_SIZE;
+ nent = comp_eq_depth_devlink_param_get(dev);
+
for (i = 0; i < ncomp_eqs; i++) {
struct mlx5_eq_param param = {};
- int vecidx = i;
- eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node);
if (!eq) {
err = -ENOMEM;
goto clean;
@@ -825,18 +909,11 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
- .irq_index = vecidx,
+ .irq = table->comp_irqs[i],
.nent = nent,
};
- if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
- err = -ENOMEM;
- goto clean_eq;
- }
- cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node),
- param.affinity);
err = create_map_eq(dev, &eq->core, &param);
- free_cpumask_var(param.affinity);
if (err)
goto clean_eq;
err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
@@ -850,7 +927,9 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
list_add_tail(&eq->list, &table->comp_eqs_list);
}
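+
+ /* comp_irqs_request() may grant fewer vectors than requested; record how
+ * many completion EQs were actually created.
+ */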
+ table->num_comp_eqs = ncomp_eqs;
return 0;
+
clean_eq:
kfree(eq);
clean:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
index 39e948bc1204..a994e71e05c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -92,6 +92,7 @@ static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw,
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+ flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp;
vport->ingress.offloads.modify_metadata_rule =
mlx5_add_flow_rules(vport->ingress.acl,
NULL, &flow_act, NULL, 0);
@@ -117,6 +118,36 @@ static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw,
vport->ingress.offloads.modify_metadata_rule = NULL;
}
+static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ int err = 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ flow_act.fg = vport->ingress.offloads.drop_grp;
+ flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ }
+
+ vport->ingress.offloads.drop_rule = flow_rule;
+out:
+ return err;
+}
+
+static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->ingress.offloads.drop_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.offloads.drop_rule);
+ vport->ingress.offloads.drop_rule = NULL;
+}
+
static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
@@ -154,6 +185,7 @@ static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw,
{
esw_acl_ingress_allow_rule_destroy(vport);
esw_acl_ingress_mod_metadata_destroy(esw, vport);
+ esw_acl_ingress_src_port_drop_destroy(esw, vport);
}
static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -170,10 +202,29 @@ static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw,
if (!flow_group_in)
return -ENOMEM;
+ if (vport->vport == MLX5_VPORT_UPLINK) {
+ /* This group can hold an FTE to drop all traffic.
+ * Needed when LAG is enabled.
+ */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] failed to create ingress drop flow group, err(%d)\n",
+ vport->vport, ret);
+ goto drop_err;
+ }
+ vport->ingress.offloads.drop_grp = g;
+ flow_index++;
+ }
+
if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
/* This group is to hold FTE to match untagged packets when prio_tag
* is enabled.
*/
+ memset(flow_group_in, 0, inlen);
match_criteria = MLX5_ADDR_OF(create_flow_group_in,
flow_group_in, match_criteria);
MLX5_SET(create_flow_group_in, flow_group_in,
@@ -221,6 +272,11 @@ metadata_err:
vport->ingress.offloads.metadata_prio_tag_grp = NULL;
}
prio_tag_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp);
+ vport->ingress.offloads.drop_grp = NULL;
+ }
+drop_err:
kvfree(flow_group_in);
return ret;
}
@@ -236,6 +292,11 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport)
mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
vport->ingress.offloads.metadata_prio_tag_grp = NULL;
}
+
+ if (vport->ingress.offloads.drop_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp);
+ vport->ingress.offloads.drop_grp = NULL;
+ }
}
int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
@@ -252,6 +313,8 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
if (mlx5_eswitch_vport_match_metadata_enabled(esw))
num_ftes++;
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ num_ftes++;
if (esw_acl_ingress_prio_tag_enabled(esw, vport))
num_ftes++;
@@ -320,3 +383,27 @@ out:
vport->metadata = vport->default_metadata;
return err;
}
+
+int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (IS_ERR(vport)) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ return PTR_ERR(vport);
+ }
+
+ return esw_acl_ingress_src_port_drop_create(esw, vport);
+}
+
+void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport))) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ return;
+ }
+
+ esw_acl_ingress_src_port_drop_destroy(esw, vport);
+}
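
Both the new drop-rule helpers and mlx5_eswitch_get_vport() lean on the kernel's error-pointer convention: a single return value carries either a valid object or an errno encoded into the top of the pointer range, decoded with IS_ERR()/PTR_ERR(). A simplified user-space model of the macros follows; it mimics, not reproduces, the kernel definitions.

/* Simplified ERR_PTR/IS_ERR/PTR_ERR model: errnos live in the last 4095
 * bytes of the address space, which no valid object pointer occupies.
 */
#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)	{ return (void *)error; }
static inline long PTR_ERR(const void *ptr)	{ return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *lookup(int ok)
{
	static int obj = 42;

	return ok ? (void *)&obj : ERR_PTR(-EINVAL);
}

int main(void)
{
	void *p = lookup(0);

	if (IS_ERR(p))
		printf("error %ld\n", PTR_ERR(p));	/* prints -22 */
	return 0;
}
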
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
index c57869b93d60..11d3d3978848 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -6,6 +6,7 @@
#include "eswitch.h"
+#ifdef CONFIG_MLX5_ESWITCH
/* Eswitch acl egress external APIs */
int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
@@ -25,5 +26,19 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vpor
void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
u32 metadata);
+void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline void
+mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{}
+
+static inline int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return 0;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */
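
The #else branch added to ofld.h follows the usual compile-out pattern: when the Kconfig option is disabled, the header supplies static inline no-ops so callers build without #ifdefs of their own (header stubs must be inline, or every includer gets its own unused static copy). A generic sketch of the pattern; CONFIG_FEATURE_X and the names are hypothetical.

/* Compile-out stub pattern for a Kconfig-guarded feature. */
struct device;

#ifdef CONFIG_FEATURE_X
int feature_x_enable(struct device *dev);
void feature_x_disable(struct device *dev);
#else
static inline int feature_x_enable(struct device *dev)
{
	return 0;		/* pretend success; feature compiled out */
}

static inline void feature_x_disable(struct device *dev) {}
#endif /* CONFIG_FEATURE_X */
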
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index f690f430f40f..4fbff7bcc155 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */
+#include <linux/build_bug.h>
#include <linux/list.h>
#include <linux/notifier.h>
#include <net/netevent.h>
@@ -12,26 +13,57 @@
#define CREATE_TRACE_POINTS
#include "diag/bridge_tracepoint.h"
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000
#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 4 - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_FROM \
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \
(MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 2 - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE - 1)
-
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE 64000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000);
+
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1)
#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE / 2 - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \
(MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 2)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1)
#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \
(MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000);
#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
@@ -63,12 +95,14 @@ struct mlx5_esw_bridge {
struct mlx5_flow_table *egress_ft;
struct mlx5_flow_group *egress_vlan_fg;
+ struct mlx5_flow_group *egress_qinq_fg;
struct mlx5_flow_group *egress_mac_fg;
struct mlx5_flow_group *egress_miss_fg;
struct mlx5_pkt_reformat *egress_miss_pkt_reformat;
struct mlx5_flow_handle *egress_miss_handle;
unsigned long ageing_time;
u32 flags;
+ u16 vlan_proto;
};
static void
@@ -138,7 +172,9 @@ mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
}
static struct mlx5_flow_group *
-mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
+mlx5_esw_bridge_ingress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
@@ -154,30 +190,53 @@ mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flo
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
- MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
- MLX5_SET(create_flow_group_in, in, start_flow_index,
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM);
- MLX5_SET(create_flow_group_in, in, end_flow_index,
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
fg = mlx5_create_flow_group(ingress_ft, in);
kvfree(in);
if (IS_ERR(fg))
esw_warn(esw->dev,
- "Failed to create VLAN flow group for bridge ingress table (err=%ld)\n",
- PTR_ERR(fg));
+ "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%ld)\n",
+ vlan_proto, PTR_ERR(fg));
return fg;
}
static struct mlx5_flow_group *
-mlx5_esw_bridge_ingress_filter_fg_create(struct mlx5_eswitch *esw,
- struct mlx5_flow_table *ingress_ft)
+mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(unsigned int from, unsigned int to,
+ u16 vlan_proto, struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
@@ -193,27 +252,48 @@ mlx5_esw_bridge_ingress_filter_fg_create(struct mlx5_eswitch *esw,
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
- MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
-
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
- MLX5_SET(create_flow_group_in, in, start_flow_index,
- MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_FROM);
- MLX5_SET(create_flow_group_in, in, end_flow_index,
- MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
fg = mlx5_create_flow_group(ingress_ft, in);
if (IS_ERR(fg))
esw_warn(esw->dev,
"Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n",
PTR_ERR(fg));
-
kvfree(in);
return fg;
}
static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021Q, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_qinq_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021AD, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -250,7 +330,9 @@ mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow
}
static struct mlx5_flow_group *
-mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+mlx5_esw_bridge_egress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *egress_ft)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
@@ -265,13 +347,14 @@ mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16);
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0);
- MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
- MLX5_SET(create_flow_group_in, in, start_flow_index,
- MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM);
- MLX5_SET(create_flow_group_in, in, end_flow_index,
- MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
fg = mlx5_create_flow_group(egress_ft, in);
if (IS_ERR(fg))
@@ -283,6 +366,25 @@ mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow
}
static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, egress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *egress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, egress_ft);
+}
+
+static struct mlx5_flow_group *
mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -346,7 +448,7 @@ mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow
static int
mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_flow_group *mac_fg, *filter_fg, *vlan_fg;
+ struct mlx5_flow_group *mac_fg, *qinq_filter_fg, *qinq_fg, *vlan_filter_fg, *vlan_fg;
struct mlx5_flow_table *ingress_ft, *skip_ft;
struct mlx5_eswitch *esw = br_offloads->esw;
int err;
@@ -374,10 +476,22 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
goto err_vlan_fg;
}
- filter_fg = mlx5_esw_bridge_ingress_filter_fg_create(esw, ingress_ft);
- if (IS_ERR(filter_fg)) {
- err = PTR_ERR(filter_fg);
- goto err_filter_fg;
+ vlan_filter_fg = mlx5_esw_bridge_ingress_vlan_filter_fg_create(esw, ingress_ft);
+ if (IS_ERR(vlan_filter_fg)) {
+ err = PTR_ERR(vlan_filter_fg);
+ goto err_vlan_filter_fg;
+ }
+
+ qinq_fg = mlx5_esw_bridge_ingress_qinq_fg_create(esw, ingress_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
+ qinq_filter_fg = mlx5_esw_bridge_ingress_qinq_filter_fg_create(esw, ingress_ft);
+ if (IS_ERR(qinq_filter_fg)) {
+ err = PTR_ERR(qinq_filter_fg);
+ goto err_qinq_filter_fg;
}
mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(esw, ingress_ft);
@@ -389,13 +503,19 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
br_offloads->ingress_ft = ingress_ft;
br_offloads->skip_ft = skip_ft;
br_offloads->ingress_vlan_fg = vlan_fg;
- br_offloads->ingress_filter_fg = filter_fg;
+ br_offloads->ingress_vlan_filter_fg = vlan_filter_fg;
+ br_offloads->ingress_qinq_fg = qinq_fg;
+ br_offloads->ingress_qinq_filter_fg = qinq_filter_fg;
br_offloads->ingress_mac_fg = mac_fg;
return 0;
err_mac_fg:
- mlx5_destroy_flow_group(filter_fg);
-err_filter_fg:
+ mlx5_destroy_flow_group(qinq_filter_fg);
+err_qinq_filter_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
+ mlx5_destroy_flow_group(vlan_filter_fg);
+err_vlan_filter_fg:
mlx5_destroy_flow_group(vlan_fg);
err_vlan_fg:
mlx5_destroy_flow_table(skip_ft);
@@ -409,8 +529,12 @@ mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloa
{
mlx5_destroy_flow_group(br_offloads->ingress_mac_fg);
br_offloads->ingress_mac_fg = NULL;
- mlx5_destroy_flow_group(br_offloads->ingress_filter_fg);
- br_offloads->ingress_filter_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_qinq_filter_fg);
+ br_offloads->ingress_qinq_filter_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_qinq_fg);
+ br_offloads->ingress_qinq_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_vlan_filter_fg);
+ br_offloads->ingress_vlan_filter_fg = NULL;
mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg);
br_offloads->ingress_vlan_fg = NULL;
mlx5_destroy_flow_table(br_offloads->skip_ft);
@@ -428,7 +552,7 @@ static int
mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
struct mlx5_esw_bridge *bridge)
{
- struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg;
+ struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg, *qinq_fg;
struct mlx5_pkt_reformat *miss_pkt_reformat = NULL;
struct mlx5_flow_handle *miss_handle = NULL;
struct mlx5_eswitch *esw = br_offloads->esw;
@@ -447,6 +571,12 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
goto err_vlan_fg;
}
+ qinq_fg = mlx5_esw_bridge_egress_qinq_fg_create(esw, egress_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
mac_fg = mlx5_esw_bridge_egress_mac_fg_create(esw, egress_ft);
if (IS_ERR(mac_fg)) {
err = PTR_ERR(mac_fg);
@@ -491,6 +621,7 @@ skip_miss_flow:
bridge->egress_ft = egress_ft;
bridge->egress_vlan_fg = vlan_fg;
+ bridge->egress_qinq_fg = qinq_fg;
bridge->egress_mac_fg = mac_fg;
bridge->egress_miss_fg = miss_fg;
bridge->egress_miss_pkt_reformat = miss_pkt_reformat;
@@ -498,6 +629,8 @@ skip_miss_flow:
return 0;
err_mac_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
mlx5_destroy_flow_group(vlan_fg);
err_vlan_fg:
mlx5_destroy_flow_table(egress_ft);
@@ -515,6 +648,7 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
if (bridge->egress_miss_fg)
mlx5_destroy_flow_group(bridge->egress_miss_fg);
mlx5_destroy_flow_group(bridge->egress_mac_fg);
+ mlx5_destroy_flow_group(bridge->egress_qinq_fg);
mlx5_destroy_flow_group(bridge->egress_vlan_fg);
mlx5_destroy_flow_table(bridge->egress_ft);
}
@@ -559,10 +693,17 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char
flow_act.pkt_reformat = vlan->pkt_reformat_push;
flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark;
} else if (vlan) {
- MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
- outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
- outer_headers.cvlan_tag);
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
outer_headers.first_vid);
MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
@@ -645,10 +786,17 @@ mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *a
MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
- MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
- outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
- outer_headers.cvlan_tag);
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1);
@@ -696,10 +844,17 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const u
flow_act.pkt_reformat = vlan->pkt_reformat_pop;
}
- MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
- outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
- outer_headers.cvlan_tag);
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
outer_headers.first_vid);
MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
@@ -774,6 +929,7 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
bridge->ifindex = ifindex;
bridge->refcnt = 1;
bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME);
+ bridge->vlan_proto = ETH_P_8021Q;
list_add(&bridge->list, &br_offloads->bridges);
return bridge;
@@ -911,12 +1067,13 @@ mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port)
}
static int
-mlx5_esw_bridge_vlan_push_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+mlx5_esw_bridge_vlan_push_create(u16 vlan_proto, struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_eswitch *esw)
{
struct {
__be16 h_vlan_proto;
__be16 h_vlan_TCI;
- } vlan_hdr = { htons(ETH_P_8021Q), htons(vlan->vid) };
+ } vlan_hdr = { htons(vlan_proto), htons(vlan->vid) };
struct mlx5_pkt_reformat_params reformat_params = {};
struct mlx5_pkt_reformat *pkt_reformat;
@@ -1008,36 +1165,58 @@ mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct
vlan->pkt_mod_hdr_push_mark = NULL;
}
-static struct mlx5_esw_bridge_vlan *
-mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port,
- struct mlx5_eswitch *esw)
+static int
+mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_eswitch *esw)
{
- struct mlx5_esw_bridge_vlan *vlan;
int err;
- vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL);
- if (!vlan)
- return ERR_PTR(-ENOMEM);
-
- vlan->vid = vid;
- vlan->flags = flags;
- INIT_LIST_HEAD(&vlan->fdb_list);
-
if (flags & BRIDGE_VLAN_INFO_PVID) {
- err = mlx5_esw_bridge_vlan_push_create(vlan, esw);
+ err = mlx5_esw_bridge_vlan_push_create(vlan_proto, vlan, esw);
if (err)
- goto err_vlan_push;
+ return err;
err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw);
if (err)
goto err_vlan_push_mark;
}
+
if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
err = mlx5_esw_bridge_vlan_pop_create(vlan, esw);
if (err)
goto err_vlan_pop;
}
+ return 0;
+
+err_vlan_pop:
+ if (vlan->pkt_mod_hdr_push_mark)
+ mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+err_vlan_push_mark:
+ if (vlan->pkt_reformat_push)
+ mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+ return err;
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_bridge_port *port,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ int err;
+
+ vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return ERR_PTR(-ENOMEM);
+
+ vlan->vid = vid;
+ vlan->flags = flags;
+ INIT_LIST_HEAD(&vlan->fdb_list);
+
+ err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw);
+ if (err)
+ goto err_vlan_push_pop;
+
err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL);
if (err)
goto err_xa_insert;
@@ -1048,13 +1227,11 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por
err_xa_insert:
if (vlan->pkt_reformat_pop)
mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
-err_vlan_pop:
if (vlan->pkt_mod_hdr_push_mark)
mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
-err_vlan_push_mark:
if (vlan->pkt_reformat_push)
mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
-err_vlan_push:
+err_vlan_push_pop:
kvfree(vlan);
return ERR_PTR(err);
}
@@ -1102,6 +1279,50 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port,
mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge);
}
+static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_vlan *vlan;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&port->vlans, i, vlan) {
+ mlx5_esw_bridge_vlan_flush(vlan, bridge);
+ err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan,
+ br_offloads->esw);
+ if (err) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n",
+ vlan->vid, bridge->vlan_proto, port->vport_num,
+ err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_esw_bridge_vlans_recreate(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_port *port;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port->bridge != bridge)
+ continue;
+
+ err = mlx5_esw_bridge_port_vlans_recreate(port, bridge);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static struct mlx5_esw_bridge_vlan *
mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id,
struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw)
@@ -1287,6 +1508,32 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo
return 0;
}
+int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id,
+ br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ bridge = port->bridge;
+ if (bridge->vlan_proto == proto)
+ return 0;
+ if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) {
+ esw_warn(br_offloads->esw->dev, "Can't set unsupported VLAN protocol %x\n", proto);
+ return -EOPNOTSUPP;
+ }
+
+ mlx5_esw_bridge_fdb_flush(bridge);
+ bridge->vlan_proto = proto;
+ mlx5_esw_bridge_vlans_recreate(bridge);
+
+ return 0;
+}
+
static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags,
struct mlx5_esw_bridge_offloads *br_offloads,
struct mlx5_esw_bridge *bridge)
@@ -1434,7 +1681,8 @@ int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
}
- vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, br_offloads->esw);
+ vlan = mlx5_esw_bridge_vlan_create(port->bridge->vlan_proto, vid, flags, port,
+ br_offloads->esw);
if (IS_ERR(vlan)) {
NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry");
return PTR_ERR(vlan);
@@ -1574,6 +1822,8 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
{
struct mlx5_esw_bridge_offloads *br_offloads;
+ ASSERT_RTNL();
+
br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
if (!br_offloads)
return ERR_PTR(-ENOMEM);
@@ -1590,6 +1840,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
{
struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+ ASSERT_RTNL();
+
if (!br_offloads)
return;
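
With the QinQ groups added, every group's index range in bridge.c is now derived from the end of the previous one, and static_assert() pins the total so a size tweak that blows the 64000-entry budget fails at compile time rather than at table creation. The same idea reduced to a standalone C11 check, using the ingress-table sizes from the hunk above:

/* Derived flow-group layout: each range starts where the previous one
 * ends; the static_assert catches any resize that overflows the table.
 */
#include <assert.h>

#define VLAN_GRP_SIZE		12000
#define UNTAGGED_GRP_SIZE	16000

#define VLAN_FROM		0
#define VLAN_TO			(VLAN_FROM + VLAN_GRP_SIZE - 1)
#define VLAN_FILTER_FROM	(VLAN_TO + 1)
#define VLAN_FILTER_TO		(VLAN_FILTER_FROM + VLAN_GRP_SIZE - 1)
#define QINQ_FROM		(VLAN_FILTER_TO + 1)
#define QINQ_TO			(QINQ_FROM + VLAN_GRP_SIZE - 1)
#define QINQ_FILTER_FROM	(QINQ_TO + 1)
#define QINQ_FILTER_TO		(QINQ_FILTER_FROM + VLAN_GRP_SIZE - 1)
#define MAC_FROM		(QINQ_FILTER_TO + 1)
#define MAC_TO			(MAC_FROM + UNTAGGED_GRP_SIZE - 1)
#define TABLE_SIZE		(MAC_TO + 1)

static_assert(TABLE_SIZE == 64000, "ingress table layout out of budget");

int main(void) { return 0; }
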
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index efc39975226e..10851a515bca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -26,7 +26,9 @@ struct mlx5_esw_bridge_offloads {
struct mlx5_flow_table *ingress_ft;
struct mlx5_flow_group *ingress_vlan_fg;
- struct mlx5_flow_group *ingress_filter_fg;
+ struct mlx5_flow_group *ingress_vlan_filter_fg;
+ struct mlx5_flow_group *ingress_qinq_fg;
+ struct mlx5_flow_group *ingress_qinq_filter_fg;
struct mlx5_flow_group *ingress_mac_fg;
struct mlx5_flow_table *skip_ft;
@@ -60,6 +62,8 @@ int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsign
struct mlx5_esw_bridge_offloads *br_offloads);
int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
+ struct mlx5_esw_bridge_offloads *br_offloads);
int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
struct mlx5_esw_bridge_offloads *br_offloads,
struct netlink_ext_ack *extack);
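
When the bridge VLAN protocol changes, mlx5_esw_bridge_vlan_proto_set() flushes the offloaded FDB and rebuilds each port's VLAN push/pop actions in place; ports and their VLANs live in xarrays, so the rebuild is a nested xa_for_each() walk. A condensed sketch of that iteration idiom, a fragment rather than the driver code verbatim:

/* Shape of the recreate walk: all ports of all bridges share one xarray,
 * so ports belonging to other bridges are skipped.
 */
struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge_vlan *vlan;
unsigned long i, j;

xa_for_each(&br_offloads->ports, i, port) {
	if (port->bridge != bridge)
		continue;
	xa_for_each(&port->vlans, j, vlan) {
		/* flush the old push/pop actions, then re-create them
		 * with bridge->vlan_proto; bail out on first failure
		 */
	}
}
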
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
new file mode 100644
index 000000000000..2db13c71e88c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/debugfs.h>
+#include "eswitch.h"
+
+enum vnic_diag_counter {
+ MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,
+ MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,
+ MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,
+ MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,
+ MLX5_VNIC_DIAG_CQ_OVERRUN,
+ MLX5_VNIC_DIAG_INVALID_COMMAND,
+ MLX5_VNIC_DIAG_QUOTA_EXCEEDED_COMMAND,
+};
+
+static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
+ u32 *val)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+ struct mlx5_core_dev *dev = vport->dev;
+ u16 vport_num = vport->vport;
+ void *vnic_diag_out;
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
+ if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num))
+ MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env);
+ switch (counter) {
+ case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues);
+ break;
+ case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out,
+ send_queue_priority_update_flow);
+ break;
+ case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_CQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_INVALID_COMMAND:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command);
+ break;
+ case MLX5_VNIC_DIAG_QUOTA_EXCEEDED_COMMAND:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
+ break;
+ }
+
+ return 0;
+}
+
+static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
+ enum vnic_diag_counter type)
+{
+ u32 val = 0;
+ int ret;
+
+ ret = mlx5_esw_query_vnic_diag(vport, type, &val);
+ if (ret)
+ return ret;
+
+ seq_printf(file, "%u\n", val);
+ return 0;
+}
+
+static int total_q_under_processor_handle_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
+}
+
+static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private,
+ MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
+}
+
+static int comp_eq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
+}
+
+static int async_eq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
+}
+
+static int cq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
+}
+
+static int invalid_command_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
+}
+
+static int quota_exceeded_command_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QUOTA_EXCEEDED_COMMAND);
+}
+
+DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
+DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
+DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(cq_overrun);
+DEFINE_SHOW_ATTRIBUTE(invalid_command);
+DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ debugfs_remove_recursive(vport->dbgfs);
+ vport->dbgfs = NULL;
+}
+
+/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */
+#define VNIC_DIAG_DIR_NAME_MAX_LEN 8
+
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+ struct dentry *vnic_diag;
+ char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
+ int err;
+
+ if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return;
+
+ if (vport_num == MLX5_VPORT_PF) {
+ strcpy(dir_name, "pf");
+ } else if (vport_num == MLX5_VPORT_ECPF) {
+ strcpy(dir_name, "ecpf");
+ } else {
+ err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf",
+ is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
+ if (WARN_ON(err < 0 || err >= VNIC_DIAG_DIR_NAME_MAX_LEN))
+ return;
+ }
+
+ vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
+ vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);
+
+ if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
+ debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
+ &total_q_under_processor_handle_fops);
+ debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
+ &send_queue_priority_update_flow_fops);
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
+ debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
+ &comp_eq_overrun_fops);
+ debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
+ &async_eq_overrun_fops);
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
+ debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);
+
+ if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
+ debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
+ &invalid_command_fops);
+
+ if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
+ debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
+ &quota_exceeded_command_fops);
+}
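
The new debugfs file relies on the DEFINE_SHOW_ATTRIBUTE() boilerplate: the macro wraps a name_show(struct seq_file *, void *) callback in single_open() and emits a matching name_fops, and the void *data passed to debugfs_create_file() comes back as file->private in the show callback. A hypothetical module sketch of the same pattern, reduced to one counter (not driver code):

/* Minimal debugfs read-only attribute using DEFINE_SHOW_ATTRIBUTE(). */
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static struct dentry *dir;
static u32 demo_val = 7;

static int demo_counter_show(struct seq_file *file, void *priv)
{
	seq_printf(file, "%u\n", *(u32 *)file->private);
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(demo_counter);

static int __init demo_init(void)
{
	dir = debugfs_create_dir("demo", NULL);
	debugfs_create_file("demo_counter", 0444, dir, &demo_val,
			    &demo_counter_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
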
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 7f9b96d9537e..9bc7be95db54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -87,11 +87,11 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
devlink = priv_to_devlink(dev);
dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
- err = devlink_port_register(devlink, dl_port, dl_port_index);
+ err = devl_port_register(devlink, dl_port, dl_port_index);
if (err)
goto reg_err;
- err = devlink_rate_leaf_create(dl_port, vport);
+ err = devl_rate_leaf_create(dl_port, vport);
if (err)
goto rate_err;
@@ -99,7 +99,7 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
return 0;
rate_err:
- devlink_port_unregister(dl_port);
+ devl_port_unregister(dl_port);
reg_err:
mlx5_esw_dl_port_free(dl_port);
return err;
@@ -118,10 +118,10 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
if (vport->dl_port->devlink_rate) {
mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
- devlink_rate_leaf_destroy(vport->dl_port);
+ devl_rate_leaf_destroy(vport->dl_port);
}
- devlink_port_unregister(vport->dl_port);
+ devl_port_unregister(vport->dl_port);
mlx5_esw_dl_port_free(vport->dl_port);
vport->dl_port = NULL;
}
@@ -156,11 +156,11 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller);
devlink = priv_to_devlink(dev);
dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
- err = devlink_port_register(devlink, dl_port, dl_port_index);
+ err = devl_port_register(devlink, dl_port, dl_port_index);
if (err)
return err;
- err = devlink_rate_leaf_create(dl_port, vport);
+ err = devl_rate_leaf_create(dl_port, vport);
if (err)
goto rate_err;
@@ -168,7 +168,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
return 0;
rate_err:
- devlink_port_unregister(dl_port);
+ devl_port_unregister(dl_port);
return err;
}
@@ -182,9 +182,9 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num
if (vport->dl_port->devlink_rate) {
mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
- devlink_rate_leaf_destroy(vport->dl_port);
+ devl_rate_leaf_destroy(vport->dl_port);
}
- devlink_port_unregister(vport->dl_port);
+ devl_port_unregister(vport->dl_port);
vport->dl_port = NULL;
}
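
The devlink_* to devl_* renames above are not cosmetic: the devl_* variants assume the caller already holds the devlink instance lock, which these registration paths now do. Code that is not already running under a devlink op takes the lock explicitly; a hedged sketch of that calling convention (mlx5 itself reaches these registrations from contexts that already hold the lock):

/* Locked-variant calling convention for devl_port_register(). */
static int register_port_locked(struct devlink *devlink,
				struct devlink_port *dl_port,
				unsigned int index)
{
	int err;

	devl_lock(devlink);
	err = devl_port_register(devlink, dl_port, index);
	devl_unlock(devlink);
	return err;
}
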
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
index 3401188e0a60..51ac24e6ec3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template,
__field(unsigned int, used)
),
TP_fast_assign(
- strncpy(__entry->dev_name,
+ strscpy(__entry->dev_name,
netdev_name(fdb->dev),
IFNAMSIZ);
memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
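
The strncpy() to strscpy() switch in the tracepoint matters because strncpy() leaves the destination unterminated when the source fills the buffer, while strscpy() always NUL-terminates and reports truncation as -E2BIG. A simplified user-space re-implementation for illustration, not the kernel's:

/* Model of strscpy() semantics: always terminated, -E2BIG on truncation. */
#include <stdio.h>
#include <string.h>

#define E2BIG 7

static long my_strscpy(char *dst, const char *src, size_t size)
{
	size_t len = strnlen(src, size);

	if (!size)
		return -E2BIG;
	if (len == size) {		/* source would fill the buffer */
		len = size - 1;
		memcpy(dst, src, len);
		dst[len] = '\0';
		return -E2BIG;
	}
	memcpy(dst, src, len + 1);	/* fits, copy the NUL too */
	return (long)len;
}

int main(void)
{
	char buf[8];

	printf("%ld '%s'\n", my_strscpy(buf, "short", sizeof(buf)), buf);
	printf("%ld '%s'\n", my_strscpy(buf, "much-too-long", sizeof(buf)), buf);
	return 0;
}
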
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
index 425c91814b34..c9a91158e99c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -14,6 +14,7 @@
#include "fs_core.h"
#include "esw/indir_table.h"
#include "lib/fs_chains.h"
+#include "en/mod_hdr.h"
#define MLX5_ESW_INDIR_TABLE_SIZE 128
#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
@@ -77,15 +78,19 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
struct mlx5_core_dev *dest_mdev)
{
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ bool vf_sf_vport;
+
+ vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) ||
+ mlx5_esw_is_sf_vport(esw, vport_num);
/* Use indirect table for all IP traffic from UL to VF with vport
* destination when source rewrite flag is set.
*/
return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
- mlx5_eswitch_is_vf_vport(esw, vport_num) &&
+ vf_sf_vport &&
esw->dev == dest_mdev &&
attr->ip_version &&
- attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+ attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
}
u16
@@ -226,7 +231,7 @@ static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
goto err_handle;
}
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(&mod_acts);
rule->handle = handle;
rule->vni = esw_attr->rx_tun_attr->vni;
rule->mh = flow_act.modify_hdr;
@@ -243,7 +248,7 @@ err_table:
mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
kfree(rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index df277a6cddc0..fabe49a35a5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -11,6 +11,7 @@
#include "mlx5_core.h"
#include "eswitch.h"
#include "fs_core.h"
+#include "fs_ft_pool.h"
#include "esw/qos.h"
enum {
@@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
if (!flow_group_in)
return -ENOMEM;
- table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
- ft_attr.max_fte = table_size;
+ ft_attr.max_fte = POOL_NEXT_SIZE;
ft_attr.prio = LEGACY_FDB_PRIO;
fdb = mlx5_create_flow_table(root_ns, &ft_attr);
if (IS_ERR(fdb)) {
@@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
goto out;
}
esw->fdb_table.legacy.fdb = fdb;
+ table_size = fdb->max_fte;
/* Addresses group : Full match unicast/multicast addresses */
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -431,7 +432,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
int err = 0;
if (!mlx5_esw_allowed(esw))
- return -EPERM;
+ return vlan ? -EPERM : 0;
if (vlan || qos)
set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
@@ -522,9 +523,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
return PTR_ERR(evport);
mutex_lock(&esw->state_lock);
- err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL);
- if (!err)
- err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL);
+ err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate);
mutex_unlock(&esw->state_lock);
return err;
}
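
The legacy FDB sizing change swaps a hand-computed size for the flow-table pool: passing POOL_NEXT_SIZE asks the pool for the largest table it can still provide, and the caller then sizes its flow groups from the max_fte actually granted instead of from log_max_ft_size. A hedged fragment of the pattern, mirroring the hunk above:

/* Sketch: let the ft pool pick the size, then carve groups from it. */
ft_attr.max_fte = POOL_NEXT_SIZE;	/* "as big as the pool allows" */
fdb = mlx5_create_flow_table(root_ns, &ft_attr);
if (IS_ERR(fdb))
	return PTR_ERR(fdb);
table_size = fdb->max_fte;		/* size groups to what we got */
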
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index d377ddc70fc7..4f8a24d84a86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -204,10 +204,8 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid
return 0;
}
-int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
- struct mlx5_vport *evport,
- u32 min_rate,
- struct netlink_ext_ack *extack)
+static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 min_rate, struct netlink_ext_ack *extack)
{
u32 fw_max_bw_share, previous_min_rate;
bool min_rate_supported;
@@ -231,10 +229,8 @@ int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
return err;
}
-int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
- struct mlx5_vport *evport,
- u32 max_rate,
- struct netlink_ext_ack *extack)
+static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 max_rate, struct netlink_ext_ack *extack)
{
u32 act_max_rate = max_rate;
bool max_rate_supported;
@@ -432,16 +428,13 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
}
static struct mlx5_esw_rate_group *
-esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_esw_rate_group *group;
u32 divider;
int err;
- if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
- return ERR_PTR(-EOPNOTSUPP);
-
group = kzalloc(sizeof(*group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
@@ -482,9 +475,32 @@ err_sched_elem:
return ERR_PTR(err);
}
-static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
- struct mlx5_esw_rate_group *group,
- struct netlink_ext_ack *extack)
+static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
+static void esw_qos_put(struct mlx5_eswitch *esw);
+
+static struct mlx5_esw_rate_group *
+esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ int err;
+
+ if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = esw_qos_get(esw, extack);
+ if (err)
+ return ERR_PTR(err);
+
+ group = __esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(group))
+ esw_qos_put(esw);
+
+ return group;
+}
+
+static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
{
u32 divider;
int err;
@@ -503,7 +519,21 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
+
kfree(group);
+
+ return err;
+}
+
+static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = __esw_qos_destroy_rate_group(esw, group, extack);
+ esw_qos_put(esw);
+
return err;
}
@@ -526,7 +556,7 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
return false;
}
-void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
+static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_core_dev *dev = esw->dev;
@@ -534,14 +564,10 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
int err;
if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
- return;
+ return -EOPNOTSUPP;
if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
- return;
-
- mutex_lock(&esw->state_lock);
- if (esw->qos.enabled)
- goto unlock;
+ return -EOPNOTSUPP;
MLX5_SET(scheduling_context, tsar_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
@@ -555,75 +581,94 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
&esw->qos.root_tsar_ix);
if (err) {
esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
- goto unlock;
+ return err;
}
INIT_LIST_HEAD(&esw->qos.groups);
if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
- esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
+ esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
if (IS_ERR(esw->qos.group0)) {
esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
PTR_ERR(esw->qos.group0));
+ err = PTR_ERR(esw->qos.group0);
goto err_group0;
}
}
- esw->qos.enabled = true;
-unlock:
- mutex_unlock(&esw->state_lock);
- return;
+ refcount_set(&esw->qos.refcnt, 1);
+
+ return 0;
err_group0:
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- esw->qos.root_tsar_ix);
- if (err)
- esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
- mutex_unlock(&esw->state_lock);
+ if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix))
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
+
+ return err;
}
-void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
+static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
- struct devlink *devlink = priv_to_devlink(esw->dev);
int err;
- devlink_rate_nodes_destroy(devlink);
- mutex_lock(&esw->state_lock);
- if (!esw->qos.enabled)
- goto unlock;
-
if (esw->qos.group0)
- esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+ __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
err = mlx5_destroy_scheduling_element_cmd(esw->dev,
SCHEDULING_HIERARCHY_E_SWITCH,
esw->qos.root_tsar_ix);
if (err)
esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+}
- esw->qos.enabled = false;
-unlock:
- mutex_unlock(&esw->state_lock);
+static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ lockdep_assert_held(&esw->state_lock);
+
+ if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
+ /* esw_qos_create() set refcount to 1 only on success.
+ * No need to decrement on failure.
+ */
+ err = esw_qos_create(esw, extack);
+ }
+
+ return err;
+}
+
+static void esw_qos_put(struct mlx5_eswitch *esw)
+{
+ lockdep_assert_held(&esw->state_lock);
+ if (refcount_dec_and_test(&esw->qos.refcnt))
+ esw_qos_destroy(esw);
}
-int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
- u32 max_rate, u32 bw_share)
+static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
int err;
lockdep_assert_held(&esw->state_lock);
- if (!esw->qos.enabled)
+ if (vport->qos.enabled)
return 0;
- if (vport->qos.enabled)
- return -EEXIST;
+ err = esw_qos_get(esw, extack);
+ if (err)
+ return err;
vport->qos.group = esw->qos.group0;
err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
- if (!err) {
- vport->qos.enabled = true;
- trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
- }
+ if (err)
+ goto err_out;
+
+ vport->qos.enabled = true;
+ trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
+
+ return 0;
+
+err_out:
+ esw_qos_put(esw);
return err;
}
@@ -633,7 +678,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
int err;
lockdep_assert_held(&esw->state_lock);
- if (!esw->qos.enabled || !vport->qos.enabled)
+ if (!vport->qos.enabled)
return;
WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
"Disabling QoS on port before detaching it from group");
@@ -645,8 +690,27 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
vport->vport, err);
- vport->qos.enabled = false;
+ memset(&vport->qos, 0, sizeof(vport->qos));
trace_mlx5_esw_vport_qos_destroy(vport);
+
+ esw_qos_put(esw);
+}
+
+int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 min_rate)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
+ if (err)
+ return err;
+
+ err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
+ if (!err)
+ err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
+
+ return err;
}
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
@@ -654,22 +718,29 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32
u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_vport *vport;
u32 bitmask;
+ int err;
vport = mlx5_eswitch_get_vport(esw, vport_num);
if (IS_ERR(vport))
return PTR_ERR(vport);
- if (!vport->qos.enabled)
- return -EOPNOTSUPP;
-
- MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
- bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ mutex_lock(&esw->state_lock);
+ if (!vport->qos.enabled) {
+ /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
+ err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
+ } else {
+ MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+
+ bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ err = mlx5_modify_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ ctx,
+ vport->qos.esw_tsar_ix,
+ bitmask);
+ }
+ mutex_unlock(&esw->state_lock);
- return mlx5_modify_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- ctx,
- vport->qos.esw_tsar_ix,
- bitmask);
+ return err;
}
#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
@@ -728,7 +799,12 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void
return err;
mutex_lock(&esw->state_lock);
- err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (err)
+ goto unlock;
+
+ err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+unlock:
mutex_unlock(&esw->state_lock);
return err;
}
@@ -749,7 +825,12 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *
return err;
mutex_lock(&esw->state_lock);
- err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (err)
+ goto unlock;
+
+ err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+unlock:
mutex_unlock(&esw->state_lock);
return err;
}
@@ -843,10 +924,16 @@ int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
struct mlx5_esw_rate_group *group,
struct netlink_ext_ack *extack)
{
- int err;
+ int err = 0;
mutex_lock(&esw->state_lock);
- err = esw_qos_vport_update_group(esw, vport, group, extack);
+ if (!vport->qos.enabled && !group)
+ goto unlock;
+
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (!err)
+ err = esw_qos_vport_update_group(esw, vport, group, extack);
+unlock:
mutex_unlock(&esw->state_lock);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
index 28451abe2d2f..0141e9d52037 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -6,18 +6,8 @@
#ifdef CONFIG_MLX5_ESWITCH
-int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
- struct mlx5_vport *evport,
- u32 min_rate,
- struct netlink_ext_ack *extack);
-int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
- struct mlx5_vport *evport,
- u32 max_rate,
- struct netlink_ext_ack *extack);
-void mlx5_esw_qos_create(struct mlx5_eswitch *esw);
-void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw);
-int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
- u32 max_rate, u32 bw_share);
+int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 max_rate, u32 min_rate);
void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 51a8cecc4a7c..2169486c4bfb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mpfs.h>
+#include <linux/debugfs.h>
#include "esw/acl/lgcy.h"
#include "esw/legacy.h"
#include "esw/qos.h"
@@ -781,9 +782,6 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
if (err)
return err;
- /* Attach vport to the eswitch rate limiter */
- mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
-
if (mlx5_esw_is_manager_vport(esw, vport_num))
return 0;
@@ -1005,6 +1003,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
if (err)
return err;
+ mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
err = esw_offloads_load_rep(esw, vport_num);
if (err)
goto err_rep;
@@ -1012,6 +1011,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
return err;
err_rep:
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num);
return err;
}
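mlx5_esw_vport_debugfs_create()/_destroy() are implemented elsewhere in the series and only called here. An illustrative sketch of what the create side might look like; the directory naming and error handling are assumptions, not the actual implementation:

	void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num,
					   bool is_sf, u16 sf_num)
	{
		struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
		char dir_name[16];

		if (IS_ERR(vport))
			return;
		if (is_sf)
			snprintf(dir_name, sizeof(dir_name), "sf_%u", sf_num);
		else
			snprintf(dir_name, sizeof(dir_name), "%u", vport_num);
		/* hangs off the per-eswitch "esw" debugfs dir created at init */
		vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
	}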
@@ -1019,6 +1019,7 @@ err_rep:
void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
esw_offloads_unload_rep(esw, vport_num);
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num);
}
@@ -1155,8 +1156,6 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs)
{
const u32 *out;
- WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE);
-
if (num_vfs < 0)
return;
@@ -1189,6 +1188,9 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
int total_vports;
int err;
+ if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED)
+ return 0;
+
total_vports = mlx5_eswitch_get_total_vports(dev);
if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
@@ -1206,6 +1208,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
} else {
esw_warn(dev, "ingress ACL is not supported by FW\n");
}
+ esw->flags |= MLX5_ESWITCH_VPORT_ACL_NS_CREATED;
return 0;
err:
@@ -1218,6 +1221,7 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw)
{
struct mlx5_core_dev *dev = esw->dev;
+ esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED;
if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
mlx5_fs_ingress_acls_cleanup(dev);
if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
@@ -1227,7 +1231,6 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw)
/**
* mlx5_eswitch_enable_locked - Enable eswitch
* @esw: Pointer to eswitch
- * @mode: Eswitch mode to enable
* @num_vfs: Enable eswitch for given number of VFs. This is optional.
* Valid values are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS.
* Caller should pass num_vfs > 0 when enabling eswitch for
@@ -1241,7 +1244,7 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw)
* mode. If num_vfs >= 0 is provided, it sets up the VF-related eswitch vports.
* It returns 0 on success or error code on failure.
*/
-int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
{
int err;
@@ -1260,11 +1263,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
- mlx5_esw_qos_create(esw);
-
- esw->mode = mode;
-
- if (mode == MLX5_ESWITCH_LEGACY) {
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
} else {
mlx5_rescan_drivers(esw->dev);
@@ -1274,23 +1273,19 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
if (err)
goto abort;
+ esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
+
mlx5_eswitch_event_handlers_register(esw);
esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
- mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
esw->esw_funcs.num_vfs, esw->enabled_vports);
- mlx5_esw_mode_change_notify(esw, mode);
+ mlx5_esw_mode_change_notify(esw, esw->mode);
return 0;
abort:
- esw->mode = MLX5_ESWITCH_NONE;
-
- if (mode == MLX5_ESWITCH_OFFLOADS)
- mlx5_rescan_drivers(esw->dev);
-
- mlx5_esw_qos_destroy(esw);
mlx5_esw_acls_ns_cleanup(esw);
return err;
}
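With the mode argument dropped, callers pick esw->mode before invoking mlx5_eswitch_enable_locked(); the offloads case appears in esw_offloads_start() further down. A condensed sketch of the convention (runs under esw->mode_lock):

	esw->mode = MLX5_ESWITCH_OFFLOADS;	/* or MLX5_ESWITCH_LEGACY */
	err = mlx5_eswitch_enable_locked(esw, num_vfs);
	if (err)
		esw->mode = MLX5_ESWITCH_LEGACY;	/* fall back */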
@@ -1311,14 +1306,16 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
if (!mlx5_esw_allowed(esw))
return 0;
- toggle_lag = esw->mode == MLX5_ESWITCH_NONE;
+ devl_assert_locked(priv_to_devlink(esw->dev));
+
+ toggle_lag = !mlx5_esw_is_fdb_created(esw);
if (toggle_lag)
mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
- if (esw->mode == MLX5_ESWITCH_NONE) {
- ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
+ if (!mlx5_esw_is_fdb_created(esw)) {
+ ret = mlx5_eswitch_enable_locked(esw, num_vfs);
} else {
enum mlx5_eswitch_vport_event vport_events;
@@ -1336,53 +1333,82 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
return ret;
}
-void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
+/* When disabling SR-IOV, free driver-level resources. */
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
{
- int old_mode;
-
- lockdep_assert_held_write(&esw->mode_lock);
-
- if (esw->mode == MLX5_ESWITCH_NONE)
+ if (!mlx5_esw_allowed(esw))
return;
- esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ devl_assert_locked(priv_to_devlink(esw->dev));
+ down_write(&esw->mode_lock);
+ /* If driver is unloaded, this function is called twice by remove_one()
+ * and mlx5_unload(). Prevent the second call.
+ */
+ if (!esw->esw_funcs.num_vfs && !clear_vf)
+ goto unlock;
+
+ esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n",
esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
esw->esw_funcs.num_vfs, esw->enabled_vports);
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+ if (clear_vf)
+ mlx5_eswitch_clear_vf_vports_info(esw);
+	/* If disabling SR-IOV in switchdev mode, free the meta rules here
+	 * because they depend on num_vfs.
+ */
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS) {
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
+ devl_rate_nodes_destroy(devlink);
+ }
+
+ esw->esw_funcs.num_vfs = 0;
+
+unlock:
+ up_write(&esw->mode_lock);
+}
+
+/* Free resources for the corresponding eswitch mode. Called by devlink
+ * when changing the eswitch mode, or by modprobe when unloading the driver.
+ */
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
+{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
/* Notify eswitch users that it is exiting from current mode.
* So that it can do necessary cleanup before the eswitch is disabled.
*/
- mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_NONE);
+ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY);
mlx5_eswitch_event_handlers_unregister(esw);
- if (esw->mode == MLX5_ESWITCH_LEGACY)
- esw_legacy_disable(esw);
- else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
- esw_offloads_disable(esw);
-
- old_mode = esw->mode;
- esw->mode = MLX5_ESWITCH_NONE;
-
- if (old_mode == MLX5_ESWITCH_OFFLOADS)
- mlx5_rescan_drivers(esw->dev);
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
- mlx5_esw_qos_destroy(esw);
- mlx5_esw_acls_ns_cleanup(esw);
+ if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) {
+ esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED;
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ esw_offloads_disable(esw);
+ else if (esw->mode == MLX5_ESWITCH_LEGACY)
+ esw_legacy_disable(esw);
+ mlx5_esw_acls_ns_cleanup(esw);
+ }
- if (clear_vf)
- mlx5_eswitch_clear_vf_vports_info(esw);
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ devl_rate_nodes_destroy(devlink);
}
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
{
if (!mlx5_esw_allowed(esw))
return;
+ devl_assert_locked(priv_to_devlink(esw->dev));
mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
- mlx5_eswitch_disable_locked(esw, clear_vf);
- esw->esw_funcs.num_vfs = 0;
+ mlx5_eswitch_disable_locked(esw);
up_write(&esw->mode_lock);
mlx5_lag_enable_change(esw->dev);
}
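Disable is now split in two: mlx5_eswitch_disable_sriov() tears down the per-VF state (and tolerates being called twice on driver removal, hence the num_vfs guard), while mlx5_eswitch_disable() frees the mode-level resources. A sketch of the expected unload ordering; the surrounding caller is assumed from the SR-IOV/unload paths:

	devl_lock(devlink);
	mlx5_eswitch_disable_sriov(esw, clear_vf);	/* unload VF vports */
	mlx5_eswitch_disable(esw);			/* destroy FDB, ACLs, rate nodes */
	devl_unlock(devlink);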
@@ -1573,22 +1599,25 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
- lockdep_register_key(&esw->mode_lock_key);
init_rwsem(&esw->mode_lock);
- lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
+ refcount_set(&esw->qos.refcnt, 0);
esw->enabled_vports = 0;
- esw->mode = MLX5_ESWITCH_NONE;
+ esw->mode = MLX5_ESWITCH_LEGACY;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
else
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (MLX5_ESWITCH_MANAGER(dev) &&
+ mlx5_esw_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+ esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
esw_info(dev,
"Total vports %d, per vport: max uc(%d) max mc(%d)\n",
esw->total_vports,
@@ -1612,9 +1641,10 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n");
+ debugfs_remove_recursive(esw->dbgfs);
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
- lockdep_unregister_key(&esw->mode_lock_key);
+ WARN_ON(refcount_read(&esw->qos.refcnt));
mutex_destroy(&esw->state_lock);
WARN_ON(!xa_empty(&esw->offloads.vhca_map));
xa_destroy(&esw->offloads.vhca_map);
@@ -1703,82 +1733,6 @@ bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF);
}
-static bool
-is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
-{
- return vport_num == MLX5_VPORT_PF ||
- mlx5_eswitch_is_vf_vport(esw, vport_num) ||
- mlx5_esw_is_sf_vport(esw, vport_num);
-}
-
-int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
- u8 *hw_addr, int *hw_addr_len,
- struct netlink_ext_ack *extack)
-{
- struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- int err = -EOPNOTSUPP;
- u16 vport_num;
-
- esw = mlx5_devlink_eswitch_get(port->devlink);
- if (IS_ERR(esw))
- return PTR_ERR(esw);
-
- vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
- if (!is_port_function_supported(esw, vport_num))
- return -EOPNOTSUPP;
-
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport)) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid port");
- return PTR_ERR(vport);
- }
-
- mutex_lock(&esw->state_lock);
- if (vport->enabled) {
- ether_addr_copy(hw_addr, vport->info.mac);
- *hw_addr_len = ETH_ALEN;
- err = 0;
- }
- mutex_unlock(&esw->state_lock);
- return err;
-}
-
-int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
- const u8 *hw_addr, int hw_addr_len,
- struct netlink_ext_ack *extack)
-{
- struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- int err = -EOPNOTSUPP;
- u16 vport_num;
-
- esw = mlx5_devlink_eswitch_get(port->devlink);
- if (IS_ERR(esw)) {
- NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
- return PTR_ERR(esw);
- }
-
- vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
- if (!is_port_function_supported(esw, vport_num)) {
- NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr");
- return -EINVAL;
- }
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport)) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid port");
- return PTR_ERR(vport);
- }
-
- mutex_lock(&esw->state_lock);
- if (vport->enabled)
- err = mlx5_esw_set_vport_mac_locked(esw, vport, hw_addr);
- else
- NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
- mutex_unlock(&esw->state_lock);
- return err;
-}
-
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
u16 vport, int link_state)
{
@@ -1835,8 +1789,10 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
ivi->qos = evport->info.qos;
ivi->spoofchk = evport->info.spoofchk;
ivi->trusted = evport->info.trusted;
- ivi->min_tx_rate = evport->qos.min_rate;
- ivi->max_tx_rate = evport->qos.max_rate;
+ if (evport->qos.enabled) {
+ ivi->min_tx_rate = evport->qos.min_rate;
+ ivi->max_tx_rate = evport->qos.max_rate;
+ }
mutex_unlock(&esw->state_lock);
return 0;
@@ -1951,7 +1907,7 @@ u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
{
struct mlx5_eswitch *esw = dev->priv.eswitch;
- return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_NONE;
+ return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
@@ -1966,17 +1922,6 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
-{
- if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
- (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
- return true;
-
- return false;
-}
-
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1)
{
@@ -2082,23 +2027,10 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
*/
void mlx5_esw_unlock(struct mlx5_eswitch *esw)
{
- if (!mlx5_esw_allowed(esw))
- return;
up_write(&esw->mode_lock);
}
/**
- * mlx5_esw_lock() - Take write lock on esw mode lock
- * @esw: eswitch device.
- */
-void mlx5_esw_lock(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_allowed(esw))
- return;
- down_write(&esw->mode_lock);
-}
-
-/**
* mlx5_eswitch_get_total_vports - Get total vports of the eswitch
*
* @dev: Pointer to core device
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 42f8ee2e5d9f..f68dc2d0dbe6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -113,8 +113,11 @@ struct vport_ingress {
* packet with metadata.
*/
struct mlx5_flow_group *metadata_allmatch_grp;
+	/* Optional group to add a drop-all rule */
+ struct mlx5_flow_group *drop_grp;
struct mlx5_modify_hdr *modify_metadata;
struct mlx5_flow_handle *modify_metadata_rule;
+ struct mlx5_flow_handle *drop_rule;
} offloads;
};
@@ -188,6 +191,7 @@ struct mlx5_vport {
enum mlx5_eswitch_vport_event enabled_events;
int index;
struct devlink_port *dl_port;
+ struct dentry *dbgfs;
};
struct mlx5_esw_indir_table;
@@ -240,6 +244,8 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
+ struct mlx5_flow_group *vport_rx_drop_group;
+ struct mlx5_flow_handle *vport_rx_drop_rule;
struct xarray vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
@@ -279,10 +285,15 @@ struct mlx5_esw_functions {
enum {
MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1),
+ MLX5_ESWITCH_VPORT_ACL_NS_CREATED = BIT(2),
};
struct mlx5_esw_bridge_offloads;
+enum {
+ MLX5_ESW_FDB_CREATED = BIT(0),
+};
+
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
@@ -308,10 +319,14 @@ struct mlx5_eswitch {
atomic64_t user_count;
struct {
- bool enabled;
u32 root_tsar_ix;
struct mlx5_esw_rate_group *group0;
struct list_head groups; /* Protected by esw->state_lock */
+
+ /* Protected by esw->state_lock.
+ * Initially 0, meaning no QoS users and QoS is disabled.
+ */
+ refcount_t refcnt;
} qos;
struct mlx5_esw_bridge_offloads *br_offloads;
@@ -324,7 +339,7 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
- struct lock_class_key mode_lock_key;
+ struct dentry *dbgfs;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -332,6 +347,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule);
+
bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
@@ -339,18 +358,16 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
-bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
-int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
-
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1)
-int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs);
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs);
int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
-void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf);
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u16 vport, const u8 *mac);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -447,22 +464,6 @@ enum {
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2),
};
-enum {
- MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0),
- MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1),
- MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2),
- MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3),
- MLX5_ESW_ATTR_FLAG_SAMPLE = BIT(4),
- MLX5_ESW_ATTR_FLAG_ACCEPT = BIT(5),
-};
-
-/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
-static inline bool
-mlx5_esw_attr_flags_skip(u32 attr_flags)
-{
- return attr_flags & (MLX5_ESW_ATTR_FLAG_SLOW_PATH | MLX5_ESW_ATTR_FLAG_ACCEPT);
-}
-
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
struct mlx5_core_dev *in_mdev;
@@ -486,6 +487,7 @@ struct mlx5_esw_flow_attr {
int src_port_rewrite_act_id;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5_rx_tun_attr *rx_tun_attr;
+ struct ethhdr eth;
struct mlx5_pkt_reformat *decap_pkt_reformat;
};
@@ -516,11 +518,6 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
u16 vport, u16 vlan, u8 qos, u8 set_flags);
-static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw)
-{
- return esw->qos.enabled;
-}
-
static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
u8 vlan_depth)
{
@@ -534,8 +531,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
}
-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1);
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1);
@@ -594,6 +589,11 @@ mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index)
return dl_port_index & 0xffff;
}
+static inline bool mlx5_esw_is_fdb_created(struct mlx5_eswitch *esw)
+{
+ return esw->fdb_table.flags & MLX5_ESW_FDB_CREATED;
+}
+
/* TODO: This mlx5e_tc function shouldn't be called by eswitch */
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
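mlx5_esw_is_fdb_created() replaces the old esw->mode == MLX5_ESWITCH_NONE tests now that the mode is always LEGACY or OFFLOADS; the typical guard (as used in mlx5_eswitch_inline_mode_get() further down) is:

	if (!mlx5_esw_is_fdb_created(esw))
		return -EOPNOTSUPP;	/* eswitch not functionally enabled */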
@@ -691,6 +691,9 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+
int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
u16 vport_num, u32 controller, u32 sfnum);
void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
@@ -722,7 +725,6 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
void mlx5_esw_put(struct mlx5_core_dev *dev);
int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
void mlx5_esw_unlock(struct mlx5_eswitch *esw);
-void mlx5_esw_lock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
@@ -739,8 +741,8 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
-static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
-static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
+static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
@@ -749,9 +751,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
return ERR_PTR(-EOPNOTSUPP);
}
-static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
-static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
-
static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 32bc08a39925..728ca9f2bb9d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -49,6 +49,7 @@
#include "en_tc.h"
#include "en/mapping.h"
#include "devlink.h"
+#include "lag/lag.h"
#define mlx5_esw_for_each_rep(esw, i, rep) \
xa_for_each(&((esw)->offloads.vport_reps), i, rep)
@@ -69,6 +70,8 @@
#define MLX5_ESW_VPORT_TBL_SIZE 128
#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4
+#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+
static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
@@ -139,7 +142,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
if (mlx5_esw_indir_table_decap_vport(attr))
vport = mlx5_esw_indir_table_decap_vport(attr);
- if (esw_attr->int_port)
+ if (attr && !attr->chain && esw_attr->int_port)
metadata =
mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
else
@@ -180,7 +183,7 @@ esw_setup_decap_indir(struct mlx5_eswitch *esw,
{
struct mlx5_flow_table *ft;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
return -EOPNOTSUPP;
ft = mlx5_esw_indir_table_get(esw, attr, spec,
@@ -201,12 +204,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
static int
esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
struct mlx5_flow_act *flow_act,
- struct mlx5_flow_attr *attr,
+ u32 sampler_id,
int i)
{
flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
- dest[i].sampler_id = attr->sample_attr->sampler_id;
+ dest[i].sampler_id = sampler_id;
return 0;
}
@@ -229,10 +232,8 @@ esw_setup_ft_dest(struct mlx5_flow_destination *dest,
}
static void
-esw_setup_slow_path_dest(struct mlx5_flow_destination *dest,
- struct mlx5_flow_act *flow_act,
- struct mlx5_fs_chains *chains,
- int i)
+esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_fs_chains *chains, int i)
{
if (mlx5_chains_ignore_flow_level_supported(chains))
flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
@@ -240,6 +241,16 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest,
dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
}
+static void
+esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, int i)
+{
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level))
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = esw->fdb_table.offloads.slow_fdb;
+}
+
static int
esw_setup_chain_dest(struct mlx5_flow_destination *dest,
struct mlx5_flow_act *flow_act,
@@ -295,26 +306,28 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
int *i)
{
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
- int j, err;
+ int err;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
return -EOPNOTSUPP;
- for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
- err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
- if (err)
- goto err_setup_chain;
+ /* flow steering cannot handle more than one dest with the same ft
+ * in a single flow
+ */
+ if (esw_attr->out_count - esw_attr->split_count > 1)
+ return -EOPNOTSUPP;
- if (esw_attr->dests[j].pkt_reformat) {
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat;
- }
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
+ if (err)
+ return err;
+
+ if (esw_attr->dests[esw_attr->split_count].pkt_reformat) {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat;
}
- return 0;
+ (*i)++;
-err_setup_chain:
- esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
- return err;
+ return 0;
}
static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw,
@@ -362,7 +375,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest,
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
int j, err;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
return -EOPNOTSUPP;
for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
@@ -416,6 +429,9 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
dest[dest_idx].vport.vhca_id =
MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK &&
+ mlx5_lag_mpesw_is_activated(esw->dev))
+ dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
}
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
if (pkt_reformat) {
@@ -461,20 +477,17 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
esw_src_port_rewrite_supported(esw))
- attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+ attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE;
- if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
- esw_setup_sampler_dest(dest, flow_act, attr, *i);
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE &&
+ !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) {
+ esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i);
(*i)++;
- } else if (attr->dest_ft) {
- esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
+ } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) {
+ esw_setup_slow_path_dest(dest, flow_act, esw, *i);
(*i)++;
- } else if (mlx5_esw_attr_flags_skip(attr->flags)) {
- esw_setup_slow_path_dest(dest, flow_act, chains, *i);
- (*i)++;
- } else if (attr->dest_chain) {
- err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
- 1, 0, *i);
+ } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) {
+ esw_setup_accept_dest(dest, flow_act, chains, *i);
(*i)++;
} else if (esw_is_indir_table(esw, attr)) {
err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i);
@@ -482,6 +495,15 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
} else {
*i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i);
+
+ if (attr->dest_ft) {
+ err = esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
+ (*i)++;
+ } else if (attr->dest_chain) {
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
+ 1, 0, *i);
+ (*i)++;
+ }
}
return err;
@@ -496,7 +518,7 @@ esw_cleanup_dests(struct mlx5_eswitch *esw,
if (attr->dest_ft) {
esw_cleanup_decap_indir(esw, attr);
- } else if (!mlx5_esw_attr_flags_skip(attr->flags)) {
+ } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
if (attr->dest_chain)
esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
else if (esw_is_indir_table(esw, attr))
@@ -506,6 +528,20 @@ esw_cleanup_dests(struct mlx5_eswitch *esw,
}
}
+static void
+esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = attr->meter_attr.meter;
+ flow_act->exe_aso.type = attr->exe_aso_type;
+ flow_act->exe_aso.object_id = meter->obj_id;
+ flow_act->exe_aso.flow_meter.meter_idx = meter->idx;
+ flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN;
+ /* use metadata reg 5 for packet color */
+ flow_act->exe_aso.return_reg_id = 5;
+}
+
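esw_setup_meter() only copies pre-resolved meter state into the flow_act ASO fields. A hedged sketch of the caller side; the field names come from this diff, but where the meter handle is obtained (e.g. a TC police action) is an assumption:

	attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
	attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
	attr->meter_attr.meter = meter;		/* resolved earlier, assumed */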
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
@@ -573,6 +609,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_hdr = attr->modify_hdr;
+ if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
+ attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)
+ esw_setup_meter(attr, &flow_act);
+
if (split) {
fwd_attr.chain = attr->chain;
fwd_attr.prio = attr->prio;
@@ -587,7 +627,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
else
fdb = attr->ft;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT))
+ if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT))
mlx5_eswitch_set_rule_source_port(esw, spec, attr,
esw_attr->in_mdev->priv.eswitch,
esw_attr->in_rep->vport);
@@ -719,7 +759,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
mlx5_del_flow_rules(rule);
- if (!mlx5_esw_attr_flags_skip(attr->flags)) {
+ if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
/* unref the term table */
for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
if (esw_attr->dests[i].termtbl)
@@ -861,7 +901,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (err)
goto unlock;
- attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED;
vport = esw_vlan_action_get_vport(esw_attr, push, pop);
@@ -869,7 +909,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
/* tracks VF --> wire rules without vlan push action */
if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
vport->vlan_refcount++;
- attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
}
goto unlock;
@@ -900,7 +940,7 @@ skip_set_push:
}
out:
if (!err)
- attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
unlock:
mutex_unlock(&esw->state_lock);
return err;
@@ -919,7 +959,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
return 0;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED))
+ if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED))
return 0;
push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
@@ -1022,43 +1062,23 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
-static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule)
{
- struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;
- int i = 0, num_vfs = esw->esw_funcs.num_vfs;
-
- if (!num_vfs || !flows)
- return;
-
- for (i = 0; i < num_vfs; i++)
- mlx5_del_flow_rules(flows[i]);
-
- kvfree(flows);
+ if (rule)
+ mlx5_del_flow_rules(rule);
}
-static int
-mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num)
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {0};
- int num_vfs, rule_idx = 0, err = 0;
struct mlx5_flow_handle *flow_rule;
- struct mlx5_flow_handle **flows;
struct mlx5_flow_spec *spec;
- struct mlx5_vport *vport;
- unsigned long i;
- u16 vport_num;
-
- num_vfs = esw->esw_funcs.num_vfs;
- flows = kvcalloc(num_vfs, sizeof(*flows), GFP_KERNEL);
- if (!flows)
- return -ENOMEM;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec) {
- err = -ENOMEM;
- goto alloc_err;
- }
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
MLX5_SET(fte_match_param, spec->match_criteria,
misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
@@ -1071,34 +1091,18 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
- vport_num = vport->vport;
- MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
- dest.vport.num = vport_num;
-
- flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
- spec, &flow_act, &dest, 1);
- if (IS_ERR(flow_rule)) {
- err = PTR_ERR(flow_rule);
- esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule idx %d, err %ld\n",
- rule_idx, PTR_ERR(flow_rule));
- goto rule_err;
- }
- flows[rule_idx++] = flow_rule;
- }
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+ dest.vport.num = vport_num;
- esw->fdb_table.offloads.send_to_vport_meta_rules = flows;
- kvfree(spec);
- return 0;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n",
+ vport_num, PTR_ERR(flow_rule));
-rule_err:
- while (--rule_idx >= 0)
- mlx5_del_flow_rules(flows[rule_idx]);
kvfree(spec);
-alloc_err:
- kvfree(flows);
- return err;
+ return flow_rule;
}
static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
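The bulk add/del of send-to-vport meta rules gives way to a per-vport API so each rule can follow its vport's lifetime. Illustrative usage; where the handle is stored is an assumption, this hunk only exports the pair:

	rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, vport_num);
	if (IS_ERR(rule))
		return PTR_ERR(rule);
	/* ... */
	mlx5_eswitch_del_send_to_vport_meta_rule(rule);	/* NULL-safe */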
@@ -1623,18 +1627,200 @@ esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
#endif
+static int
+esw_create_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int count, err = 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
+
+	/* See the comment above the table_size calculation */
+ count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1);
+ *ix += count;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.offloads.send_to_vport_grp = g;
+
+out:
+ return err;
+}
+
+static int
+esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!esw_src_port_rewrite_supported(esw))
+ return 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ end_flow_index, *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev,
+ "Failed to create send-to-vport meta flow group err(%d)\n", err);
+ goto send_vport_meta_err;
+ }
+ esw->fdb_table.offloads.send_to_vport_meta_grp = g;
+
+ return 0;
+
+send_vport_meta_err:
+ return err;
+}
+
+static int
+esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ esw_set_flow_group_source_port(esw, flow_group_in);
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.offloads.peer_miss_grp = g;
+
+out:
+ return err;
+}
+
+static int
+esw_create_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+ u8 *dmac;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ *ix + MLX5_ESW_MISS_FLOWS);
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err);
+ goto miss_err;
+ }
+ esw->fdb_table.offloads.miss_grp = g;
+
+ err = esw_add_fdb_miss_rule(esw);
+ if (err)
+ goto miss_rule_err;
+
+ return 0;
+
+miss_rule_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+miss_err:
+ return err;
+}
+
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table_attr ft_attr = {};
- int num_vfs, table_size, ix, err = 0;
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
+ int table_size, ix = 0, err = 0;
u32 flags = 0, *flow_group_in;
- struct mlx5_flow_group *g;
- void *match_criteria;
- u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
@@ -1668,7 +1854,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
* total vports of the peer (currently it also uses esw->total_vports).
*/
table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
- MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
+ esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS;
/* create the slow path fdb with encap set, so further table instances
* can be created at run time while VFs are probed if the FW allows that.
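The refactored helpers above carve the FDB into consecutive flow-group index ranges through the shared *ix cursor. A worked example with illustrative values (MLX5_MAX_PORTS = 2, total_vports = 10, MAX_SQ_NVPORTS = 32, MAX_PF_SQ = 8, assumed purely for the arithmetic):

	/* send-to-vport      [0 .. 655]	2 * (10*32 + 8) entries
	 * meta send-to-vport [656 .. 665]	total_vports entries
	 * peer miss          [666 .. 675]	total_vports entries
	 * miss               [676 .. 676 + MLX5_ESW_MISS_FLOWS]
	 */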
@@ -1709,139 +1895,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
goto fdb_chains_err;
}
- /* create send-to-vport group */
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
-
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
- }
-
- /* See comment above table_size calculation */
- ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
-
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
goto send_vport_err;
- }
- esw->fdb_table.offloads.send_to_vport_grp = g;
-
- if (esw_src_port_rewrite_supported(esw)) {
- /* meta send to vport */
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS_2);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
- MLX5_SET(fte_match_param, match_criteria,
- misc_parameters_2.metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_mask());
- MLX5_SET(fte_match_param, match_criteria,
- misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
-
- num_vfs = esw->esw_funcs.num_vfs;
- if (num_vfs) {
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in,
- end_flow_index, ix + num_vfs - 1);
- ix += num_vfs;
-
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n",
- err);
- goto send_vport_meta_err;
- }
- esw->fdb_table.offloads.send_to_vport_meta_grp = g;
-
- err = mlx5_eswitch_add_send_to_vport_meta_rules(esw);
- if (err)
- goto meta_rule_err;
- }
- }
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
- /* create peer esw miss group */
- memset(flow_group_in, 0, inlen);
-
- esw_set_flow_group_source_port(esw, flow_group_in);
-
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- match_criteria = MLX5_ADDR_OF(create_flow_group_in,
- flow_group_in,
- match_criteria);
-
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
-
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
- }
-
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- ix + esw->total_vports - 1);
- ix += esw->total_vports;
-
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
- goto peer_miss_err;
- }
- esw->fdb_table.offloads.peer_miss_grp = g;
- }
-
- /* create miss group */
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_OUTER_HEADERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
- match_criteria);
- dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
- outer_headers.dmac_47_16);
- dmac[0] = 0x01;
-
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- ix + MLX5_ESW_MISS_FLOWS);
+ err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto send_vport_meta_err;
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
- goto miss_err;
- }
- esw->fdb_table.offloads.miss_grp = g;
+ err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto peer_miss_err;
- err = esw_add_fdb_miss_rule(esw);
+ err = esw_create_miss_group(esw, fdb, flow_group_in, &ix);
if (err)
- goto miss_rule_err;
+ goto miss_err;
kvfree(flow_group_in);
return 0;
-miss_rule_err:
- mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
miss_err:
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
peer_miss_err:
- mlx5_eswitch_del_send_to_vport_meta_rules(esw);
-meta_rule_err:
if (esw->fdb_table.offloads.send_to_vport_meta_grp)
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
send_vport_meta_err:
@@ -1868,7 +1944,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
- mlx5_eswitch_del_send_to_vport_meta_rules(esw);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
if (esw->fdb_table.offloads.send_to_vport_meta_grp)
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
@@ -1886,7 +1961,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
atomic64_set(&esw->user_count, 0);
}
-static int esw_get_offloads_ft_size(struct mlx5_eswitch *esw)
+static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw)
{
int nvports;
@@ -1911,7 +1986,8 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
return -EOPNOTSUPP;
}
- ft_attr.max_fte = esw_get_offloads_ft_size(esw);
+ ft_attr.max_fte = esw_get_nr_ft_offloads_steering_src_ports(esw) +
+ MLX5_ESW_FT_OFFLOADS_DROP_RULE;
ft_attr.prio = 1;
ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
@@ -1940,7 +2016,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
int nvports;
int err = 0;
- nvports = esw_get_offloads_ft_size(esw);
+ nvports = esw_get_nr_ft_offloads_steering_src_ports(esw);
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
@@ -1970,6 +2046,52 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
}
+static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw)
+{
+ /* ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+ * for the drop rule, which is placed at the end of the table.
+	 * So return the total number of vports and int_ports as the rule index.
+ */
+ return esw_get_nr_ft_offloads_steering_src_ports(esw);
+}
+
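Put differently, with N steering source ports the table holds max_fte = N + 1 entries; the vport rx rules use the first N slots and the drop group/rule takes the single last slot at index N:

	/* illustrative: N = 18 -> max_fte = 19, drop rule index = 18 */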
+static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ u32 *flow_group_in;
+ int flow_index;
+ int err = 0;
+
+ flow_index = esw_create_vport_rx_drop_rule_index(esw);
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err);
+ goto out;
+ }
+
+ esw->offloads.vport_rx_drop_group = g;
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_group)
+ mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group);
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
struct mlx5_flow_destination *dest)
@@ -2018,6 +2140,32 @@ out:
return flow_rule;
}
+static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL,
+ &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev,
+ "fs offloads: Failed to add vport rx drop rule err %ld\n",
+ PTR_ERR(flow_rule));
+ return PTR_ERR(flow_rule);
+ }
+
+ esw->offloads.vport_rx_drop_rule = flow_rule;
+
+ return 0;
+}
+
+static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_rule)
+ mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule);
+}
+
static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
@@ -2028,7 +2176,7 @@ static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
- if (esw->mode == MLX5_ESWITCH_NONE)
+ if (!mlx5_esw_is_fdb_created(esw))
return -EOPNOTSUPP;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
@@ -2162,20 +2310,15 @@ out_free:
static int esw_offloads_start(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1;
+ int err;
- mlx5_eswitch_disable_locked(esw, false);
- err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS,
- esw->dev->priv.sriov.num_vfs);
+ esw->mode = MLX5_ESWITCH_OFFLOADS;
+ err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch to offloads");
- err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY,
- MLX5_ESWITCH_IGNORE_NUM_VFS);
- if (err1) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed setting eswitch back to legacy");
- }
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ mlx5_rescan_drivers(esw->dev);
}
if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
if (mlx5_eswitch_inline_mode_get(esw,
@@ -2376,60 +2519,6 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
}
-static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master,
- struct mlx5_core_dev *slave)
-{
- u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
- u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
- struct mlx5_eswitch *esw;
- struct mlx5_flow_root_namespace *root;
- struct mlx5_flow_namespace *ns;
- struct mlx5_vport *vport;
- int err;
-
- MLX5_SET(set_flow_table_root_in, in, opcode,
- MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
- MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL);
- MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
- MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK);
-
- if (master) {
- esw = master->priv.eswitch;
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
- MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1);
- MLX5_SET(set_flow_table_root_in, in, table_vport_number,
- MLX5_VPORT_UPLINK);
-
- ns = mlx5_get_flow_vport_acl_namespace(master,
- MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport->index);
- root = find_root(&ns->node);
- mutex_lock(&root->chain_lock);
-
- MLX5_SET(set_flow_table_root_in, in,
- table_eswitch_owner_vhca_id_valid, 1);
- MLX5_SET(set_flow_table_root_in, in,
- table_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(master, vhca_id));
- MLX5_SET(set_flow_table_root_in, in, table_id,
- root->root_ft->id);
- } else {
- esw = slave->priv.eswitch;
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
- ns = mlx5_get_flow_vport_acl_namespace(slave,
- MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport->index);
- root = find_root(&ns->node);
- mutex_lock(&root->chain_lock);
- MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id);
- }
-
- err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
- mutex_unlock(&root->chain_lock);
-
- return err;
-}
-
static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
struct mlx5_core_dev *slave)
{
@@ -2611,15 +2700,10 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
{
int err;
- err = esw_set_uplink_slave_ingress_root(master_esw->dev,
- slave_esw->dev);
- if (err)
- return -EINVAL;
-
err = esw_set_slave_root_fdb(master_esw->dev,
slave_esw->dev);
if (err)
- goto err_fdb;
+ return err;
err = esw_set_master_egress_rule(master_esw->dev,
slave_esw->dev);
@@ -2631,9 +2715,6 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
err_acl:
esw_set_slave_root_fdb(NULL, slave_esw->dev);
-err_fdb:
- esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
-
return err;
}
@@ -2642,7 +2723,6 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
{
esw_unset_master_egress_rule(master_esw->dev);
esw_set_slave_root_fdb(NULL, slave_esw->dev);
- esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
}
#define ESW_OFFLOADS_DEVCOM_PAIR (0)
@@ -2747,9 +2827,6 @@ static int mlx5_esw_offloads_devcom_event(int event,
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
- if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev)
- break;
-
if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
@@ -2801,6 +2878,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_register_component(devcom,
MLX5_DEVCOM_ESW_OFFLOADS,
mlx5_esw_offloads_devcom_event,
@@ -2818,6 +2898,9 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
@@ -2836,13 +2919,22 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
return false;
- if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
- mlx5_ecpf_vport_exists(esw->dev))
- return false;
-
return true;
}
+#define MLX5_ESW_METADATA_RSVD_UPLINK 1
+
+/* Share the same metadata for uplinks. This is fine because:
+ * (a) In shared FDB mode (LAG) both uplinks are treated the
+ * same and tagged with the same metadata.
+ * (b) In non-shared FDB mode, packets from physical port0
+ * cannot hit the eswitch of PF1 and vice versa.
+ */
+static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw)
+{
+ return MLX5_ESW_METADATA_RSVD_UPLINK;
+}
+
u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
{
u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
@@ -2857,8 +2949,10 @@ u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
return 0;
/* Metadata is 4 bits of PFNUM and 12 bits of unique id */
- /* Use only non-zero vport_id (1-4095) for all PF's */
- id = ida_alloc_range(&esw->offloads.vport_metadata_ida, 1, vport_end_ida, GFP_KERNEL);
+ /* Use only non-zero vport_id (2-4095) for all PF's */
+ id = ida_alloc_range(&esw->offloads.vport_metadata_ida,
+ MLX5_ESW_METADATA_RSVD_UPLINK + 1,
+ vport_end_ida, GFP_KERNEL);
if (id < 0)
return 0;
id = (pf_num << ESW_VPORT_BITS) | id;
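
Aside: the hunk above reserves metadata value 1 for the uplink, hands out every other vport id from the IDA starting at 2, then ORs the PF number into the top bits. A minimal standalone sketch of the resulting encoding, assuming ESW_VPORT_BITS is 12 per the "4 bits of PFNUM and 12 bits of unique id" comment; all demo values are hypothetical:

	#include <stdio.h>

	#define ESW_VPORT_BITS 12
	#define MLX5_ESW_METADATA_RSVD_UPLINK 1

	int main(void)
	{
		unsigned int pf_num = 3;  /* hypothetical 4-bit PF number */
		unsigned int id = 2;      /* first id the IDA can return */
		unsigned int metadata = (pf_num << ESW_VPORT_BITS) | id;

		printf("uplink metadata: %u\n", MLX5_ESW_METADATA_RSVD_UPLINK);
		printf("vport metadata:  0x%04x\n", metadata); /* 0x3002 */
		return 0;
	}
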
@@ -2876,7 +2970,11 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ vport->default_metadata = mlx5_esw_match_metadata_reserved(esw);
+ else
+ vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
+
vport->metadata = vport->default_metadata;
return vport->metadata ? 0 : -ENOSPC;
}
@@ -2887,6 +2985,9 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
if (!vport->default_metadata)
return;
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ return;
+
WARN_ON(vport->metadata != vport->default_metadata);
mlx5_esw_match_metadata_free(esw, vport->default_metadata);
}
@@ -2930,7 +3031,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable)
int err = 0;
down_write(&esw->mode_lock);
- if (esw->mode != MLX5_ESWITCH_NONE) {
+ if (mlx5_esw_is_fdb_created(esw)) {
err = -EBUSY;
goto done;
}
@@ -3060,8 +3161,20 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
if (err)
goto create_fg_err;
+ err = esw_create_vport_rx_drop_group(esw);
+ if (err)
+ goto create_rx_drop_fg_err;
+
+ err = esw_create_vport_rx_drop_rule(esw);
+ if (err)
+ goto create_rx_drop_rule_err;
+
return 0;
+create_rx_drop_rule_err:
+ esw_destroy_vport_rx_drop_group(esw);
+create_rx_drop_fg_err:
+ esw_destroy_vport_rx_group(esw);
create_fg_err:
esw_destroy_offloads_fdb_tables(esw);
create_fdb_err:
@@ -3079,6 +3192,8 @@ create_indir_err:
static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
{
+ esw_destroy_vport_rx_drop_rule(esw);
+ esw_destroy_vport_rx_drop_group(esw);
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_fdb_tables(esw);
esw_destroy_restore_table(esw);
@@ -3091,6 +3206,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
static void
esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
{
+ struct devlink *devlink;
bool host_pf_disabled;
u16 new_num_vfs;
@@ -3102,6 +3218,8 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
return;
+ devlink = priv_to_devlink(esw->dev);
+ devl_lock(devlink);
/* Number of VFs can only change from "0 to x" or "x to 0". */
if (esw->esw_funcs.num_vfs > 0) {
mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
@@ -3110,10 +3228,13 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs,
MLX5_VPORT_UC_ADDR_CHANGE);
- if (err)
+ if (err) {
+ devl_unlock(devlink);
return;
+ }
}
esw->esw_funcs.num_vfs = new_num_vfs;
+ devl_unlock(devlink);
}
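
The hunk above brackets the VF vport reload with devl_lock()/devl_unlock(), including on the early-return error path. A userspace model of that acquire-and-release-on-every-exit pattern, with every demo_* name invented for illustration:

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
	static int demo_num_vfs;

	static bool demo_load_vports(int n) { return n >= 0; }

	static void demo_vfs_changed(int new_num_vfs)
	{
		pthread_mutex_lock(&demo_lock);
		if (!demo_load_vports(new_num_vfs)) {
			pthread_mutex_unlock(&demo_lock); /* error path unlocks too */
			return;
		}
		demo_num_vfs = new_num_vfs;
		pthread_mutex_unlock(&demo_lock);
	}
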
static void esw_functions_changed_event_handler(struct work_struct *work)
@@ -3263,20 +3384,12 @@ err_metadata:
static int esw_offloads_stop(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1;
+ int err;
- mlx5_eswitch_disable_locked(esw, false);
- err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY,
- MLX5_ESWITCH_IGNORE_NUM_VFS);
- if (err) {
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS);
+ if (err)
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
- err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS,
- MLX5_ESWITCH_IGNORE_NUM_VFS);
- if (err1) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed setting eswitch back to offloads");
- }
- }
return err;
}
@@ -3370,15 +3483,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
return 0;
}
-static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw)
-{
- /* devlink commands in NONE eswitch mode are currently supported only
- * on ECPF.
- */
- return (esw->mode == MLX5_ESWITCH_NONE &&
- !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0;
-}
-
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
@@ -3405,6 +3509,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (cur_mlx5_mode == mlx5_mode)
goto unlock;
+ mlx5_eswitch_disable_locked(esw);
if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
if (mlx5_devlink_trap_get_num_active(esw->dev)) {
NL_SET_ERR_MSG_MOD(extack,
@@ -3415,6 +3520,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
err = esw_offloads_start(esw, extack);
} else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
err = esw_offloads_stop(esw, extack);
+ mlx5_rescan_drivers(esw->dev);
} else {
err = -EINVAL;
}
@@ -3436,12 +3542,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
return PTR_ERR(esw);
down_write(&esw->mode_lock);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
-
err = esw_mode_to_devlink(esw->mode, mode);
-unlock:
up_write(&esw->mode_lock);
return err;
}
@@ -3490,9 +3591,6 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
return PTR_ERR(esw);
down_write(&esw->mode_lock);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto out;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
@@ -3544,12 +3642,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
return PTR_ERR(esw);
down_write(&esw->mode_lock);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
-
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
-unlock:
up_write(&esw->mode_lock);
return err;
}
@@ -3560,16 +3653,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw;
- int err;
+ int err = 0;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return PTR_ERR(esw);
down_write(&esw->mode_lock);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
(!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
@@ -3620,22 +3710,15 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
enum devlink_eswitch_encap_mode *encap)
{
struct mlx5_eswitch *esw;
- int err;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return PTR_ERR(esw);
-
down_write(&esw->mode_lock);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
-
*encap = esw->offloads.encap;
-unlock:
up_write(&esw->mode_lock);
- return err;
+ return 0;
}
static bool
@@ -3758,12 +3841,14 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
if (err)
goto devlink_err;
+ mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
err = mlx5_esw_offloads_rep_load(esw, vport_num);
if (err)
goto rep_err;
return 0;
rep_err:
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
devlink_err:
mlx5_esw_vport_disable(esw, vport_num);
@@ -3773,6 +3858,7 @@ devlink_err:
void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
{
mlx5_esw_offloads_rep_unload(esw, vport_num);
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num);
}
@@ -3867,3 +3953,62 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
return vport->metadata;
}
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set);
+
+static bool
+is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num == MLX5_VPORT_PF ||
+ mlx5_eswitch_is_vf_vport(esw, vport_num) ||
+ mlx5_esw_is_sf_vport(esw, vport_num);
+}
+
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
+ u8 *hw_addr, int *hw_addr_len,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num))
+ return -EOPNOTSUPP;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ ether_addr_copy(hw_addr, vport->info.mac);
+ *hw_addr_len = ETH_ALEN;
+ mutex_unlock(&esw->state_lock);
+ return 0;
+}
+
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const u8 *hw_addr, int hw_addr_len,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw)) {
+ NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
+ return PTR_ERR(esw);
+ }
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num)) {
+ NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr");
+ return -EINVAL;
+ }
+
+ return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index 182306bbefaa..108a3503f413 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -30,9 +30,9 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act,
sizeof(dest->vport.num), hash);
hash = jhash((const void *)&dest->vport.vhca_id,
sizeof(dest->vport.num), hash);
- if (dest->vport.pkt_reformat)
- hash = jhash(dest->vport.pkt_reformat,
- sizeof(*dest->vport.pkt_reformat),
+ if (flow_act->pkt_reformat)
+ hash = jhash(flow_act->pkt_reformat,
+ sizeof(*flow_act->pkt_reformat),
hash);
return hash;
}
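
The fix above hashes the bytes of the pkt_reformat object (now taken from flow_act) rather than a stale dest->vport field, so two logically equal reformats hash alike even when their pointers differ. A kernel-style sketch of that pattern; the struct is a simplified stand-in for the real mlx5 type:

	#include <linux/jhash.h>

	struct demo_reformat { int type; u32 size; };

	static u32 demo_hash_reformat(const struct demo_reformat *reformat, u32 hash)
	{
		if (reformat)
			hash = jhash(reformat, sizeof(*reformat), hash);
		return hash;
	}
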
@@ -53,9 +53,11 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1,
if (ret)
return ret;
- return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ?
- memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat,
- sizeof(*dest1->vport.pkt_reformat)) : 0;
+ if (flow_act1->pkt_reformat && flow_act2->pkt_reformat)
+ return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat,
+ sizeof(*flow_act1->pkt_reformat));
+
+ return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat);
}
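
The rewritten compare returns 0 (match) when both flow_acts carry byte-identical reformats or both carry none, and non-zero otherwise; !(a == b) covers the "exactly one side is set" case. A minimal model, with hypothetical names and size:

	#include <string.h>

	static int demo_reformat_cmp(const void *a, const void *b, size_t size)
	{
		if (a && b)
			return memcmp(a, b, size);
		return !(a == b); /* 0 if both NULL, 1 if only one is set */
	}
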
static int
@@ -219,12 +221,14 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) ||
- mlx5_esw_attr_flags_skip(attr->flags) ||
+ mlx5e_tc_attr_flags_skip(attr->flags) ||
(!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port))
return false;
/* push vlan on RX */
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) &
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX))
return true;
/* hairpin */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index a1ac3a654962..9459e56ee90a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -36,6 +36,7 @@ static struct mlx5_nb events_nbs_ref[] = {
/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE },
/* QP/WQ resource events to forward */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
@@ -132,6 +133,8 @@ static const char *eqe_type_str(u8 type)
return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
case MLX5_EVENT_TYPE_DEVICE_TRACER:
return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ case MLX5_EVENT_TYPE_OBJECT_CHANGE:
+ return "MLX5_EVENT_TYPE_OBJECT_CHANGE";
default:
return "Unrecognized event";
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 2ce4241459ce..39c03dcbd196 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/driver.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 2a984e82ae16..750c32050165 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -57,9 +57,6 @@ struct mlx5_fpga_device {
u32 mkey;
struct mlx5_uars_page *uar;
} conn_res;
-
- struct mlx5_fpga_ipsec *ipsec;
- struct mlx5_fpga_tls *tls;
};
#define mlx5_fpga_dbg(__adev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
deleted file mode 100644
index 8ec148010d62..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/rhashtable.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/fs_helpers.h>
-#include <linux/mlx5/fs.h>
-#include <linux/rbtree.h>
-
-#include "mlx5_core.h"
-#include "fs_cmd.h"
-#include "fpga/ipsec.h"
-#include "fpga/sdk.h"
-#include "fpga/core.h"
-
-enum mlx5_fpga_ipsec_cmd_status {
- MLX5_FPGA_IPSEC_CMD_PENDING,
- MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
- MLX5_FPGA_IPSEC_CMD_COMPLETE,
-};
-
-struct mlx5_fpga_ipsec_cmd_context {
- struct mlx5_fpga_dma_buf buf;
- enum mlx5_fpga_ipsec_cmd_status status;
- struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
- int status_code;
- struct completion complete;
- struct mlx5_fpga_device *dev;
- struct list_head list; /* Item in pending_cmds */
- u8 command[];
-};
-
-struct mlx5_fpga_esp_xfrm;
-
-struct mlx5_fpga_ipsec_sa_ctx {
- struct rhash_head hash;
- struct mlx5_ifc_fpga_ipsec_sa hw_sa;
- u32 sa_handle;
- struct mlx5_core_dev *dev;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
-};
-
-struct mlx5_fpga_esp_xfrm {
- unsigned int num_rules;
- struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
- struct mutex lock; /* xfrm lock */
- struct mlx5_accel_esp_xfrm accel_xfrm;
-};
-
-struct mlx5_fpga_ipsec_rule {
- struct rb_node node;
- struct fs_fte *fte;
- struct mlx5_fpga_ipsec_sa_ctx *ctx;
-};
-
-static const struct rhashtable_params rhash_sa = {
-	/* Keep the "cmd" field out of the key, as its
-	 * value is not constant during the lifetime
-	 * of the key object.
-	 */
- .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
- sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
- .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) +
- sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
- .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
- .automatic_shrinking = true,
- .min_size = 1,
-};
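
The rhashtable parameters above exclude the mutable leading cmd field from the key by advancing key_offset past it and shrinking key_len by the same amount. A sketch of that trick with simplified stand-in structs, assuming cmd is the first member of the hardware SA as it is in mlx5_ifc_fpga_ipsec_sa_v1:

	#include <linux/rhashtable.h>
	#include <linux/stddef.h>
	#include <linux/types.h>

	struct demo_hw_sa { __be32 cmd; u8 body[60]; }; /* cmd must stay first */

	struct demo_sa_ctx {
		struct rhash_head hash;
		struct demo_hw_sa hw_sa;
	};

	static const struct rhashtable_params demo_rhash = {
		.key_len    = sizeof(struct demo_hw_sa) -
			      sizeof_field(struct demo_hw_sa, cmd),
		.key_offset = offsetof(struct demo_sa_ctx, hw_sa) +
			      sizeof_field(struct demo_hw_sa, cmd),
		.head_offset = offsetof(struct demo_sa_ctx, hash),
		.automatic_shrinking = true,
		.min_size = 1,
	};
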
-
-struct mlx5_fpga_ipsec {
- struct mlx5_fpga_device *fdev;
- struct list_head pending_cmds;
- spinlock_t pending_cmds_lock; /* Protects pending_cmds */
- u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
- struct mlx5_fpga_conn *conn;
-
- struct notifier_block fs_notifier_ingress_bypass;
- struct notifier_block fs_notifier_egress;
-
- /* Map hardware SA --> SA context
- * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx)
-	 * We use this hash to avoid duplicate SAs in the FPGA, which
-	 * aren't allowed
- */
- struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
- struct mutex sa_hash_lock;
-
- /* Tree holding all rules for this fpga device
- * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
- */
- struct rb_root rules_rb;
- struct mutex rules_rb_lock; /* rules lock */
-
- struct ida halloc;
-};
-
-bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
-{
- if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
- return false;
-
- if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
- MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC)
- return false;
-
- return true;
-}
-
-static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf,
- u8 status)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
-
- if (status) {
- context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
- buf);
- mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
- status);
- context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
- complete(&context->complete);
- }
-}
-
-static inline
-int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
-{
- switch (syndrome) {
- case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
- return 0;
- case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
- return -EEXIST;
- case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
- return -EINVAL;
- case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
- return -EIO;
- }
- return -EIO;
-}
-
-static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
-{
- struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
- struct mlx5_fpga_ipsec_cmd_context *context;
- enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
- struct mlx5_fpga_device *fdev = cb_arg;
- unsigned long flags;
-
- if (buf->sg[0].size < sizeof(*resp)) {
- mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
- buf->sg[0].size, sizeof(*resp));
- return;
- }
-
- mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
- ntohl(resp->syndrome));
-
- spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
- struct mlx5_fpga_ipsec_cmd_context,
- list);
- if (context)
- list_del(&context->list);
- spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
-
- if (!context) {
- mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
- return;
- }
- mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
-
- syndrome = ntohl(resp->syndrome);
- context->status_code = syndrome_to_errno(syndrome);
- context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
- memcpy(&context->resp, resp, sizeof(*resp));
-
- if (context->status_code)
- mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
- syndrome);
-
- complete(&context->complete);
-}
-
-static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
- const void *cmd, int cmd_size)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
- struct mlx5_fpga_device *fdev = mdev->fpga;
- unsigned long flags;
- int res;
-
- if (!fdev || !fdev->ipsec)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (cmd_size & 3)
- return ERR_PTR(-EINVAL);
-
- context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
- context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
- context->dev = fdev;
- context->buf.complete = mlx5_fpga_ipsec_send_complete;
- init_completion(&context->complete);
- memcpy(&context->command, cmd, cmd_size);
- context->buf.sg[0].size = cmd_size;
- context->buf.sg[0].data = &context->command;
-
- spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
- if (!res)
- list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
- spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
-
- if (res) {
- mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res);
- kfree(context);
- return ERR_PTR(res);
- }
-
- /* Context should be freed by the caller after completion. */
- return context;
-}
-
-static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
-{
- struct mlx5_fpga_ipsec_cmd_context *context = ctx;
- unsigned long timeout =
- msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);
- int res;
-
- res = wait_for_completion_timeout(&context->complete, timeout);
- if (!res) {
- mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
- return -ETIMEDOUT;
- }
-
- if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
- res = context->status_code;
- else
- res = -EIO;
-
- return res;
-}
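
For reference, wait_for_completion_timeout() returns the remaining jiffies, or 0 on timeout, which is why !res above maps to -ETIMEDOUT. A compact restatement of the pattern; demo_wait() itself is invented, only the kernel APIs it calls are real:

	#include <linux/completion.h>
	#include <linux/errno.h>
	#include <linux/jiffies.h>

	static int demo_wait(struct completion *done, unsigned int msec)
	{
		unsigned long left;

		left = wait_for_completion_timeout(done, msecs_to_jiffies(msec));
		return left ? 0 : -ETIMEDOUT; /* 0 jiffies left == timed out */
	}
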
-
-static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
-{
- if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
- return true;
- return false;
-}
-
-static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
- int opcode)
-{
- struct mlx5_core_dev *dev = fdev->mdev;
- struct mlx5_ifc_fpga_ipsec_sa *sa;
- struct mlx5_fpga_ipsec_cmd_context *cmd_context;
- size_t sa_cmd_size;
- int err;
-
- hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
- if (is_v2_sadb_supported(fdev->ipsec))
- sa_cmd_size = sizeof(*hw_sa);
- else
- sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
-
- cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
- mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
- if (IS_ERR(cmd_context))
- return PTR_ERR(cmd_context);
-
- err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
- if (err)
- goto out;
-
- sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
- if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
- mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
- ntohl(sa->ipsec_sa_v1.sw_sa_handle),
- ntohl(cmd_context->resp.sw_sa_handle));
- err = -EIO;
- }
-
-out:
- kfree(cmd_context);
- return err;
-}
-
-u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- u32 ret = 0;
-
- if (mlx5_fpga_is_ipsec_device(mdev)) {
- ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
- ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
- } else {
- return ret;
- }
-
- if (!fdev->ipsec)
- return ret;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
- ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
- ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
- ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
- ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
- ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
- ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
- }
-
- return ret;
-}
-
-static unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!fdev || !fdev->ipsec)
- return 0;
-
- return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- number_of_ipsec_counters);
-}
-
-static int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int counters_count)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- unsigned int i;
- __be32 *data;
- u32 count;
- u64 addr;
- int ret;
-
- if (!fdev || !fdev->ipsec)
- return 0;
-
- addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- ipsec_counters_addr_low) +
- ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- ipsec_counters_addr_high) << 32);
-
- count = mlx5_fpga_ipsec_counters_count(mdev);
-
- data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL);
- if (!data) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
- MLX5_FPGA_ACCESS_TYPE_DONTCARE);
- if (ret < 0) {
- mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
- ret);
- goto out;
- }
- ret = 0;
-
- if (count > counters_count)
- count = counters_count;
-
- /* Each counter is low word, then high. But each word is big-endian */
- for (i = 0; i < count; i++)
- counters[i] = (u64)ntohl(data[i * 2]) |
- ((u64)ntohl(data[i * 2 + 1]) << 32);
-
-out:
- kfree(data);
- return ret;
-}
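
The counter unpacking above can be checked in isolation: each 64-bit counter arrives as two big-endian 32-bit words, low word first. A standalone sketch with made-up wire data:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* wire format: low 32-bit word first, both words big-endian */
		uint32_t wire[2] = { htonl(0x89abcdefu), htonl(0x01234567u) };
		uint64_t counter = (uint64_t)ntohl(wire[0]) |
				   ((uint64_t)ntohl(wire[1]) << 32);

		printf("counter = 0x%016llx\n", (unsigned long long)counter);
		/* prints counter = 0x0123456789abcdef */
		return 0;
	}
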
-
-static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
- struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
- int err;
-
- cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
- cmd.flags = htonl(flags);
- context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
- if (IS_ERR(context))
- return PTR_ERR(context);
-
- err = mlx5_fpga_ipsec_cmd_wait(context);
- if (err)
- goto out;
-
- if ((context->resp.flags & cmd.flags) != cmd.flags) {
- mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
- cmd.flags,
- context->resp.flags);
- err = -EIO;
- }
-
-out:
- kfree(context);
- return err;
-}
-
-static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
-{
- u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
- u32 flags = 0;
-
- if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
- flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
-
- return mlx5_fpga_ipsec_set_caps(mdev, flags);
-}
-
-static void
-mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
-{
- const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
-
- /* key */
- memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
- aes_gcm->key_len / 8);
- /* Duplicate 128 bit key twice according to HW layout */
- if (aes_gcm->key_len == 128)
- memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
- aes_gcm->aes_key, aes_gcm->key_len / 8);
-
- /* salt and seq_iv */
- memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
- sizeof(aes_gcm->seq_iv));
- memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
- sizeof(aes_gcm->salt));
-
- /* esn */
- if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
- hw_sa->ipsec_sa_v1.flags |=
- (xfrm_attrs->flags &
- MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
- MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
- hw_sa->esn = htonl(xfrm_attrs->esn);
- } else {
- hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
- hw_sa->ipsec_sa_v1.flags &=
- ~(xfrm_attrs->flags &
- MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
- MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
- hw_sa->esn = 0;
- }
-
- /* rx handle */
- hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
-
- /* enc mode */
- switch (aes_gcm->key_len) {
- case 128:
- hw_sa->ipsec_sa_v1.enc_mode =
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
- break;
- case 256:
- hw_sa->ipsec_sa_v1.enc_mode =
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
- break;
- }
-
- /* flags */
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
- MLX5_FPGA_IPSEC_SA_SPI_EN |
- MLX5_FPGA_IPSEC_SA_IP_ESP;
-
- if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
- else
- hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
-}
-
-static void
-mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
-{
- mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
-
- /* IPs */
- memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
- memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
-
- /* SPI */
- hw_sa->ipsec_sa_v1.spi = spi;
-
- /* flags */
- if (is_ipv6)
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
-}
-
-static bool is_full_mask(const void *p, size_t len)
-{
- WARN_ON(len % 4);
-
- return !memchr_inv(p, 0xff, len);
-}
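
is_full_mask() leans on memchr_inv(), which returns NULL only when every byte equals the given value, so the negation means "all bytes are 0xff". A userspace equivalent for illustration:

	#include <stdbool.h>
	#include <stddef.h>

	static bool demo_is_full_mask(const unsigned char *p, size_t len)
	{
		size_t i;

		for (i = 0; i < len; i++)
			if (p[i] != 0xff)
				return false;
		return true;
	}
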
-
-static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
- const u32 *match_c,
- const u32 *match_v)
-{
- const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- match_c,
- misc_parameters);
- const void *headers_c = MLX5_ADDR_OF(fte_match_param,
- match_c,
- outer_headers);
- const void *headers_v = MLX5_ADDR_OF(fte_match_param,
- match_v,
- outer_headers);
-
- if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
- const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4);
- const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
- ipv4)) ||
- !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
- ipv4)))
- return false;
- } else {
- const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6);
- const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
-
- if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6)) ||
- !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6)))
- return false;
- }
-
- if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- outer_esp_spi),
- MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
- return false;
-
- return true;
-}
-
-static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
- u8 match_criteria_enable,
- const u32 *match_c,
- const u32 *match_v)
-{
- u32 ipsec_dev_caps = mlx5_fpga_ipsec_device_caps(dev);
- bool ipv6_flow;
-
- ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
-
- if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
- mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
- mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
- mlx5_fs_is_vxlan_flow(match_c) ||
- !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
- ipv6_flow))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
- mlx5_fs_is_outer_ipsec_flow(match_c))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
- ipv6_flow)
- return false;
-
- if (!validate_fpga_full_mask(dev, match_c, match_v))
- return false;
-
- return true;
-}
-
-static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
- u8 match_criteria_enable,
- const u32 *match_c,
- const u32 *match_v,
- struct mlx5_flow_act *flow_act,
- struct mlx5_flow_context *flow_context)
-{
- const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
- MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
- bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
- MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
- int ret;
-
- ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
- match_v);
- if (!ret)
- return ret;
-
- if (is_dmac || is_smac ||
- (match_criteria_enable &
- ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
- (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
- return false;
-
- return true;
-}
-
-static void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *accel_xfrm,
- const __be32 saddr[4], const __be32 daddr[4],
- const __be32 spi, bool is_ipv6, u32 *sa_handle)
-{
- struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- container_of(accel_xfrm, typeof(*fpga_xfrm),
- accel_xfrm);
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- int opcode, err;
- void *context;
-
- /* alloc SA */
- sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
- if (!sa_ctx)
- return ERR_PTR(-ENOMEM);
-
- sa_ctx->dev = mdev;
-
- /* build candidate SA */
- mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
- saddr, daddr, spi, is_ipv6,
- &sa_ctx->hw_sa);
-
- mutex_lock(&fpga_xfrm->lock);
-
- if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */
- /* all rules must be with same IPs and SPI */
- if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
- sizeof(sa_ctx->hw_sa))) {
- context = ERR_PTR(-EINVAL);
- goto exists;
- }
-
- ++fpga_xfrm->num_rules;
- context = fpga_xfrm->sa_ctx;
- goto exists;
- }
-
- if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) {
- err = ida_alloc_min(&fipsec->halloc, 1, GFP_KERNEL);
- if (err < 0) {
- context = ERR_PTR(err);
- goto exists;
- }
-
- sa_ctx->sa_handle = err;
- if (sa_handle)
- *sa_handle = sa_ctx->sa_handle;
- }
- /* This is unbounded fpga_xfrm, try to add to hash */
- mutex_lock(&fipsec->sa_hash_lock);
-
- err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa);
- if (err) {
-		/* Can't bind a different accel_xfrm to an already existing sa_ctx.
-		 * This is because we can't support multiple keymats for
-		 * the same IPs and SPI
- */
- context = ERR_PTR(-EEXIST);
- goto unlock_hash;
- }
-
-	/* Bind accel_xfrm to sa_ctx */
- opcode = is_v2_sadb_supported(fdev->ipsec) ?
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
- sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err) {
- context = ERR_PTR(err);
- goto delete_hash;
- }
-
- mutex_unlock(&fipsec->sa_hash_lock);
-
- ++fpga_xfrm->num_rules;
- fpga_xfrm->sa_ctx = sa_ctx;
- sa_ctx->fpga_xfrm = fpga_xfrm;
-
- mutex_unlock(&fpga_xfrm->lock);
-
- return sa_ctx;
-
-delete_hash:
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa));
-unlock_hash:
- mutex_unlock(&fipsec->sa_hash_lock);
- if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
- ida_free(&fipsec->halloc, sa_ctx->sa_handle);
-exists:
- mutex_unlock(&fpga_xfrm->lock);
- kfree(sa_ctx);
- return context;
-}
-
-static void *
-mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct fs_fte *fte,
- bool is_egress)
-{
- struct mlx5_accel_esp_xfrm *accel_xfrm;
- __be32 saddr[4], daddr[4], spi;
- struct mlx5_flow_group *fg;
- bool is_ipv6 = false;
-
- fs_get_obj(fg, fte->node.parent);
- /* validate */
- if (is_egress &&
- !mlx5_is_fpga_egress_ipsec_rule(mdev,
- fg->mask.match_criteria_enable,
- fg->mask.match_criteria,
- fte->val,
- &fte->action,
- &fte->flow_context))
- return ERR_PTR(-EINVAL);
- else if (!mlx5_is_fpga_ipsec_rule(mdev,
- fg->mask.match_criteria_enable,
- fg->mask.match_criteria,
- fte->val))
- return ERR_PTR(-EINVAL);
-
- /* get xfrm context */
- accel_xfrm =
- (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
-
- /* IPs */
- if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
- fte->val)) {
- memcpy(&saddr[3],
- MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- fte->val,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- sizeof(saddr[3]));
- memcpy(&daddr[3],
- MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- fte->val,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- sizeof(daddr[3]));
- } else {
- memcpy(saddr,
- MLX5_ADDR_OF(fte_match_param,
- fte->val,
- outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(saddr));
- memcpy(daddr,
- MLX5_ADDR_OF(fte_match_param,
- fte->val,
- outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(daddr));
- is_ipv6 = true;
- }
-
- /* SPI */
- spi = MLX5_GET_BE(typeof(spi),
- fte_match_param, fte->val,
- misc_parameters.outer_esp_spi);
-
- /* create */
- return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
- saddr, daddr,
- spi, is_ipv6, NULL);
-}
-
-static void
-mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
-{
- struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- int opcode = is_v2_sadb_supported(fdev->ipsec) ?
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
- int err;
-
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
- sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err) {
- WARN_ON(err);
- return;
- }
-
- if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action ==
- MLX5_ACCEL_ESP_ACTION_DECRYPT)
- ida_free(&fipsec->halloc, sa_ctx->sa_handle);
-
- mutex_lock(&fipsec->sa_hash_lock);
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa));
- mutex_unlock(&fipsec->sa_hash_lock);
-}
-
-static void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
-
- mutex_lock(&fpga_xfrm->lock);
- if (!--fpga_xfrm->num_rules) {
- mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
- kfree(fpga_xfrm->sa_ctx);
- fpga_xfrm->sa_ctx = NULL;
- }
- mutex_unlock(&fpga_xfrm->lock);
-}
-
-static inline struct mlx5_fpga_ipsec_rule *
-_rule_search(struct rb_root *root, struct fs_fte *fte)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct mlx5_fpga_ipsec_rule *rule =
- container_of(node, struct mlx5_fpga_ipsec_rule,
- node);
-
- if (rule->fte < fte)
- node = node->rb_left;
- else if (rule->fte > fte)
- node = node->rb_right;
- else
- return rule;
- }
- return NULL;
-}
-
-static struct mlx5_fpga_ipsec_rule *
-rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
-{
- struct mlx5_fpga_ipsec_rule *rule;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- rule = _rule_search(&ipsec_dev->rules_rb, fte);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-
- return rule;
-}
-
-static inline int _rule_insert(struct rb_root *root,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- struct rb_node **new = &root->rb_node, *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct mlx5_fpga_ipsec_rule *this =
- container_of(*new, struct mlx5_fpga_ipsec_rule,
- node);
-
- parent = *new;
- if (rule->fte < this->fte)
- new = &((*new)->rb_left);
- else if (rule->fte > this->fte)
- new = &((*new)->rb_right);
- else
- return -EEXIST;
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&rule->node, parent, new);
- rb_insert_color(&rule->node, root);
-
- return 0;
-}
-
-static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- int ret;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- ret = _rule_insert(&ipsec_dev->rules_rb, rule);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-
- return ret;
-}
-
-static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- struct rb_root *root = &ipsec_dev->rules_rb;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- rb_erase(&rule->node, root);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-}
-
-static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- _rule_delete(ipsec_dev, rule);
- kfree(rule);
-}
-
-struct mailbox_mod {
- uintptr_t saved_esp_id;
- u32 saved_action;
- u32 saved_outer_esp_spi_value;
-};
-
-static void restore_spec_mailbox(struct fs_fte *fte,
- struct mailbox_mod *mbox_mod)
-{
- char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
- fte->val,
- misc_parameters);
-
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- mbox_mod->saved_outer_esp_spi_value);
- fte->action.action |= mbox_mod->saved_action;
- fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
-}
-
-static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
- struct fs_fte *fte,
- struct mailbox_mod *mbox_mod)
-{
- char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
- fte->val,
- misc_parameters);
-
- mbox_mod->saved_esp_id = fte->action.esp_id;
- mbox_mod->saved_action = fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
- mbox_mod->saved_outer_esp_spi_value =
- MLX5_GET(fte_match_set_misc, misc_params_v,
- outer_esp_spi);
-
- fte->action.esp_id = 0;
- fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
- if (!MLX5_CAP_FLOWTABLE(mdev,
- flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
-}
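
modify_spec_mailbox() and restore_spec_mailbox() above bracket each firmware call with a save-mutate-restore of the FTE, stripping the crypto action bits (and, when the field is unsupported, the SPI match) before the command and putting them back afterwards. A generic model of the pattern, with all types invented:

	struct demo_fte { unsigned int action; unsigned int spi; };
	struct demo_saved { unsigned int action; unsigned int spi; };

	static void demo_modify(struct demo_fte *fte, struct demo_saved *s)
	{
		s->action = fte->action;
		s->spi = fte->spi;
		fte->action = 0; /* hide crypto bits from firmware */
		fte->spi = 0;
	}

	static void demo_restore(struct demo_fte *fte, const struct demo_saved *s)
	{
		fte->action = s->action;
		fte->spi = s->spi;
	}
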
-
-static enum fs_flow_table_type egress_to_fs_ft(bool egress)
-{
- return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
-}
-
-static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg,
- bool is_egress)
-{
- int (*create_flow_group)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft, u32 *in,
- struct mlx5_flow_group *fg) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
- char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
- match_criteria.misc_parameters);
- struct mlx5_core_dev *dev = ns->dev;
- u32 saved_outer_esp_spi_mask;
- u8 match_criteria_enable;
- int ret;
-
- if (MLX5_CAP_FLOWTABLE(dev,
- flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- return create_flow_group(ns, ft, in, fg);
-
- match_criteria_enable =
- MLX5_GET(create_flow_group_in, in, match_criteria_enable);
- saved_outer_esp_spi_mask =
- MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
- if (!match_criteria_enable || !saved_outer_esp_spi_mask)
- return create_flow_group(ns, ft, in, fg);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
-
- if (!(*misc_params_c) &&
- !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
- MLX5_SET(create_flow_group_in, in, match_criteria_enable,
- match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
-
- ret = create_flow_group(ns, ft, in, fg);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
- MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
-
- return ret;
-}
-
-static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*create_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
- struct mlx5_core_dev *dev = ns->dev;
- struct mlx5_fpga_device *fdev = dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_ipsec_rule *rule;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return create_fte(ns, ft, fg, fte);
-
- rule = kzalloc(sizeof(*rule), GFP_KERNEL);
- if (!rule)
- return -ENOMEM;
-
- rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
- if (IS_ERR(rule->ctx)) {
- int err = PTR_ERR(rule->ctx);
-
- kfree(rule);
- return err;
- }
-
- rule->fte = fte;
- WARN_ON(rule_insert(fipsec, rule));
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = create_fte(ns, ft, fg, fte);
- restore_spec_mailbox(fte, &mbox_mod);
- if (ret) {
- _rule_delete(fipsec, rule);
- mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
- kfree(rule);
- }
-
- return ret;
-}
-
-static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*update_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
- struct mlx5_core_dev *dev = ns->dev;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return update_fte(ns, ft, fg, modify_mask, fte);
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = update_fte(ns, ft, fg, modify_mask, fte);
- restore_spec_mailbox(fte, &mbox_mod);
-
- return ret;
-}
-
-static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*delete_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
- struct mlx5_core_dev *dev = ns->dev;
- struct mlx5_fpga_device *fdev = dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_ipsec_rule *rule;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return delete_fte(ns, ft, fte);
-
- rule = rule_search(fipsec, fte);
- if (!rule)
- return -ENOENT;
-
- mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
- rule_delete(fipsec, rule);
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = delete_fte(ns, ft, fte);
- restore_spec_mailbox(fte, &mbox_mod);
-
- return ret;
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg)
-{
- return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
- true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_delete_fte(ns, ft, fte, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg)
-{
- return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
- false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_delete_fte(ns, ft, fte, false);
-}
-
-static struct mlx5_flow_cmds fpga_ipsec_ingress;
-static struct mlx5_flow_cmds fpga_ipsec_egress;
-
-const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
- switch (type) {
- case FS_FT_NIC_RX:
- return &fpga_ipsec_ingress;
- case FS_FT_NIC_TX:
- return &fpga_ipsec_egress;
- default:
- WARN_ON(true);
- return NULL;
- }
-}
-
-static int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_conn_attr init_attr = {0};
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_conn *conn;
- int err;
-
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return 0;
-
- fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
- if (!fdev->ipsec)
- return -ENOMEM;
-
- fdev->ipsec->fdev = fdev;
-
- err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
- fdev->ipsec->caps);
- if (err) {
- mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
- err);
- goto error;
- }
-
- INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
- spin_lock_init(&fdev->ipsec->pending_cmds_lock);
-
- init_attr.rx_size = SBU_QP_QUEUE_SIZE;
- init_attr.tx_size = SBU_QP_QUEUE_SIZE;
- init_attr.recv_cb = mlx5_fpga_ipsec_recv;
- init_attr.cb_arg = fdev;
- conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
- if (IS_ERR(conn)) {
- err = PTR_ERR(conn);
- mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
- err);
- goto error;
- }
- fdev->ipsec->conn = conn;
-
- err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
- if (err)
- goto err_destroy_conn;
- mutex_init(&fdev->ipsec->sa_hash_lock);
-
- fdev->ipsec->rules_rb = RB_ROOT;
- mutex_init(&fdev->ipsec->rules_rb_lock);
-
- err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
- if (err) {
- mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
- err);
- goto err_destroy_hash;
- }
-
- ida_init(&fdev->ipsec->halloc);
-
- return 0;
-
-err_destroy_hash:
- rhashtable_destroy(&fdev->ipsec->sa_hash);
-
-err_destroy_conn:
- mlx5_fpga_sbu_conn_destroy(conn);
-
-error:
- kfree(fdev->ipsec);
- fdev->ipsec = NULL;
- return err;
-}
-
-static void destroy_rules_rb(struct rb_root *root)
-{
- struct mlx5_fpga_ipsec_rule *r, *tmp;
-
- rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
- rb_erase(&r->node, root);
- mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
- kfree(r);
- }
-}
-
-static void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return;
-
- ida_destroy(&fdev->ipsec->halloc);
- destroy_rules_rb(&fdev->ipsec->rules_rb);
- rhashtable_destroy(&fdev->ipsec->sa_hash);
-
- mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
- kfree(fdev->ipsec);
- fdev->ipsec = NULL;
-}
-
-void mlx5_fpga_ipsec_build_fs_cmds(void)
-{
- /* ingress */
- fpga_ipsec_ingress.create_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
- fpga_ipsec_ingress.destroy_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
- fpga_ipsec_ingress.modify_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
- fpga_ipsec_ingress.create_flow_group =
- mlx5_fpga_ipsec_fs_create_flow_group_ingress;
- fpga_ipsec_ingress.destroy_flow_group =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
- fpga_ipsec_ingress.create_fte =
- mlx5_fpga_ipsec_fs_create_fte_ingress;
- fpga_ipsec_ingress.update_fte =
- mlx5_fpga_ipsec_fs_update_fte_ingress;
- fpga_ipsec_ingress.delete_fte =
- mlx5_fpga_ipsec_fs_delete_fte_ingress;
- fpga_ipsec_ingress.update_root_ft =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
-
- /* egress */
- fpga_ipsec_egress.create_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
- fpga_ipsec_egress.destroy_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
- fpga_ipsec_egress.modify_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
- fpga_ipsec_egress.create_flow_group =
- mlx5_fpga_ipsec_fs_create_flow_group_egress;
- fpga_ipsec_egress.destroy_flow_group =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
- fpga_ipsec_egress.create_fte =
- mlx5_fpga_ipsec_fs_create_fte_egress;
- fpga_ipsec_egress.update_fte =
- mlx5_fpga_ipsec_fs_update_fte_egress;
- fpga_ipsec_egress.delete_fte =
- mlx5_fpga_ipsec_fs_delete_fte_egress;
- fpga_ipsec_egress.update_root_ft =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
-}
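
mlx5_fpga_ipsec_build_fs_cmds() clones the default flow-command table hook by hook and overrides only the flow-group and FTE entry points. A compact model of that ops-override pattern; the struct and functions here are illustrative:

	struct demo_flow_cmds {
		int (*create_fte)(void);
		int (*delete_fte)(void);
		int (*update_root_ft)(void);
	};

	static int demo_create_fte_wrapped(void) { return 0; }

	/* inherit the defaults, then override only the hooks that need it */
	static void demo_build_cmds(struct demo_flow_cmds *cmds,
				    const struct demo_flow_cmds *defaults)
	{
		*cmds = *defaults;
		cmds->create_fte = demo_create_fte_wrapped;
	}
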
-
-static int
-mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- if (attrs->tfc_pad) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
- mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.iv_algo !=
- MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
- mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.icv_len != 128) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.key_len != 128 &&
- attrs->keymat.aes_gcm.key_len != 256) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
- return -EOPNOTSUPP;
- }
-
- if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
- (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
- v2_command))) {
-		mlx5_core_err(mdev, "Cannot offload xfrm states with ESN without v2 command support\n");
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static struct mlx5_accel_esp_xfrm *
-mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
-
- if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
- mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
- return ERR_PTR(-EINVAL);
- }
-
- if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
- mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
- return ERR_PTR(-EOPNOTSUPP);
- }
-
- fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
- if (!fpga_xfrm)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&fpga_xfrm->lock);
- memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
- sizeof(fpga_xfrm->accel_xfrm.attrs));
-
- return &fpga_xfrm->accel_xfrm;
-}
-
-static void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- container_of(xfrm, struct mlx5_fpga_esp_xfrm,
- accel_xfrm);
-	/* assuming no sa_ctx is connected to this xfrm_ctx */
- kfree(fpga_xfrm);
-}
-
-static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- struct mlx5_core_dev *mdev = xfrm->mdev;
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
- struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
-
- int err = 0;
-
- if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
- return 0;
-
- if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
-		mlx5_core_warn(mdev, "Tried to modify an esp with unsupported attrs\n");
- return -EOPNOTSUPP;
- }
-
- if (is_v2_sadb_supported(fipsec)) {
- mlx5_core_warn(mdev, "Modify esp is not supported\n");
- return -EOPNOTSUPP;
- }
-
- fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
-
- mutex_lock(&fpga_xfrm->lock);
-
- if (!fpga_xfrm->sa_ctx)
- /* Unbounded xfrm, change only sw attrs */
- goto change_sw_xfrm_attrs;
-
- /* copy original hw sa */
- memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
- mutex_lock(&fipsec->sa_hash_lock);
- /* remove original hw sa from hash */
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash, rhash_sa));
-	/* update hw_sa with new xfrm attrs */
- mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
- &fpga_xfrm->sa_ctx->hw_sa);
- /* try to insert new hw_sa to hash */
- err = rhashtable_insert_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash, rhash_sa);
- if (err)
- goto rollback_sa;
-
- /* modify device with new hw_sa */
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
- MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
- fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err)
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash,
- rhash_sa));
-rollback_sa:
- if (err) {
- /* return original hw_sa to hash */
- memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
- sizeof(org_hw_sa));
- WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash,
- rhash_sa));
- }
- mutex_unlock(&fipsec->sa_hash_lock);
-
-change_sw_xfrm_attrs:
- if (!err)
- memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
- mutex_unlock(&fpga_xfrm->lock);
- return err;
-}
-
-static const struct mlx5_accel_ipsec_ops fpga_ipsec_ops = {
- .device_caps = mlx5_fpga_ipsec_device_caps,
- .counters_count = mlx5_fpga_ipsec_counters_count,
- .counters_read = mlx5_fpga_ipsec_counters_read,
- .create_hw_context = mlx5_fpga_ipsec_create_sa_ctx,
- .free_hw_context = mlx5_fpga_ipsec_delete_sa_ctx,
- .init = mlx5_fpga_ipsec_init,
- .cleanup = mlx5_fpga_ipsec_cleanup,
- .esp_create_xfrm = mlx5_fpga_esp_create_xfrm,
- .esp_modify_xfrm = mlx5_fpga_esp_modify_xfrm,
- .esp_destroy_xfrm = mlx5_fpga_esp_destroy_xfrm,
-};
-
-const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev)
-{
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return NULL;
-
- return &fpga_ipsec_ops;
-}
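The modify path above snapshots the live hardware SA (org_hw_sa), rebuilds it in place, and restores the snapshot when the hash re-insert or the device command fails. A minimal userspace sketch of the same snapshot-and-rollback pattern; hw_update() is a hypothetical stand-in for the device call, not a driver API:

	#include <stdio.h>

	struct hw_sa { unsigned int spi; unsigned int key_len; };

	/* Hypothetical stand-in for the device update command. */
	static int hw_update(const struct hw_sa *sa)
	{
		return (sa->key_len == 128 || sa->key_len == 256) ? 0 : -1;
	}

	static int modify_sa(struct hw_sa *live, const struct hw_sa *want)
	{
		struct hw_sa org = *live;	/* snapshot, like org_hw_sa */

		*live = *want;			/* rebuild the SA in place */
		if (hw_update(live)) {
			*live = org;		/* device refused: roll back */
			return -1;
		}
		return 0;
	}

	int main(void)
	{
		struct hw_sa sa  = { .spi = 1, .key_len = 128 };
		struct hw_sa bad = { .spi = 1, .key_len = 192 };

		printf("modify=%d key_len=%u\n", modify_sa(&sa, &bad), sa.key_len);
		return 0;
	}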
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
deleted file mode 100644
index 8931b5584477..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_FPGA_IPSEC_H__
-#define __MLX5_FPGA_IPSEC_H__
-
-#include "accel/ipsec.h"
-#include "fs_cmd.h"
-
-#ifdef CONFIG_MLX5_FPGA_IPSEC
-const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev);
-u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
-const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
-void mlx5_fpga_ipsec_build_fs_cmds(void);
-bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev);
-#else
-static inline
-const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev)
-{ return NULL; }
-static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
-static inline const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
- return mlx5_fs_cmd_get_default(type);
-}
-
-static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}
-static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; }
-
-#endif /* CONFIG_MLX5_FPGA_IPSEC */
-#endif /* __MLX5_FPGA_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
deleted file mode 100644
index 29b7339ebfa3..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-#include "fpga/tls.h"
-#include "fpga/cmd.h"
-#include "fpga/sdk.h"
-#include "fpga/core.h"
-#include "accel/tls.h"
-
-struct mlx5_fpga_tls_command_context;
-
-typedef void (*mlx5_fpga_tls_command_complete)
- (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *ctx,
- struct mlx5_fpga_dma_buf *resp);
-
-struct mlx5_fpga_tls_command_context {
- struct list_head list;
- /* There is no guarantee on the order between the TX completion
- * and the command response.
- * The TX completion is going to touch cmd->buf even in
- * the case of successful transmission.
-	 * So instead of requiring separate allocations for cmd
-	 * and cmd->buf, we use a reference counter.
-	 */
- refcount_t ref;
- struct mlx5_fpga_dma_buf buf;
- mlx5_fpga_tls_command_complete complete;
-};
-
-static void
-mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx)
-{
- if (refcount_dec_and_test(&ctx->ref))
- kfree(ctx);
-}
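The comment in mlx5_fpga_tls_command_context above is the whole lifetime rule: the context starts with two references because the TX completion and the response handler each drop one, and whichever runs last frees the allocation. A self-contained C11 sketch of that two-owner rule (names are illustrative):

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct cmd_ctx {
		atomic_int ref;
		char buf[64];
	};

	static void put_ctx(struct cmd_ctx *ctx)
	{
		/* free on the 1 -> 0 transition, like refcount_dec_and_test() */
		if (atomic_fetch_sub(&ctx->ref, 1) == 1)
			free(ctx);
	}

	int main(void)
	{
		struct cmd_ctx *ctx = calloc(1, sizeof(*ctx));

		if (!ctx)
			return 1;
		atomic_store(&ctx->ref, 2);	/* one ref per completion path */
		put_ctx(ctx);			/* TX completion path */
		put_ctx(ctx);			/* response path: frees here */
		puts("context freed exactly once");
		return 0;
	}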
-
-static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *resp)
-{
- struct mlx5_fpga_conn *conn = fdev->tls->conn;
- struct mlx5_fpga_tls_command_context *ctx;
- struct mlx5_fpga_tls *tls = fdev->tls;
- unsigned long flags;
-
- spin_lock_irqsave(&tls->pending_cmds_lock, flags);
- ctx = list_first_entry(&tls->pending_cmds,
- struct mlx5_fpga_tls_command_context, list);
- list_del(&ctx->list);
- spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
- ctx->complete(conn, fdev, ctx, resp);
-}
-
-static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf,
- u8 status)
-{
- struct mlx5_fpga_tls_command_context *ctx =
- container_of(buf, struct mlx5_fpga_tls_command_context, buf);
-
- mlx5_fpga_tls_put_command_ctx(ctx);
-
- if (unlikely(status))
- mlx5_fpga_tls_cmd_complete(fdev, NULL);
-}
-
-static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- mlx5_fpga_tls_command_complete complete)
-{
- struct mlx5_fpga_tls *tls = fdev->tls;
- unsigned long flags;
- int ret;
-
- refcount_set(&cmd->ref, 2);
- cmd->complete = complete;
- cmd->buf.complete = mlx5_fpga_cmd_send_complete;
-
- spin_lock_irqsave(&tls->pending_cmds_lock, flags);
- /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock
- * to make sure commands are inserted to the tls->pending_cmds list
- * and the command QP in the same order.
- */
- ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf);
- if (likely(!ret))
- list_add_tail(&cmd->list, &tls->pending_cmds);
- else
- complete(tls->conn, fdev, cmd, NULL);
- spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
-}
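The locking comment above is load-bearing: responses carry no command ID, so mlx5_fpga_tls_cmd_complete() matches each response to the oldest pending entry, and the QP send and the list insert must therefore happen under one lock to keep wire order equal to list order. A simplified sketch of that invariant; wire_send() stands in for mlx5_fpga_sbu_conn_sendmsg():

	#include <pthread.h>
	#include <stdio.h>

	#define QLEN 8

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static int pending[QLEN];
	static int head, tail;

	static int wire_send(int cmd)
	{
		(void)cmd;
		return 0;	/* pretend the send always succeeds */
	}

	static int send_cmd(int cmd)
	{
		int err;

		/* send and enqueue under one lock: list order == wire order */
		pthread_mutex_lock(&lock);
		err = wire_send(cmd);
		if (!err)
			pending[tail++ % QLEN] = cmd;
		pthread_mutex_unlock(&lock);
		return err;
	}

	static int complete_oldest(void)
	{
		int cmd;

		pthread_mutex_lock(&lock);
		cmd = pending[head++ % QLEN];	/* like list_first_entry() */
		pthread_mutex_unlock(&lock);
		return cmd;
	}

	int main(void)
	{
		send_cmd(1);
		send_cmd(2);
		printf("completed %d then %d\n", complete_oldest(), complete_oldest());
		return 0;
	}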
-
-/* Start of context identifiers range (inclusive) */
-#define SWID_START 0
-/* End of context identifiers range (exclusive) */
-#define SWID_END BIT(24)
-
-static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
- void *ptr)
-{
- unsigned long flags;
- int ret;
-
-	/* The TLS metadata format is 1 byte of syndrome followed
-	 * by 3 bytes of swid (software ID), so the swid must not
-	 * exceed 3 bytes.
-	 * See tls_rxtx.c:insert_pet() for details.
-	 */
- BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
-
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(idr_spinlock, flags);
- ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
- spin_unlock_irqrestore(idr_spinlock, flags);
- idr_preload_end();
-
- return ret;
-}
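The BUILD_BUG_ON above pins down the layout the comment describes: one byte of syndrome followed by a three-byte swid, so every ID from [SWID_START, SWID_END) fits. A small sketch of the packing; pack_pet() is illustrative, not a driver function:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SWID_END (1u << 24)	/* the swid must fit in the low 3 bytes */

	static uint32_t pack_pet(uint8_t syndrome, uint32_t swid)
	{
		assert(swid < SWID_END);
		return ((uint32_t)syndrome << 24) | swid;
	}

	int main(void)
	{
		uint32_t pet = pack_pet(0, 0x123456);

		printf("syndrome=%u swid=0x%06x\n", pet >> 24, pet & 0xffffff);
		return 0;
	}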
-
-static void *mlx5_fpga_tls_release_swid(struct idr *idr,
- spinlock_t *idr_spinlock, u32 swid)
-{
- unsigned long flags;
- void *ptr;
-
- spin_lock_irqsave(idr_spinlock, flags);
- ptr = idr_remove(idr, swid);
- spin_unlock_irqrestore(idr_spinlock, flags);
- return ptr;
-}
-
-static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf, u8 status)
-{
- kfree(buf);
-}
-
-static void
-mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- struct mlx5_fpga_dma_buf *resp)
-{
- if (resp) {
- u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
-
- if (syndrome)
- mlx5_fpga_err(fdev,
- "Teardown stream failed with syndrome = %d",
- syndrome);
- }
- mlx5_fpga_tls_put_command_ctx(cmd);
-}
-
-static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
-{
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow,
- MLX5_BYTE_OFF(tls_flow, ipv6));
-
- MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6));
- MLX5_SET(tls_cmd, cmd, direction_sx,
- MLX5_GET(tls_flow, flow, direction_sx));
-}
-
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn)
-{
- struct mlx5_fpga_dma_buf *buf;
- int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
- void *flow;
- void *cmd;
- int ret;
-
- buf = kzalloc(size, GFP_ATOMIC);
- if (!buf)
- return -ENOMEM;
-
- cmd = (buf + 1);
-
- rcu_read_lock();
- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
- if (unlikely(!flow)) {
- rcu_read_unlock();
- WARN_ONCE(1, "Received NULL pointer for handle\n");
- kfree(buf);
- return -EINVAL;
- }
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
- rcu_read_unlock();
-
- MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
- MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
- MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
- MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
-
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- buf->complete = mlx_tls_kfree_complete;
-
- ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
- if (ret < 0)
- kfree(buf);
-
- return ret;
-}
-
-static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
- void *flow, u32 swid, gfp_t flags)
-{
- struct mlx5_fpga_tls_command_context *ctx;
- struct mlx5_fpga_dma_buf *buf;
- void *cmd;
-
- ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags);
- if (!ctx)
- return;
-
- buf = &ctx->buf;
- cmd = (ctx + 1);
- MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
- MLX5_SET(tls_cmd, cmd, swid, swid);
-
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
- kfree(flow);
-
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
-
- mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
- mlx5_fpga_tls_teardown_completion);
-}
-
-void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- gfp_t flags, bool direction_sx)
-{
- struct mlx5_fpga_tls *tls = mdev->fpga->tls;
- void *flow;
-
- if (direction_sx)
- flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock,
- swid);
- else
- flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock,
- swid);
-
- if (!flow) {
- mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
- swid);
- return;
- }
-
- synchronize_rcu(); /* before kfree(flow) */
- mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
-}
-
-enum mlx5_fpga_setup_stream_status {
- MLX5_FPGA_CMD_PENDING,
- MLX5_FPGA_CMD_SEND_FAILED,
- MLX5_FPGA_CMD_RESPONSE_RECEIVED,
- MLX5_FPGA_CMD_ABANDONED,
-};
-
-struct mlx5_setup_stream_context {
- struct mlx5_fpga_tls_command_context cmd;
- atomic_t status;
- u32 syndrome;
- struct completion comp;
-};
-
-static void
-mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- struct mlx5_fpga_dma_buf *resp)
-{
- struct mlx5_setup_stream_context *ctx =
- container_of(cmd, struct mlx5_setup_stream_context, cmd);
- int status = MLX5_FPGA_CMD_SEND_FAILED;
- void *tls_cmd = ctx + 1;
-
-	/* If we failed to send the command, resp == NULL */
- if (resp) {
- ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
- status = MLX5_FPGA_CMD_RESPONSE_RECEIVED;
- }
-
- status = atomic_xchg_release(&ctx->status, status);
- if (likely(status != MLX5_FPGA_CMD_ABANDONED)) {
- complete(&ctx->comp);
- return;
- }
-
- mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n",
- ctx->syndrome);
-
- if (!ctx->syndrome) {
- /* The process was killed while waiting for the context to be
- * added, and the add completed successfully.
-		 * We need to destroy the HW context, and we can't reuse
- * the command context because we might not have received
- * the tx completion yet.
- */
- mlx5_fpga_tls_del_flow(fdev->mdev,
- MLX5_GET(tls_cmd, tls_cmd, swid),
- GFP_ATOMIC,
- MLX5_GET(tls_cmd, tls_cmd,
- direction_sx));
- }
-
- mlx5_fpga_tls_put_command_ctx(cmd);
-}
-
-static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev,
- struct mlx5_setup_stream_context *ctx)
-{
- struct mlx5_fpga_dma_buf *buf;
- void *cmd = ctx + 1;
- int status, ret = 0;
-
- buf = &ctx->cmd.buf;
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM);
-
- init_completion(&ctx->comp);
- atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING);
- ctx->syndrome = -1;
-
- mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
- mlx5_fpga_tls_setup_completion);
- wait_for_completion_killable(&ctx->comp);
-
- status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED);
- if (unlikely(status == MLX5_FPGA_CMD_PENDING))
- /* ctx is going to be released in mlx5_fpga_tls_setup_completion */
- return -EINTR;
-
- if (unlikely(ctx->syndrome))
- ret = -ENOMEM;
-
- mlx5_fpga_tls_put_command_ctx(&ctx->cmd);
- return ret;
-}
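The setup path and its completion handler hand the context off through ctx->status with atomic exchanges: the killable waiter swaps in MLX5_FPGA_CMD_ABANDONED, the completion swaps in its result, and each side acts on the value it found, so exactly one of them cleans up. A single-threaded sketch of the handshake under the same state names (simplified):

	#include <stdatomic.h>
	#include <stdio.h>

	enum { PENDING, SEND_FAILED, RESPONSE_RECEIVED, ABANDONED };

	int main(void)
	{
		atomic_int status = PENDING;
		int prev;

		/* the completion handler runs first in this trace */
		prev = atomic_exchange(&status, RESPONSE_RECEIVED);
		if (prev != ABANDONED)
			puts("completion: waiter still present, wake it");

		/* the waiter wakes up and claims whatever state is there */
		prev = atomic_exchange(&status, ABANDONED);
		if (prev == PENDING)
			puts("waiter: interrupted, completion will clean up");
		else
			puts("waiter: response arrived, safe to free");
		return 0;
	}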
-
-static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg,
- struct mlx5_fpga_dma_buf *buf)
-{
- struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg;
-
- mlx5_fpga_tls_cmd_complete(fdev, buf);
-}
-
-bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev)
-{
- if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
- return false;
-
- if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
- MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0)
- return false;
-
- return true;
-}
-
-static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev,
- u32 *p_caps)
-{
- int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap);
- u32 caps = 0;
- void *buf;
-
- buf = kzalloc(cap_size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf);
- if (err)
- goto out;
-
- if (MLX5_GET(tls_extended_cap, buf, tx))
- caps |= MLX5_ACCEL_TLS_TX;
- if (MLX5_GET(tls_extended_cap, buf, rx))
- caps |= MLX5_ACCEL_TLS_RX;
- if (MLX5_GET(tls_extended_cap, buf, tls_v12))
- caps |= MLX5_ACCEL_TLS_V12;
- if (MLX5_GET(tls_extended_cap, buf, tls_v13))
- caps |= MLX5_ACCEL_TLS_V13;
- if (MLX5_GET(tls_extended_cap, buf, lro))
- caps |= MLX5_ACCEL_TLS_LRO;
- if (MLX5_GET(tls_extended_cap, buf, ipv6))
- caps |= MLX5_ACCEL_TLS_IPV6;
-
- if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128))
- caps |= MLX5_ACCEL_TLS_AES_GCM128;
- if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256))
- caps |= MLX5_ACCEL_TLS_AES_GCM256;
-
- *p_caps = caps;
- err = 0;
-out:
- kfree(buf);
- return err;
-}
-
-int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_conn_attr init_attr = {0};
- struct mlx5_fpga_conn *conn;
- struct mlx5_fpga_tls *tls;
- int err = 0;
-
- if (!mlx5_fpga_is_tls_device(mdev) || !fdev)
- return 0;
-
- tls = kzalloc(sizeof(*tls), GFP_KERNEL);
- if (!tls)
- return -ENOMEM;
-
- err = mlx5_fpga_tls_get_caps(fdev, &tls->caps);
- if (err)
- goto error;
-
- if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
- err = -ENOTSUPP;
- goto error;
- }
-
- init_attr.rx_size = SBU_QP_QUEUE_SIZE;
- init_attr.tx_size = SBU_QP_QUEUE_SIZE;
- init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb;
- init_attr.cb_arg = fdev;
- conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
- if (IS_ERR(conn)) {
- err = PTR_ERR(conn);
- mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n",
- err);
- goto error;
- }
-
- tls->conn = conn;
- spin_lock_init(&tls->pending_cmds_lock);
- INIT_LIST_HEAD(&tls->pending_cmds);
-
- idr_init(&tls->tx_idr);
- idr_init(&tls->rx_idr);
- spin_lock_init(&tls->tx_idr_spinlock);
- spin_lock_init(&tls->rx_idr_spinlock);
- fdev->tls = tls;
- return 0;
-
-error:
- kfree(tls);
- return err;
-}
-
-void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!fdev || !fdev->tls)
- return;
-
- mlx5_fpga_sbu_conn_destroy(fdev->tls->conn);
- kfree(fdev->tls);
- fdev->tls = NULL;
-}
-
-static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd,
- struct tls_crypto_info *info,
- __be64 *rcd_sn)
-{
- struct tls12_crypto_info_aes_gcm_128 *crypto_info =
- (struct tls12_crypto_info_aes_gcm_128 *)info;
-
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq,
- TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
-
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv),
- crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key),
- crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
-
- /* in AES-GCM 128 we need to write the key twice */
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) +
- TLS_CIPHER_AES_GCM_128_KEY_SIZE,
- crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
-
- MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128);
-}
-
-static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
- struct tls_crypto_info *crypto_info)
-{
- __be64 rcd_sn;
-
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- if (!(caps & MLX5_ACCEL_TLS_AES_GCM128))
- return -EINVAL;
- mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 swid, u32 tcp_sn)
-{
- u32 caps = mlx5_fpga_tls_device_caps(mdev);
- struct mlx5_setup_stream_context *ctx;
- int ret = -ENOMEM;
- size_t cmd_size;
- void *cmd;
-
- cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx);
- ctx = kzalloc(cmd_size, GFP_KERNEL);
- if (!ctx)
- goto out;
-
- cmd = ctx + 1;
- ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info);
- if (ret)
- goto free_ctx;
-
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
-
- MLX5_SET(tls_cmd, cmd, swid, swid);
- MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn);
-
- return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx);
-
-free_ctx:
- kfree(ctx);
-out:
- return ret;
-}
-
-int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx)
-{
- struct mlx5_fpga_tls *tls = mdev->fpga->tls;
- int ret = -ENOMEM;
- u32 swid;
-
- if (direction_sx)
- ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock, flow);
- else
- ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock, flow);
-
- if (ret < 0)
- return ret;
-
- swid = ret;
- MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
-
- ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
- start_offload_tcp_sn);
- if (ret && ret != -EINTR)
- goto free_swid;
-
- *p_swid = swid;
- return 0;
-free_swid:
- if (direction_sx)
- mlx5_fpga_tls_release_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock, swid);
- else
- mlx5_fpga_tls_release_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock, swid);
-
- return ret;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
deleted file mode 100644
index 5714cf391d1b..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_FPGA_TLS_H__
-#define __MLX5_FPGA_TLS_H__
-
-#include <linux/mlx5/driver.h>
-
-#include <net/tls.h>
-#include "fpga/core.h"
-
-struct mlx5_fpga_tls {
- struct list_head pending_cmds;
- spinlock_t pending_cmds_lock; /* Protects pending_cmds */
- u32 caps;
- struct mlx5_fpga_conn *conn;
-
- struct idr tx_idr;
- struct idr rx_idr;
- spinlock_t tx_idr_spinlock; /* protects the IDR */
- spinlock_t rx_idr_spinlock; /* protects the IDR */
-};
-
-int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx);
-
-void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- gfp_t flags, bool direction_sx);
-
-bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
-int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev);
-
-static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
-{
- return mdev->fpga->tls->caps;
-}
-
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn);
-
-#endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 750b21124a1a..32d4c967469c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -50,10 +50,12 @@ static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
- ft->max_fte = size ? roundup_pow_of_two(size) : 1;
+ int max_fte = ft_attr->max_fte;
+
+ ft->max_fte = max_fte ? roundup_pow_of_two(max_fte) : 1;
return 0;
}
@@ -152,6 +154,12 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
return 0;
}
+static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
+}
+
static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
struct mlx5_core_dev *slave,
bool ft_id_valid,
@@ -252,7 +260,7 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
@@ -261,17 +269,19 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ unsigned int size;
int err;
- if (size != POOL_NEXT_SIZE)
- size = roundup_pow_of_two(size);
- size = mlx5_ft_pool_get_avail_sz(dev, ft->type, size);
+	size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte);
if (!size)
return -ENOSPC;
MLX5_SET(create_flow_table_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_TABLE);
+ MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid);
MLX5_SET(create_flow_table_in, in, table_type, ft->type);
MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level);
MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? ilog2(size) : 0);
@@ -449,9 +459,11 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
return 0;
list_for_each_entry(dst, &fte->node.children, node.list) {
- if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_NONE)
continue;
- if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ if ((dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
num_encap++;
num_fwd_destinations++;
@@ -471,6 +483,30 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
return 0;
}
+
+static void
+mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context)
+{
+ void *exe_aso_ctrl;
+ void *execute_aso;
+
+ execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context,
+ execute_aso[0]);
+ MLX5_SET(execute_aso, execute_aso, valid, 1);
+ MLX5_SET(execute_aso, execute_aso, aso_object_id,
+ fte->action.exe_aso.object_id);
+
+ exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id,
+ fte->action.exe_aso.return_reg_id);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type,
+ fte->action.exe_aso.type);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color,
+ fte->action.exe_aso.flow_meter.init_color);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id,
+ fte->action.exe_aso.flow_meter.meter_idx);
+}
+
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
@@ -541,7 +577,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_hdr->id);
- MLX5_SET(flow_context, in_flow_context, ipsec_obj_id, fte->action.ipsec_obj_id);
+ MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type,
+ fte->action.crypto.type);
+ MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_obj_id,
+ fte->action.crypto.obj_id);
vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
@@ -564,18 +603,23 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int list_size = 0;
list_for_each_entry(dst, &fte->node.children, node.list) {
- unsigned int id, type = dst->dest_attr.type;
+ enum mlx5_flow_destination_type type = dst->dest_attr.type;
+ enum mlx5_ifc_flow_destination_type ifc_type;
+ unsigned int id;
if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
continue;
switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ continue;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
id = dst->dest_attr.ft_num;
- type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
id = dst->dest_attr.ft->id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
break;
case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
@@ -589,8 +633,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) {
/* destination_id is reserved */
id = 0;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK;
break;
}
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT;
id = dst->dest_attr.vport.num;
if (extended_dest &&
dst->dest_attr.vport.pkt_reformat) {
@@ -605,13 +651,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
id = dst->dest_attr.sampler_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
break;
default:
id = dst->dest_attr.tir_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
}
MLX5_SET(dest_format_struct, in_dests, destination_type,
- type);
+ ifc_type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
in_dests += dst_cnt_size;
list_size++;
@@ -646,6 +694,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
list_size);
}
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) {
+ mlx5_cmd_set_fte_flow_meter(fte, in_flow_context);
+ } else {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+ }
+
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
err_out:
kvfree(in);
@@ -788,7 +845,8 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
int err;
u32 *in;
- if (namespace == MLX5_FLOW_NAMESPACE_FDB)
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB ||
+ namespace == MLX5_FLOW_NAMESPACE_FDB_BYPASS)
max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
else
max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size);
@@ -860,18 +918,19 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
switch (namespace) {
case MLX5_FLOW_NAMESPACE_FDB:
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions);
table_type = FS_FT_FDB;
break;
+ case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC:
case MLX5_FLOW_NAMESPACE_KERNEL:
case MLX5_FLOW_NAMESPACE_BYPASS:
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_RX;
break;
case MLX5_FLOW_NAMESPACE_EGRESS:
-#ifdef CONFIG_MLX5_IPSEC
- case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL:
-#endif
+ case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC:
+ case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC:
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
@@ -968,6 +1027,12 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
}
+static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
+}
+
static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.create_flow_table = mlx5_cmd_create_flow_table,
.destroy_flow_table = mlx5_cmd_destroy_flow_table,
@@ -987,6 +1052,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_get_capabilities,
};
static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
@@ -1008,6 +1074,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_stub_get_capabilities,
};
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 220ec632d35a..8ef4254b9ea1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -38,7 +38,7 @@
struct mlx5_flow_cmds {
int (*create_flow_table)(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft);
int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft);
@@ -101,6 +101,9 @@ struct mlx5_flow_cmds {
u16 format_id, u32 *match_mask);
int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns,
int definer_id);
+
+ u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type);
};
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 386ab9a2d490..d53749248fa0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -40,8 +40,6 @@
#include "fs_cmd.h"
#include "fs_ft_pool.h"
#include "diag/fs_tracepoint.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -106,6 +104,10 @@
#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
LEFTOVERS_NUM_PRIOS)
+#define KERNEL_RX_MACSEC_NUM_PRIOS 1
+#define KERNEL_RX_MACSEC_NUM_LEVELS 2
+#define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS)
+
#define ETHTOOL_PRIO_NUM_LEVELS 1
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
@@ -116,7 +118,7 @@
#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
#define KERNEL_NIC_TC_NUM_PRIOS 1
-#define KERNEL_NIC_TC_NUM_LEVELS 2
+#define KERNEL_NIC_TC_NUM_LEVELS 3
#define ANCHOR_NUM_LEVELS 1
#define ANCHOR_NUM_PRIOS 1
@@ -128,11 +130,15 @@
#define LAG_PRIO_NUM_LEVELS 1
#define LAG_NUM_PRIOS 1
-#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1)
#define KERNEL_TX_IPSEC_NUM_PRIOS 1
#define KERNEL_TX_IPSEC_NUM_LEVELS 1
-#define KERNEL_TX_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS)
+#define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS)
+
+#define KERNEL_TX_MACSEC_NUM_PRIOS 1
+#define KERNEL_TX_MACSEC_NUM_LEVELS 2
+#define KERNEL_TX_MACSEC_MIN_LEVEL (KERNEL_TX_IPSEC_MIN_LEVEL + KERNEL_TX_MACSEC_NUM_PRIOS)
struct node_caps {
size_t arr_sz;
@@ -151,12 +157,16 @@ static struct init_tree_node {
enum mlx5_flow_table_miss_action def_miss_action;
} root_fs = {
.type = FS_TYPE_NAMESPACE,
- .ar_size = 7,
+ .ar_size = 8,
.children = (struct init_tree_node[]){
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_RX_MACSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_RX_MACSEC_NUM_PRIOS,
+ KERNEL_RX_MACSEC_NUM_LEVELS))),
ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
@@ -188,24 +198,23 @@ static struct init_tree_node {
static struct init_tree_node egress_root_fs = {
.type = FS_TYPE_NAMESPACE,
-#ifdef CONFIG_MLX5_IPSEC
- .ar_size = 2,
-#else
- .ar_size = 1,
-#endif
+ .ar_size = 3,
.children = (struct init_tree_node[]) {
ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
-#ifdef CONFIG_MLX5_IPSEC
- ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0,
+ ADD_PRIO(0, KERNEL_TX_IPSEC_MIN_LEVEL, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
KERNEL_TX_IPSEC_NUM_LEVELS))),
-#endif
+ ADD_PRIO(0, KERNEL_TX_MACSEC_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_TX_MACSEC_NUM_PRIOS,
+ KERNEL_TX_MACSEC_NUM_LEVELS))),
}
};
@@ -432,6 +441,16 @@ static bool is_fwd_next_action(u32 action)
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
}
+static bool is_fwd_dest_type(enum mlx5_flow_destination_type type)
+{
+ return type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM ||
+ type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE ||
+ type == MLX5_FLOW_DESTINATION_TYPE_UPLINK ||
+ type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER ||
+ type == MLX5_FLOW_DESTINATION_TYPE_TIR;
+}
+
static bool check_valid_spec(const struct mlx5_flow_spec *spec)
{
int i;
@@ -558,8 +577,8 @@ static void del_sw_hw_rule(struct fs_node *node)
mutex_unlock(&rule->dest_attr.ft->lock);
}
- if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
- --fte->dests_size) {
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) {
+ --fte->dests_size;
fte->modify_mask |=
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
@@ -567,17 +586,23 @@ static void del_sw_hw_rule(struct fs_node *node)
goto out;
}
- if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT &&
- --fte->dests_size) {
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ --fte->dests_size;
fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
goto out;
}
- if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
- --fte->dests_size) {
+ if (is_fwd_dest_type(rule->dest_attr.type)) {
+ --fte->dests_size;
+ --fte->fwd_dests;
+
+ if (!fte->fwd_dests)
+ fte->action.action &=
+ ~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
fte->modify_mask |=
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ goto out;
}
out:
kfree(rule);
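The new fwd_dests counter above lets del_sw_hw_rule() clear MLX5_FLOW_CONTEXT_ACTION_FWD_DEST only once the last forwarding destination is gone, while dests_size keeps counting counters as well. A reduced sketch of the accounting (types and flag values simplified):

	#include <stdio.h>

	#define ACT_FWD_DEST 0x1u

	struct fte {
		unsigned int dests_size;
		unsigned int fwd_dests;
		unsigned int action;
	};

	static void del_fwd_dest(struct fte *fte)
	{
		fte->dests_size--;
		fte->fwd_dests--;
		if (!fte->fwd_dests)	/* last forwarder: drop the action bit */
			fte->action &= ~ACT_FWD_DEST;
	}

	int main(void)
	{
		struct fte fte = { .dests_size = 3, .fwd_dests = 2,
				   .action = ACT_FWD_DEST };

		del_fwd_dest(&fte);
		printf("after 1st del: action=%#x\n", fte.action);	/* still set */
		del_fwd_dest(&fte);
		printf("after 2nd del: action=%#x\n", fte.action);	/* cleared */
		return 0;
	}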
@@ -597,6 +622,7 @@ static void del_hw_fte(struct fs_node *node)
fs_get_obj(ft, fg->node.parent);
trace_mlx5_fs_del_fte(fte);
+ WARN_ON(fte->dests_size);
dev = get_dev(&ft->node);
root = find_root(&ft->node);
if (node->active) {
@@ -1146,7 +1172,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
find_next_chained_ft(fs_prio);
ft->def_miss_action = ns->def_miss_action;
ft->ns = ns;
- err = root->cmds->create_flow_table(root, ft, ft_attr->max_fte, next_ft);
+ err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
if (err)
goto free_ft;
@@ -1186,6 +1212,12 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
}
EXPORT_SYMBOL(mlx5_create_flow_table);
+u32 mlx5_flow_table_id(struct mlx5_flow_table *ft)
+{
+ return ft->id;
+}
+EXPORT_SYMBOL(mlx5_flow_table_id);
+
struct mlx5_flow_table *
mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr *ft_attr, u16 vport)
@@ -1296,6 +1328,8 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
rule->node.type = FS_TYPE_FLOW_DEST;
if (dest)
memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ else
+ rule->dest_attr.type = MLX5_FLOW_DESTINATION_TYPE_NONE;
return rule;
}
@@ -1372,6 +1406,9 @@ create_flow_handle(struct fs_fte *fte,
if (dest) {
fte->dests_size++;
+ if (is_fwd_dest_type(dest[i].type))
+ fte->fwd_dests++;
+
type = dest[i].type ==
MLX5_FLOW_DESTINATION_TYPE_COUNTER;
*modify_mask |= type ? count : dst;
@@ -1525,7 +1562,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
if (d1->type == d2->type) {
- if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
d1->vport.num == d2->vport.num &&
d1->vport.flags == d2->vport.flags &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
@@ -1559,9 +1597,22 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
return NULL;
}
-static bool check_conflicting_actions(u32 action1, u32 action2)
+static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0,
+ const struct mlx5_fs_vlan *vlan1)
{
- u32 xored_actions = action1 ^ action2;
+ return vlan0->ethtype != vlan1->ethtype ||
+ vlan0->vid != vlan1->vid ||
+ vlan0->prio != vlan1->prio;
+}
+
+static bool check_conflicting_actions(const struct mlx5_flow_act *act1,
+ const struct mlx5_flow_act *act2)
+{
+ u32 action1 = act1->action;
+ u32 action2 = act2->action;
+ u32 xored_actions;
+
+ xored_actions = action1 ^ action2;
/* if one rule only wants to count, it's ok */
if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
@@ -1578,6 +1629,22 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
return true;
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
+ act1->pkt_reformat != act2->pkt_reformat)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ act1->modify_hdr != act2->modify_hdr)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0]))
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 &&
+ check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1]))
+ return true;
+
return false;
}
@@ -1585,7 +1652,7 @@ static int check_conflicting_ftes(struct fs_fte *fte,
const struct mlx5_flow_context *flow_context,
const struct mlx5_flow_act *flow_act)
{
- if (check_conflicting_actions(flow_act->action, fte->action.action)) {
+ if (check_conflicting_actions(flow_act, &fte->action)) {
mlx5_core_warn(get_dev(&fte->node),
"Found two FTEs with conflicting actions\n");
return -EEXIST;
@@ -1695,6 +1762,7 @@ static void free_match_list(struct match_list *head, bool ft_locked)
static int build_match_list(struct match_list *match_head,
struct mlx5_flow_table *ft,
const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_group *fg,
bool ft_locked)
{
struct rhlist_head *tmp, *list;
@@ -1709,6 +1777,9 @@ static int build_match_list(struct match_list *match_head,
rhl_for_each_entry_rcu(g, tmp, list, hash) {
struct match_list *curr_match;
+ if (fg && fg != g)
+ continue;
+
if (unlikely(!tree_get_node(&g->node)))
continue;
@@ -1888,6 +1959,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
if (!check_valid_spec(spec))
return ERR_PTR(-EINVAL);
+ if (flow_act->fg && ft->autogroup.active)
+ return ERR_PTR(-EINVAL);
+
for (i = 0; i < dest_num; i++) {
if (!dest_is_valid(&dest[i], flow_act, ft))
return ERR_PTR(-EINVAL);
@@ -1897,7 +1971,7 @@ search_again_locked:
version = atomic_read(&ft->node.version);
/* Collect all fgs which has a matching match_criteria */
- err = build_match_list(&match_head, ft, spec, take_write);
+ err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write);
if (err) {
if (take_write)
up_write_ref_node(&ft->node, false);
@@ -2063,16 +2137,18 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
down_write_ref_node(&fte->node, false);
for (i = handle->num_rules - 1; i >= 0; i--)
tree_remove_node(&handle->rule[i]->node, true);
- if (fte->dests_size) {
- if (fte->modify_mask)
- modify_fte(fte);
- up_write_ref_node(&fte->node, false);
- } else if (list_empty(&fte->node.children)) {
- del_hw_fte(&fte->node);
+ if (list_empty(&fte->node.children)) {
+ fte->node.del_hw_func(&fte->node);
/* Avoid double call to del_hw_fte */
fte->node.del_hw_func = NULL;
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
+ } else if (fte->dests_size) {
+ if (fte->modify_mask)
+ modify_fte(fte);
+ up_write_ref_node(&fte->node, false);
+ } else {
+ up_write_ref_node(&fte->node, false);
}
kfree(handle);
}
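The reordered teardown above calls the node's del_hw_func once and then clears it so the generic tree teardown cannot invoke it again: a one-shot destructor behind a function pointer. A minimal sketch of the idiom:

	#include <stdio.h>

	struct node {
		void (*del_hw_func)(struct node *node);
	};

	static void del_hw(struct node *node)
	{
		(void)node;
		puts("hardware entry destroyed");
	}

	static void destroy_once(struct node *node)
	{
		if (node->del_hw_func) {
			node->del_hw_func(node);
			node->del_hw_func = NULL;	/* avoid a double call */
		}
	}

	int main(void)
	{
		struct node n = { .del_hw_func = del_hw };

		destroy_once(&n);	/* destroys the HW entry */
		destroy_once(&n);	/* no-op: the pointer is cleared */
		return 0;
	}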
@@ -2206,6 +2282,23 @@ struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
+static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type)
+{
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC:
+ case MLX5_FLOW_NAMESPACE_LAG:
+ case MLX5_FLOW_NAMESPACE_OFFLOADS:
+ case MLX5_FLOW_NAMESPACE_ETHTOOL:
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+ case MLX5_FLOW_NAMESPACE_ANCHOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type)
{
@@ -2235,31 +2328,40 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
if (steering->sniffer_tx_root_ns)
return &steering->sniffer_tx_root_ns->ns;
return NULL;
- default:
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ root_ns = steering->fdb_root_ns;
+ prio = FDB_BYPASS_PATH;
break;
- }
-
- if (type == MLX5_FLOW_NAMESPACE_EGRESS ||
- type == MLX5_FLOW_NAMESPACE_EGRESS_KERNEL) {
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC:
+ case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC:
root_ns = steering->egress_root_ns;
prio = type - MLX5_FLOW_NAMESPACE_EGRESS;
- } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_BYPASS_PRIO;
- } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) {
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL:
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_KERNEL_PRIO;
- } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
root_ns = steering->rdma_tx_root_ns;
- } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS) {
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS:
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_COUNTERS_PRIO;
- } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS) {
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS:
root_ns = steering->rdma_tx_root_ns;
prio = RDMA_TX_COUNTERS_PRIO;
- } else { /* Must be NIC RX */
+ break;
+ default: /* Must be NIC RX */
+ WARN_ON(!is_nic_rx_ns(type));
root_ns = steering->root_ns;
prio = type;
+ break;
}
if (!root_ns)
@@ -2485,10 +2587,6 @@ static struct mlx5_flow_root_namespace
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_namespace *ns;
- if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
- (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
- cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
-
/* Create the root namespace */
root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
@@ -2629,28 +2727,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
clean_tree(&root_ns->ns.node);
}
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
-{
- struct mlx5_flow_steering *steering = dev->priv.steering;
-
- cleanup_root_ns(steering->root_ns);
- cleanup_root_ns(steering->fdb_root_ns);
- steering->fdb_root_ns = NULL;
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
- cleanup_root_ns(steering->port_sel_root_ns);
- cleanup_root_ns(steering->sniffer_rx_root_ns);
- cleanup_root_ns(steering->sniffer_tx_root_ns);
- cleanup_root_ns(steering->rdma_rx_root_ns);
- cleanup_root_ns(steering->rdma_tx_root_ns);
- cleanup_root_ns(steering->egress_root_ns);
- mlx5_cleanup_fc_stats(dev);
- kmem_cache_destroy(steering->ftes_cache);
- kmem_cache_destroy(steering->fgs_cache);
- mlx5_ft_pool_destroy(dev);
- kfree(steering);
-}
-
static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *prio;
@@ -2822,6 +2898,36 @@ static int create_fdb_fast_path(struct mlx5_flow_steering *steering)
return 0;
}
+static int create_fdb_bypass(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_flow_namespace *ns;
+ struct fs_prio *prio;
+ int i;
+
+ prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+
+ for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) {
+ prio = fs_create_prio(ns, i, 1);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+ }
+ return 0;
+}
+
+static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
+}
+
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *maj_prio;
@@ -2831,12 +2937,10 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
if (!steering->fdb_root_ns)
return -ENOMEM;
- maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
- 1);
- if (IS_ERR(maj_prio)) {
- err = PTR_ERR(maj_prio);
+ err = create_fdb_bypass(steering);
+ if (err)
goto out_err;
- }
+
err = create_fdb_fast_path(steering);
if (err)
goto out_err;
@@ -2874,10 +2978,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
return 0;
out_err:
- cleanup_root_ns(steering->fdb_root_ns);
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
- steering->fdb_root_ns = NULL;
+ cleanup_fdb_root_ns(steering);
return err;
}
@@ -2995,6 +3096,22 @@ void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
steering->esw_ingress_root_ns = NULL;
}
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ ns = mlx5_get_flow_namespace(dev, type);
+ if (!ns)
+ return 0;
+
+ root = find_root(&ns->node);
+ if (!root)
+ return 0;
+
+ return root->cmds->get_capabilities(root, root->table_type);
+}
+
static int init_egress_root_ns(struct mlx5_flow_steering *steering)
{
int err;
@@ -3016,37 +3133,24 @@ cleanup:
return err;
}
-int mlx5_init_fs(struct mlx5_core_dev *dev)
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
{
- struct mlx5_flow_steering *steering;
- int err = 0;
-
- err = mlx5_init_fc_stats(dev);
- if (err)
- return err;
-
- err = mlx5_ft_pool_init(dev);
- if (err)
- return err;
-
- steering = kzalloc(sizeof(*steering), GFP_KERNEL);
- if (!steering) {
- err = -ENOMEM;
- goto err;
- }
+ struct mlx5_flow_steering *steering = dev->priv.steering;
- steering->dev = dev;
- dev->priv.steering = steering;
+ cleanup_root_ns(steering->root_ns);
+ cleanup_fdb_root_ns(steering);
+ cleanup_root_ns(steering->port_sel_root_ns);
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->rdma_rx_root_ns);
+ cleanup_root_ns(steering->rdma_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
+}
- steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
- sizeof(struct mlx5_flow_group), 0,
- 0, NULL);
- steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
- 0, NULL);
- if (!steering->ftes_cache || !steering->fgs_cache) {
- err = -ENOMEM;
- goto err;
- }
+int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err = 0;
if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev, nic_flow_table))) ||
@@ -3097,16 +3201,71 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
- if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
err = init_egress_root_ns(steering);
if (err)
goto err;
}
return 0;
+
+err:
+ mlx5_fs_core_cleanup(dev);
+ return err;
+}
+
+void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ kmem_cache_destroy(steering->ftes_cache);
+ kmem_cache_destroy(steering->fgs_cache);
+ kfree(steering);
+ mlx5_ft_pool_destroy(dev);
+ mlx5_cleanup_fc_stats(dev);
+}
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering;
+ int err = 0;
+
+ err = mlx5_init_fc_stats(dev);
+ if (err)
+ return err;
+
+ err = mlx5_ft_pool_init(dev);
+ if (err)
+ goto err;
+
+ steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+ if (!steering) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ steering->dev = dev;
+ dev->priv.steering = steering;
+
+ if (mlx5_fs_dr_is_supported(dev))
+ steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+ else
+ steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+
+ steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+ sizeof(struct mlx5_flow_group), 0,
+ 0, NULL);
+ steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+ 0, NULL);
+ if (!steering->ftes_cache || !steering->fgs_cache) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+
err:
- mlx5_cleanup_fs(dev);
+ mlx5_fs_core_free(dev);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 7711db245c63..3af50fd04d28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -120,6 +120,11 @@ enum mlx5_flow_steering_mode {
MLX5_FLOW_STEERING_MODE_SMFS
};
+enum mlx5_flow_steering_capabilty {
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
+ MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
+};
+
struct mlx5_flow_steering {
struct mlx5_core_dev *dev;
enum mlx5_flow_steering_mode mode;
@@ -203,7 +208,7 @@ struct mlx5_ft_underlay_qp {
u32 qpn;
};
-#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_c00
+#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_e00
/* Calculate the fte_match_param length and without the reserved length.
* Make sure the reserved field is the last.
*/
@@ -221,6 +226,7 @@ struct fs_fte {
struct mlx5_fs_dr_rule fs_dr_rule;
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
+ u32 fwd_dests;
u32 index;
struct mlx5_flow_context flow_context;
struct mlx5_flow_act action;
@@ -293,14 +299,18 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
enum mlx5_flow_steering_mode mode);
-int mlx5_init_fs(struct mlx5_core_dev *dev);
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
+void mlx5_fs_core_free(struct mlx5_core_dev *dev);
+int mlx5_fs_core_init(struct mlx5_core_dev *dev);
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type);
+
struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 7e0e04cf26f8..b406e0367af6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -38,9 +38,10 @@
#include "fs_cmd.h"
#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000)
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
-#define MLX5_SF_NUM_COUNTERS_BULK 8
+#define MLX5_INIT_COUNTERS_BULK 8
#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
#define MLX5_FC_POOL_USED_BUFF_RATIO 10
@@ -145,13 +146,15 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
spin_unlock(&fc_stats->counters_idr_lock);
}
-static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
+static int get_init_bulk_query_len(struct mlx5_core_dev *dev)
{
- int num_counters_bulk = mlx5_core_is_sf(dev) ?
- MLX5_SF_NUM_COUNTERS_BULK :
- MLX5_SW_MAX_COUNTERS_BULK;
+ return min_t(int, MLX5_INIT_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
- return min_t(int, num_counters_bulk,
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
+{
+ return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
(1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
}
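
Both helpers now reduce to the same clamp: a software ceiling taken against the device limit, which firmware reports as a log2 value. A one-function sketch of that calculation (sw_ceiling and log_max stand in for the macro and the queried capability):

#include <linux/minmax.h>

/* Clamp a desired bulk-query length to what the device advertises.
 * log_max is a log2 capability, so the hardware ceiling is 1 << log_max.
 */
static int bulk_query_len(int sw_ceiling, int log_max)
{
	return min(sw_ceiling, 1 << log_max);
}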
@@ -177,7 +180,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
bool query_more_counters = (first->id <= last_id);
- int max_bulk_len = get_max_bulk_query_len(dev);
+ int cur_bulk_len = fc_stats->bulk_query_len;
u32 *data = fc_stats->bulk_query_out;
struct mlx5_fc *counter = first;
u32 bulk_base_id;
@@ -189,7 +192,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
bulk_base_id = counter->id & ~0x3;
/* number of counters to query inc. the last counter */
- bulk_len = min_t(int, max_bulk_len,
+ bulk_len = min_t(int, cur_bulk_len,
ALIGN(last_id - bulk_base_id + 1, 4));
err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
@@ -230,6 +233,41 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
mlx5_fc_free(dev, counter);
}
+static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ int max_bulk_len = get_max_bulk_query_len(dev);
+ unsigned long now = jiffies;
+ u32 *bulk_query_out_tmp;
+ int max_out_len;
+
+ if (fc_stats->bulk_query_alloc_failed &&
+ time_before(now, fc_stats->next_bulk_query_alloc))
+ return;
+
+ max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+ bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL);
+ if (!bulk_query_out_tmp) {
+ mlx5_core_warn_once(dev,
+ "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n",
+ max_bulk_len);
+ fc_stats->bulk_query_alloc_failed = true;
+ fc_stats->next_bulk_query_alloc =
+ now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD;
+ return;
+ }
+
+ kfree(fc_stats->bulk_query_out);
+ fc_stats->bulk_query_out = bulk_query_out_tmp;
+ fc_stats->bulk_query_len = max_bulk_len;
+ if (fc_stats->bulk_query_alloc_failed) {
+ mlx5_core_info(dev,
+ "Flow counters bulk query buffer size increased, bulk_size(%d)\n",
+ max_bulk_len);
+ fc_stats->bulk_query_alloc_failed = false;
+ }
+}
+
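
The resize helper above retries a failed enlargement only after a 180-second hold-off recorded in jiffies, so the periodic worker cannot hammer the allocator every sampling interval. A stripped-down sketch of that backoff pattern, with illustrative names:

#include <linux/jiffies.h>
#include <linux/slab.h>

#define RETRY_PERIOD	msecs_to_jiffies(180 * 1000)

struct grow_state {
	void *buf;
	size_t len;
	bool alloc_failed;
	unsigned long next_retry;	/* in jiffies */
};

/* Try to grow the buffer; on failure, arm a hold-off so further
 * attempts are skipped until RETRY_PERIOD has elapsed.
 */
static void try_grow(struct grow_state *st, size_t new_len)
{
	void *tmp;

	if (st->alloc_failed && time_before(jiffies, st->next_retry))
		return;

	tmp = kzalloc(new_len, GFP_KERNEL);
	if (!tmp) {
		st->alloc_failed = true;
		st->next_retry = jiffies + RETRY_PERIOD;
		return;
	}

	kfree(st->buf);
	st->buf = tmp;
	st->len = new_len;
	st->alloc_failed = false;
}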
static void mlx5_fc_stats_work(struct work_struct *work)
{
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
@@ -247,15 +285,22 @@ static void mlx5_fc_stats_work(struct work_struct *work)
queue_delayed_work(fc_stats->wq, &fc_stats->work,
fc_stats->sampling_interval);
- llist_for_each_entry(counter, addlist, addlist)
+ llist_for_each_entry(counter, addlist, addlist) {
mlx5_fc_stats_insert(dev, counter);
+ fc_stats->num_counters++;
+ }
llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
mlx5_fc_stats_remove(dev, counter);
mlx5_fc_release(dev, counter);
+ fc_stats->num_counters--;
}
+ if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) &&
+ fc_stats->num_counters > get_init_bulk_query_len(dev))
+ mlx5_fc_stats_bulk_query_size_increase(dev);
+
if (time_before(now, fc_stats->next_query) ||
list_empty(&fc_stats->counters))
return;
@@ -378,8 +423,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy);
int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
- int max_bulk_len;
- int max_out_len;
+ int init_bulk_len;
+ int init_out_len;
spin_lock_init(&fc_stats->counters_idr_lock);
idr_init(&fc_stats->counters_idr);
@@ -387,11 +432,12 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
init_llist_head(&fc_stats->addlist);
init_llist_head(&fc_stats->dellist);
- max_bulk_len = get_max_bulk_query_len(dev);
- max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
- fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
+ init_bulk_len = get_init_bulk_query_len(dev);
+ init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len);
+ fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL);
if (!fc_stats->bulk_query_out)
return -ENOMEM;
+ fc_stats->bulk_query_len = init_bulk_len;
fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
if (!fc_stats->wq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 2d8406fab844..f34e758a2f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -32,11 +32,9 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
-#include <linux/module.h>
#include "mlx5_core.h"
#include "../../mlxfw/mlxfw.h"
#include "lib/tout.h"
-#include "accel/tls.h"
enum {
MCQS_IDENTIFIER_BOOT_IMG = 0x1,
@@ -250,7 +248,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
- if (mlx5_accel_is_ktls_tx(dev) || mlx5_accel_is_ktls_rx(dev)) {
+ if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
if (err)
return err;
@@ -275,6 +273,19 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN_64(dev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_MACSEC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, adv_virtualization)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ADV_VIRTUALIZATION);
+ if (err)
+ return err;
+ }
+
return 0;
}
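
Each optional capability page above is queried only behind a gating check, so firmware that predates a feature is never asked about it. A hedged sketch of the guard-then-query shape; the context struct, bit position and query_cap_page() are illustrative stand-ins, not the mlx5 API:

#include <linux/bits.h>
#include <linux/types.h>

#define CAP_MACSEC_OFFLOAD	BIT_ULL(39)	/* illustrative bit position */

struct caps_ctx {
	u64 general_obj_types;
};

/* Stub: would issue a QUERY_HCA_CAP command for the given page. */
static int query_cap_page(struct caps_ctx *ctx, int page)
{
	return 0;
}

/* Query optional pages only when the device advertises them; pages that
 * are not advertised simply stay zeroed in the cached capabilities.
 */
static int query_optional_caps(struct caps_ctx *ctx)
{
	int err;

	if (ctx->general_obj_types & CAP_MACSEC_OFFLOAD) {
		err = query_cap_page(ctx, 1 /* MACSEC page */);
		if (err)
			return err;
	}
	return 0;
}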
@@ -291,6 +302,10 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
sw_owner_id[i]);
}
+ if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) &&
+ dev->priv.sw_vhca_id > 0)
+ MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id);
+
return mlx5_cmd_exec_in(dev, init_hca, in);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 0b0234f9d694..9d908a0ccfef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -8,7 +8,8 @@
enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
- MLX5_FW_RESET_FLAGS_PENDING_COMP
+ MLX5_FW_RESET_FLAGS_PENDING_COMP,
+ MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
};
struct mlx5_fw_reset {
@@ -57,7 +58,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1);
}
-static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
+static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
+ u8 *reset_type, u8 *reset_state)
{
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
@@ -71,25 +73,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r
*reset_level = MLX5_GET(mfrl_reg, out, reset_level);
if (reset_type)
*reset_type = MLX5_GET(mfrl_reg, out, reset_type);
+ if (reset_state)
+ *reset_state = MLX5_GET(mfrl_reg, out, reset_state);
return 0;
}
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
{
- return mlx5_reg_mfrl_query(dev, reset_level, reset_type);
+ return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
}
-int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel)
+static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
+ struct netlink_ext_ack *extack)
+{
+ u8 reset_state;
+
+ if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+ goto out;
+
+ switch (reset_state) {
+ case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
+ case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
+ return -EBUSY;
+ case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
+ return -ETIMEDOUT;
+ case MLX5_MFRL_REG_RESET_STATE_NACK:
+ NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
+ return -EPERM;
+ }
+
+out:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset failed");
+ return -EIO;
+}
+
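
The new helper translates the MFRL reset_state field into an errno plus a devlink extack message, collapsing unreadable or unknown states into a generic -EIO. A reduced sketch of that mapping, with a plain enum standing in for the register field:

#include <linux/errno.h>
#include <linux/netlink.h>

enum reset_state {	/* illustrative subset of the MFRL reset_state values */
	RESET_IN_NEGOTIATION,
	RESET_IN_PROGRESS,
	RESET_TIMEOUT,
	RESET_NACK,
};

/* Map a device-reported state to an errno and a human-readable extack
 * message; anything unrecognized falls through to a generic -EIO.
 */
static int reset_state_to_err(enum reset_state state,
			      struct netlink_ext_ack *extack)
{
	switch (state) {
	case RESET_IN_NEGOTIATION:
	case RESET_IN_PROGRESS:
		NL_SET_ERR_MSG_MOD(extack, "Reset already in progress");
		return -EBUSY;
	case RESET_TIMEOUT:
		NL_SET_ERR_MSG_MOD(extack, "Reset timed out");
		return -ETIMEDOUT;
	case RESET_NACK:
		NL_SET_ERR_MSG_MOD(extack, "Reset refused by a peer host");
		return -EPERM;
	}
	NL_SET_ERR_MSG_MOD(extack, "Reset failed");
	return -EIO;
}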
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
int err;
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
- err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true);
- if (err)
- clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
- return err;
+
+ MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1);
+ err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MFRL, 0, 1, false);
+ if (!err)
+ return 0;
+
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
+ return mlx5_fw_reset_get_reset_state_err(dev, extack);
+
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
+ return mlx5_cmd_check(dev, err, in, out);
}
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
@@ -105,44 +149,48 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
complete(&fw_reset->done);
} else {
- mlx5_load_one(dev);
+ mlx5_unload_one(dev);
+ if (mlx5_health_wait_pci_up(dev))
+ mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
+ else
+ mlx5_load_one(dev, false);
devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
}
}
-static void mlx5_sync_reset_reload_work(struct work_struct *work)
-{
- struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
- reset_reload_work);
- struct mlx5_core_dev *dev = fw_reset->dev;
- int err;
-
- mlx5_enter_error_state(dev, true);
- mlx5_unload_one(dev);
- err = mlx5_health_wait_pci_up(dev);
- if (err)
- mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
- fw_reset->ret = err;
- mlx5_fw_reset_complete_reload(dev);
-}
-
static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
- del_timer(&fw_reset->timer);
+ del_timer_sync(&fw_reset->timer);
}
-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
mlx5_stop_sync_reset_poll(dev);
- clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
+ return 0;
+}
+
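
Switching the set/clear helpers to test_and_set_bit()/test_and_clear_bit() makes both transitions idempotent: of two racing paths, exactly one wins and the other backs off with -EALREADY, which is what lets the reload, now and abort handlers call them unconditionally. A minimal sketch of the pattern, with an illustrative flag word:

#include <linux/bitops.h>
#include <linux/errno.h>

enum { RESET_REQUESTED };	/* bit index in the flags word */

/* Atomically claim the requested state; only the first caller wins. */
static int reset_request_set(unsigned long *flags)
{
	if (test_and_set_bit(RESET_REQUESTED, flags))
		return -EALREADY;	/* already requested elsewhere */
	/* ... stop health poll, start sync-reset poll ... */
	return 0;
}

/* Atomically release it; only the caller that observes the bit set
 * proceeds with the teardown side effects.
 */
static int reset_request_clear(unsigned long *flags)
{
	if (!test_and_clear_bit(RESET_REQUESTED, flags))
		return -EALREADY;	/* already cleared elsewhere */
	/* ... stop sync-reset poll, restart health poll ... */
	return 0;
}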
+static void mlx5_sync_reset_reload_work(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_reload_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ mlx5_sync_reset_clear_reset_requested(dev, false);
+ mlx5_enter_error_state(dev, true);
+ mlx5_fw_reset_complete_reload(dev);
}
#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
@@ -159,8 +207,10 @@ static void poll_sync_reset(struct timer_list *t)
if (fatal_error) {
mlx5_core_warn(dev, "Got Device Reset\n");
- mlx5_sync_reset_clear_reset_requested(dev, false);
- queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ else
+ mlx5_core_err(dev, "Device is being removed, Drop new reset work\n");
return;
}
@@ -186,13 +236,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}
-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
mlx5_stop_health_poll(dev, true);
- set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
+ return 0;
}
static void mlx5_fw_live_patch_event(struct work_struct *work)
@@ -221,7 +275,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
- mlx5_sync_reset_set_reset_requested(dev);
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
@@ -303,6 +359,23 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
err = -ETIMEDOUT;
}
+ do {
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &reg16);
+ if (err)
+ return err;
+ if (reg16 == dev_id)
+ break;
+ msleep(20);
+ } while (!time_after(jiffies, timeout));
+
+ if (reg16 == dev_id) {
+ mlx5_core_info(dev, "Firmware responds to PCI config cycles again\n");
+ } else {
+ mlx5_core_err(dev, "Firmware is not responsive (0x%04x) after %llu ms\n",
+ reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
+ err = -ETIMEDOUT;
+ }
+
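
The added loop polls the function's config space until the expected device ID reads back, bounded by a jiffies deadline; until firmware answers config cycles again, reads return all-ones or garbage. A self-contained sketch of that wait (timeout_ms stands in for the PCI_TOGGLE timeout):

#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/pci.h>

/* Poll config space until the expected device ID reappears or the
 * deadline passes. Returns 0 once the device answers, -ETIMEDOUT
 * otherwise, or the error from the config read itself.
 */
static int wait_for_device_id(struct pci_dev *pdev, u16 dev_id,
			      unsigned int timeout_ms)
{
	unsigned long deadline = jiffies + msecs_to_jiffies(timeout_ms);
	u16 reg16;
	int err;

	do {
		err = pci_read_config_word(pdev, PCI_DEVICE_ID, &reg16);
		if (err)
			return err;
		if (reg16 == dev_id)
			return 0;
		msleep(20);
	} while (!time_after(jiffies, deadline));

	return -ETIMEDOUT;
}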
restore:
list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
pci_cfg_access_unlock(sdev);
@@ -319,7 +392,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
- mlx5_sync_reset_clear_reset_requested(dev, false);
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
@@ -336,7 +410,6 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
}
mlx5_enter_error_state(dev, true);
- mlx5_unload_one(dev);
done:
fw_reset->ret = err;
mlx5_fw_reset_complete_reload(dev);
@@ -348,10 +421,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;
- if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
-
- mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}
@@ -380,9 +451,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
struct mlx5_eqe *eqe = data;
+ if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ return NOTIFY_DONE;
+
switch (eqe->sub_type) {
case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
- queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+ queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
break;
case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
mlx5_sync_reset_events_handle(fw_reset, eqe);
@@ -426,6 +500,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
}
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+ cancel_work_sync(&fw_reset->fw_live_patch_work);
+ cancel_work_sync(&fw_reset->reset_request_work);
+ cancel_work_sync(&fw_reset->reset_reload_work);
+ cancel_work_sync(&fw_reset->reset_now_work);
+ cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
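
mlx5_drain_fw_reset() follows the usual drain order for event-driven work: first raise a flag that makes the notifier and the poll timer refuse new submissions, then cancel_work_sync() each item so nothing can requeue behind the cancels. A sketch of that order on a hypothetical two-work context:

#include <linux/bitops.h>
#include <linux/workqueue.h>

enum { DROP_NEW_REQUESTS };

struct drainable {
	unsigned long flags;
	struct work_struct event_work;
	struct work_struct reload_work;
};

/* Producers must test DROP_NEW_REQUESTS before queue_work(); setting it
 * first guarantees no work is queued after the cancels below return.
 */
static void drain(struct drainable *d)
{
	set_bit(DROP_NEW_REQUESTS, &d->flags);
	cancel_work_sync(&d->event_work);
	cancel_work_sync(&d->reload_work);
}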
int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
index 7761ee5fc7d0..dc141c7e641a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -9,12 +9,14 @@
void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable);
bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev);
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type);
-int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel);
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack);
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 3ca998874c50..86ed87d704f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
@@ -420,6 +419,11 @@ static void print_health_info(struct mlx5_core_dev *dev)
if (!ioread8(&h->synd))
return;
+ if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
+ mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
+ return;
+ }
+
rfr_severity = ioread8(&h->rfr_severity);
severity = mlx5_health_get_severity(rfr_severity);
mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
@@ -597,7 +601,7 @@ static void mlx5_fw_reporter_err_work(struct work_struct *work)
fw_reporter_ctx.miss_counter = health->miss_counter;
if (fw_reporter_ctx.err_synd) {
devlink_health_report(health->fw_reporter,
- "FW syndrom reported", &fw_reporter_ctx);
+ "FW syndrome reported", &fw_reporter_ctx);
return;
}
if (fw_reporter_ctx.miss_counter)
@@ -662,16 +666,20 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
struct mlx5_fw_reporter_ctx fw_reporter_ctx;
struct mlx5_core_health *health;
struct mlx5_core_dev *dev;
+ struct devlink *devlink;
struct mlx5_priv *priv;
health = container_of(work, struct mlx5_core_health, fatal_report_work);
priv = container_of(health, struct mlx5_priv, health);
dev = container_of(priv, struct mlx5_core_dev, priv);
+ devlink = priv_to_devlink(dev);
enter_error_state(dev, false);
if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+ devl_lock(devlink);
if (mlx5_health_try_recover(dev))
mlx5_core_err(dev, "health recovery failed\n");
+ devl_unlock(devlink);
return;
}
fw_reporter_ctx.err_synd = health->synd;
@@ -694,11 +702,25 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
.dump = mlx5_fw_fatal_reporter_dump,
};
-#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
+#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
+#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
+#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
+#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
+
static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
struct devlink *devlink = priv_to_devlink(dev);
+ u64 grace_period;
+
+ if (mlx5_core_is_ecpf(dev)) {
+ grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
+ } else if (mlx5_core_is_pf(dev)) {
+ grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
+ } else {
+ /* VF or SF */
+ grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
+ }
health->fw_reporter =
devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
@@ -710,7 +732,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
health->fw_fatal_reporter =
devlink_health_reporter_create(devlink,
&mlx5_fw_fatal_reporter_ops,
- MLX5_REPORTER_FW_GRACEFUL_PERIOD,
+ grace_period,
dev);
if (IS_ERR(health->fw_fatal_reporter))
mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
@@ -835,9 +857,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
add_timer(&health->timer);
-
- if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
- queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
}
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
@@ -854,6 +873,14 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
del_timer_sync(&health->timer);
}
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
+ queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
+}
+
void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
@@ -867,13 +894,6 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
cancel_work_sync(&health->fatal_report_work);
}
-void mlx5_health_flush(struct mlx5_core_dev *dev)
-{
- struct mlx5_core_health *health = &dev->priv.health;
-
- flush_workqueue(health->wq);
-}
-
void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 962d41418ce7..c247cca154e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -32,6 +32,7 @@
#include "en.h"
#include "ipoib.h"
+#include "en/fs_ethtool.h"
static void mlx5i_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
@@ -39,7 +40,7 @@ static void mlx5i_get_drvinfo(struct net_device *dev,
struct mlx5e_priv *priv = mlx5i_epriv(dev);
mlx5e_ethtool_get_drvinfo(priv, drvinfo);
- strlcpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]",
+ strscpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]",
sizeof(drvinfo->driver));
}
@@ -67,7 +68,9 @@ static void mlx5i_get_ethtool_stats(struct net_device *dev,
}
static int mlx5i_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
@@ -75,11 +78,13 @@ static int mlx5i_set_ringparam(struct net_device *dev,
}
static void mlx5i_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
- mlx5e_ethtool_get_ringparam(priv, param);
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
}
static int mlx5i_set_channels(struct net_device *dev,
@@ -105,7 +110,7 @@ static int mlx5i_set_coalesce(struct net_device *netdev,
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- return mlx5e_ethtool_set_coalesce(priv, coal);
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
}
static int mlx5i_get_coalesce(struct net_device *netdev,
@@ -115,7 +120,7 @@ static int mlx5i_get_coalesce(struct net_device *netdev,
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- return mlx5e_ethtool_get_coalesce(priv, coal);
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
}
static int mlx5i_get_ts_info(struct net_device *netdev,
@@ -217,7 +222,6 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev,
return 0;
}
-#ifdef CONFIG_MLX5_EN_RXNFC
static u32 mlx5i_flow_type_mask(u32 flow_type)
{
return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
@@ -239,9 +243,18 @@ static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ /* ETHTOOL_GRXRINGS is needed by ethtool -x, which is not part
+ * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
+ * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
+ * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
+ */
+ if (info->cmd == ETHTOOL_GRXRINGS) {
+ info->data = priv->channels.params.num_channels;
+ return 0;
+ }
+
return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs);
}
-#endif
const struct ethtool_ops mlx5i_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
@@ -259,10 +272,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
.get_coalesce = mlx5i_get_coalesce,
.set_coalesce = mlx5i_set_coalesce,
.get_ts_info = mlx5i_get_ts_info,
-#ifdef CONFIG_MLX5_EN_RXNFC
.get_rxnfc = mlx5i_get_rxnfc,
.set_rxnfc = mlx5i_set_rxnfc,
-#endif
.get_link_ksettings = mlx5i_get_link_ksettings,
.get_link = ethtool_op_get_link,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index ea1efdecc88c..4e3a75496dd9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -35,6 +35,7 @@
#include "en.h"
#include "en/params.h"
#include "ipoib.h"
+#include "en/fs_ethtool.h"
#define IB_DEFAULT_Q_KEY 0xb1b
#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
@@ -110,14 +111,14 @@ void mlx5i_cleanup(struct mlx5e_priv *priv)
static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
{
- struct mlx5e_sw_stats s = { 0 };
+ struct rtnl_link_stats64 s = {};
int i, j;
for (i = 0; i < priv->stats_nch; i++) {
struct mlx5e_channel_stats *channel_stats;
struct mlx5e_rq_stats *rq_stats;
- channel_stats = &priv->channel_stats[i];
+ channel_stats = priv->channel_stats[i];
rq_stats = &channel_stats->rq;
s.rx_packets += rq_stats->packets;
@@ -128,11 +129,17 @@ static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
s.tx_packets += sq_stats->packets;
s.tx_bytes += sq_stats->bytes;
- s.tx_queue_dropped += sq_stats->dropped;
+ s.tx_dropped += sq_stats->dropped;
}
}
- memcpy(&priv->stats.sw, &s, sizeof(s));
+ memset(&priv->stats.sw, 0, sizeof(s));
+
+ priv->stats.sw.rx_packets = s.rx_packets;
+ priv->stats.sw.rx_bytes = s.rx_bytes;
+ priv->stats.sw.tx_packets = s.tx_packets;
+ priv->stats.sw.tx_bytes = s.tx_bytes;
+ priv->stats.sw.tx_queue_dropped = s.tx_dropped;
}
void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
@@ -314,43 +321,47 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
{
+ struct mlx5_flow_namespace *ns =
+ mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
int err;
- priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
- if (!priv->fs.ns)
+ if (!ns)
return -EINVAL;
- err = mlx5e_arfs_create_tables(priv);
+ mlx5e_fs_set_ns(priv->fs, ns, false);
+ err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
if (err) {
netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
err);
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- err = mlx5e_create_ttc_table(priv);
+ err = mlx5e_create_ttc_table(priv->fs, priv->rx_res);
if (err) {
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
goto err_destroy_arfs_tables;
}
- mlx5e_ethtool_init_steering(priv);
+ mlx5e_ethtool_init_steering(priv->fs);
return 0;
err_destroy_arfs_tables:
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
return err;
}
static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
{
- mlx5e_destroy_ttc_table(priv);
- mlx5e_arfs_destroy_tables(priv);
- mlx5e_ethtool_cleanup_steering(priv);
+ mlx5e_destroy_ttc_table(priv->fs);
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
+ mlx5e_ethtool_cleanup_steering(priv->fs);
}
static int mlx5i_init_rx(struct mlx5e_priv *priv)
@@ -358,9 +369,18 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
int err;
- priv->rx_res = mlx5e_rx_res_alloc();
- if (!priv->rx_res)
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
return -ENOMEM;
+ }
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res) {
+ err = -ENOMEM;
+ goto err_free_fs;
+ }
mlx5e_create_q_counters(priv);
@@ -391,6 +411,8 @@ err_destroy_q_counters:
mlx5e_destroy_q_counters(priv);
mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
+err_free_fs:
+ mlx5e_fs_cleanup(priv->fs);
return err;
}
@@ -402,6 +424,7 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
mlx5e_destroy_q_counters(priv);
mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
+ mlx5e_fs_cleanup(priv->fs);
}
/* The stats groups order is opposite to the update_stats() order calls */
@@ -440,10 +463,8 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.update_carrier = NULL, /* no HW update in IB link */
.rx_handlers = &mlx5i_rx_handlers,
.max_tc = MLX5I_MAX_NUM_TC,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5i_stats_grps,
.stats_grps_num = mlx5i_stats_grps_num,
- .rx_ptp_support = false,
};
/* mlx5i netdev NDos */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 5308f23702bc..0227a521d301 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -349,8 +349,6 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.update_stats = NULL,
.rx_handlers = &mlx5i_rx_handlers,
.max_tc = MLX5I_MAX_NUM_TC,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
- .rx_ptp_support = false,
};
const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
new file mode 100644
index 000000000000..380a208ab137
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "pci_irq.h"
+
+static void cpu_put(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]--;
+}
+
+static void cpu_get(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]++;
+}
+
+/* Gets the least loaded CPU, i.e. the CPU with the fewest IRQs bound to it */
+static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask)
+{
+ int best_cpu = -1;
+ int cpu;
+
+ for_each_cpu_and(cpu, req_mask, cpu_online_mask) {
+ /* CPU has zero IRQs on it. No need to search any more CPUs. */
+ if (!pool->irqs_per_cpu[cpu]) {
+ best_cpu = cpu;
+ break;
+ }
+ if (best_cpu < 0)
+ best_cpu = cpu;
+ if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu])
+ best_cpu = cpu;
+ }
+ if (best_cpu == -1) {
+ /* There are no online CPUs in req_mask */
+ mlx5_core_err(pool->dev, "No online CPUs in req_mask (%*pbl)\n",
+ cpumask_pr_args(req_mask));
+ best_cpu = cpumask_first(cpu_online_mask);
+ }
+ pool->irqs_per_cpu[best_cpu]++;
+ return best_cpu;
+}
+
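
cpu_get_least_loaded() is a linear scan over the online CPUs of the request mask, with an early exit on a zero-load CPU and a fallback when the mask contains no online CPU at all. A compact sketch of the same selection (the per-CPU load array is illustrative):

#include <linux/cpumask.h>

/* Pick the online CPU in @mask with the fewest IRQs assigned; fall back
 * to the first online CPU when @mask and the online mask don't intersect.
 */
static int least_loaded_cpu(const struct cpumask *mask,
			    const u16 *irqs_per_cpu)
{
	int best = -1;
	int cpu;

	for_each_cpu_and(cpu, mask, cpu_online_mask) {
		if (!irqs_per_cpu[cpu])
			return cpu;	/* can't beat a zero-load CPU */
		if (best < 0 || irqs_per_cpu[cpu] < irqs_per_cpu[best])
			best = cpu;
	}
	return best >= 0 ? best : cpumask_first(cpu_online_mask);
}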
+/* Creating an IRQ from irq_pool */
+static struct mlx5_irq *
+irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ cpumask_var_t auto_mask;
+ struct mlx5_irq *irq;
+ u32 irq_index;
+ int err;
+
+ if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
+ if (err)
+ return ERR_PTR(err);
+ if (pool->irqs_per_cpu) {
+ if (cpumask_weight(req_mask) > 1)
+ /* if req_mask contains more than one CPU, set the least loaded CPU
+ * of req_mask
+ */
+ cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+ else
+ cpu_get(pool, cpumask_first(req_mask));
+ }
+ irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
+ free_cpumask_var(auto_mask);
+ return irq;
+}
+
+/* Looking for the IRQ with the smallest refcount that fits req_mask.
+ * If pool is sf_comp_pool, then we are looking for an IRQ with any of the
+ * requested CPUs in req_mask.
+ * For example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask
+ * isn't a subset of req_mask, so we skip it; irq1_mask is a subset of
+ * req_mask, so we don't skip it.
+ * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will
+ * fit. And since a mask is a subset of itself, we will pass the first if below.
+ */
+static struct mlx5_irq *
+irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ int start = pool->xa_num_irqs.min;
+ int end = pool->xa_num_irqs.max;
+ struct mlx5_irq *irq = NULL;
+ struct mlx5_irq *iter;
+ int irq_refcount = 0;
+ unsigned long index;
+
+ lockdep_assert_held(&pool->lock);
+ xa_for_each_range(&pool->irqs, index, iter, start, end) {
+ struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter);
+ int iter_refcount = mlx5_irq_read_locked(iter);
+
+ if (!cpumask_subset(iter_mask, req_mask))
+ /* skip IRQs with a mask which is not subset of req_mask */
+ continue;
+ if (iter_refcount < pool->min_threshold)
+ /* If we found an IRQ with less than min_thres, return it */
+ return iter;
+ if (!irq || iter_refcount < irq_refcount) {
+ /* In case we don't find an IRQ with less than min_thres,
+ * keep a pointer to the least used IRQ
+ */
+ irq_refcount = iter_refcount;
+ irq = iter;
+ }
+ }
+ return irq;
+}
+
+/**
+ * mlx5_irq_affinity_request - request an IRQ according to the given mask.
+ * @pool: IRQ pool to request from.
+ * @req_mask: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ struct mlx5_irq *least_loaded_irq, *new_irq;
+
+ mutex_lock(&pool->lock);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+ if (least_loaded_irq &&
+ mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
+ goto out;
+ /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
+ new_irq = irq_pool_request_irq(pool, req_mask);
+ if (IS_ERR(new_irq)) {
+ if (!least_loaded_irq) {
+ /* We failed to create an IRQ and didn't find an existing one */
+ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
+ PTR_ERR(new_irq));
+ mutex_unlock(&pool->lock);
+ return new_irq;
+ }
+ /* We failed to create a new IRQ for the requested affinity,
+ * share an existing IRQ instead.
+ */
+ goto out;
+ }
+ least_loaded_irq = new_irq;
+ goto unlock;
+out:
+ mlx5_irq_get_locked(least_loaded_irq);
+ if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold)
+ mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+ pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(least_loaded_irq)), pool->name,
+ mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
+unlock:
+ mutex_unlock(&pool->lock);
+ return least_loaded_irq;
+}
+
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ int i;
+
+ for (i = 0; i < num_irqs; i++) {
+ int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i]));
+
+ synchronize_irq(pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(irqs[i])));
+ if (mlx5_irq_put(irqs[i]))
+ if (pool->irqs_per_cpu)
+ cpu_put(pool, cpu);
+ }
+}
+
+/**
+ * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bound to at most 1 CPU.
+ * This function is requesting IRQs according to the default assignment.
+ * The default assignment policy is:
+ * - in each iteration, request the least loaded IRQ which is not bound to any
+ * CPU of the previous IRQs requested.
+ *
+ * This function returns the number of IRQs requested (which might be smaller
+ * than @nirqs) on success, or a negative error code in case of an error.
+ */
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i = 0;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_copy(req_mask, cpu_online_mask);
+ for (i = 0; i < nirqs; i++) {
+ if (mlx5_irq_pool_is_sf_pool(pool))
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ else
+ /* In case the SF pool doesn't exist, fall back to the PF IRQs.
+ * The PF IRQs are already allocated and bound to a CPU
+ * at this point. Hence, only an index is needed.
+ */
+ irq = mlx5_irq_request(dev, i, NULL);
+ if (IS_ERR(irq))
+ break;
+ irqs[i] = irq;
+ cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+ mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+ cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+ mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
+ }
+ free_cpumask_var(req_mask);
+ if (!i)
+ return PTR_ERR(irq);
+ return i;
+}
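
The auto-assignment spreads IRQs by starting from the full online mask and clearing each chosen CPU before the next request, so none of the returned IRQs share a CPU until the mask is exhausted. A sketch of that shrinking-mask loop; request_irq_on() is a placeholder for the pool request and is assumed to return the CPU the IRQ landed on:

#include <linux/cpumask.h>
#include <linux/slab.h>

/* Placeholder: request an IRQ bound to some CPU in @mask and return
 * that CPU, or a negative errno.
 */
static int request_irq_on(const struct cpumask *mask);

/* Request up to @n IRQs, each on a CPU none of the previous IRQs used;
 * returns how many were actually obtained.
 */
static int spread_irqs(int n, int *cpus)
{
	cpumask_var_t mask;
	int i;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_copy(mask, cpu_online_mask);

	for (i = 0; i < n; i++) {
		int cpu = request_irq_on(mask);

		if (cpu < 0)
			break;
		cpus[i] = cpu;
		cpumask_clear_cpu(cpu, mask);	/* don't reuse this CPU */
	}
	free_cpumask_var(mask);
	return i;
}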
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
new file mode 100644
index 000000000000..b8feaf0f5c4c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lag.h"
+
+static char *get_str_mode_type(struct mlx5_lag *ldev)
+{
+ switch (ldev->mode) {
+ case MLX5_LAG_MODE_ROCE: return "roce";
+ case MLX5_LAG_MODE_SRIOV: return "switchdev";
+ case MLX5_LAG_MODE_MULTIPATH: return "multipath";
+ case MLX5_LAG_MODE_MPESW: return "multiport_eswitch";
+ default: return "invalid";
+ }
+
+ return NULL;
+}
+
+static int type_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ char *mode = NULL;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = get_str_mode_type(ldev);
+ mutex_unlock(&ldev->lock);
+ if (!mode)
+ return -EINVAL;
+ seq_printf(file, "%s\n", mode);
+
+ return 0;
+}
+
+static int port_sel_mode_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int ret = 0;
+ char *mode;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags);
+ else
+ ret = -EINVAL;
+ mutex_unlock(&ldev->lock);
+ if (ret)
+ return ret;
+
+ seq_printf(file, "%s\n", mode);
+ return 0;
+}
+
+static int state_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ bool active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+ seq_printf(file, "%s\n", active ? "active" : "disabled");
+ return 0;
+}
+
+static int flags_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ bool fdb_sel_mode_native;
+ struct mlx5_lag *ldev;
+ bool shared_fdb;
+ bool lag_active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (!lag_active)
+ goto unlock;
+
+ shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &ldev->mode_flags);
+
+unlock:
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off");
+ seq_printf(file, "%s:%s\n", "fdb_selection_mode",
+ fdb_sel_mode_native ? "native" : "affinity");
+ return 0;
+}
+
+static int mapping_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ u8 ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_lag *ldev;
+ bool hash = false;
+ bool lag_active;
+ int num_ports;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (lag_active) {
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
+ mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
+ &num_ports);
+ hash = true;
+ } else {
+ for (i = 0; i < ldev->ports; i++)
+ ports[i] = ldev->v2p_map[i];
+ num_ports = ldev->ports;
+ }
+ }
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ for (i = 0; i < num_ports; i++) {
+ if (hash)
+ seq_printf(file, "%d\n", ports[i] + 1);
+ else
+ seq_printf(file, "%d:%d\n", i + 1, ports[i]);
+ }
+
+ return 0;
+}
+
+static int members_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+ seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device));
+ }
+ mutex_unlock(&ldev->lock);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(type);
+DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
+DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(flags);
+DEFINE_SHOW_ATTRIBUTE(mapping);
+DEFINE_SHOW_ATTRIBUTE(members);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
+{
+ struct dentry *dbg;
+
+ dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
+ dev->priv.dbg.lag_debugfs = dbg;
+
+ debugfs_create_file("type", 0444, dbg, dev, &type_fops);
+ debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops);
+ debugfs_create_file("state", 0444, dbg, dev, &state_fops);
+ debugfs_create_file("flags", 0444, dbg, dev, &flags_fops);
+ debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops);
+ debugfs_create_file("members", 0444, dbg, dev, &members_fops);
+}
+
+void mlx5_ldev_remove_debugfs(struct dentry *dbg)
+{
+ debugfs_remove_recursive(dbg);
+}
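
Every file in this new debugfs directory leans on DEFINE_SHOW_ATTRIBUTE(), which takes a <name>_show(struct seq_file *, void *) function and generates the <name>_fops that debugfs_create_file() needs, wiring single_open() so seq_file->private carries the pointer passed at creation. A minimal sketch of that pairing, on an illustrative flag:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static bool lag_active;

/* DEFINE_SHOW_ATTRIBUTE(example) below generates example_fops from this
 * show routine; file->private is the data pointer given to
 * debugfs_create_file().
 */
static int example_show(struct seq_file *file, void *priv)
{
	bool *active = file->private;

	seq_printf(file, "%s\n", *active ? "active" : "disabled");
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(example);

static void example_add_debugfs(struct dentry *parent)
{
	debugfs_create_file("state", 0444, parent, &lag_active, &example_fops);
}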
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 4ddf6b330a44..a9f4ede4a9bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -31,14 +31,22 @@
*/
#include <linux/netdevice.h>
+#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
+#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
+#include "mpesw.h"
+
+enum {
+ MLX5_LAG_EGRESS_PORT_1 = 1,
+ MLX5_LAG_EGRESS_PORT_2,
+};
/* General purpose, use for short periods of time.
* Beware of lock dependencies (preferably, no locks should be acquired
@@ -46,28 +54,67 @@
*/
static DEFINE_SPINLOCK(lag_lock);
-static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2, bool shared_fdb, u8 flags)
+static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
+
+ return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
+}
+
+static u8 lag_active_port_bits(struct mlx5_lag *ldev)
+{
+ u8 enabled_ports[MLX5_MAX_PORTS] = {};
+ u8 active_port = 0;
+ int num_enabled;
+ int idx;
+
+ mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
+ &num_enabled);
+ for (idx = 0; idx < num_enabled; idx++)
+ active_port |= BIT_MASK(enabled_ports[idx]);
+
+ return active_port;
+}
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
+ unsigned long flags)
+{
+ bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &flags);
+ int port_sel_mode = get_port_sel_mode(mode, flags);
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
- void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+ void *lag_ctx;
+ lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
- MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
- if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) {
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
- } else {
- MLX5_SET(lagc, lag_ctx, port_select_mode,
- MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT);
+ switch (port_sel_mode) {
+ case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
+ break;
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
+ if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
+ break;
+
+ MLX5_SET(lagc, lag_ctx, active_port,
+ lag_active_port_bits(mlx5_lag_dev(dev)));
+ break;
+ default:
+ break;
}
+ MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
return mlx5_cmd_exec_in(dev, create_lag, in);
}
-static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2)
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
+ u8 *ports)
{
u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
@@ -75,8 +122,8 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
MLX5_SET(modify_lag_in, in, field_select, 0x1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
return mlx5_cmd_exec_in(dev, modify_lag, in);
}
@@ -101,6 +148,75 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_disabled)
+{
+ int i;
+
+ *num_disabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (!tracker->netdev_state[i].tx_enabled ||
+ !tracker->netdev_state[i].link_up)
+ ports[(*num_disabled)++] = i;
+ }
+}
+
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled)
+{
+ int i;
+
+ *num_enabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ ports[(*num_enabled)++] = i;
+ }
+
+ if (*num_enabled == 0)
+ mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
+}
+
+static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
+ struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ unsigned long flags)
+{
+ char buf[MLX5_MAX_PORTS * 10 + 1] = {};
+ u8 enabled_ports[MLX5_MAX_PORTS] = {};
+ int written = 0;
+ int num_enabled;
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
+ mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
+ &num_enabled);
+ for (i = 0; i < num_enabled; i++) {
+ err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
+ if (err != 3)
+ return;
+ written += err;
+ }
+ buf[written - 2] = 0;
+ mlx5_core_info(dev, "lag map active ports: %s\n", buf);
+ } else {
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ err = scnprintf(buf + written, 10,
+ " port %d:%d", i + 1, ldev->v2p_map[idx]);
+ if (err != 9)
+ return;
+ written += err;
+ }
+ }
+ mlx5_core_info(dev, "lag map:%s\n", buf);
+ }
+}
+
static int mlx5_lag_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);
@@ -112,8 +228,10 @@ static void mlx5_ldev_free(struct kref *ref)
if (ldev->nb.notifier_call)
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
mlx5_lag_mp_cleanup(ldev);
- cancel_delayed_work_sync(&ldev->bond_work);
+ mlx5_lag_mpesw_cleanup(ldev);
+ cancel_work_sync(&ldev->mpesw_work);
destroy_workqueue(ldev->wq);
+ mutex_destroy(&ldev->lock);
kfree(ldev);
}
@@ -143,6 +261,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
}
kref_init(&ldev->ref);
+ mutex_init(&ldev->lock);
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
ldev->nb.notifier_call = mlx5_lag_netdev_event;
@@ -150,12 +269,17 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
ldev->nb.notifier_call = NULL;
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
}
+ ldev->mode = MLX5_LAG_MODE_NONE;
err = mlx5_lag_mp_init(ldev);
if (err)
mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
err);
+ mlx5_lag_mpesw_init(ldev);
+ ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
+ ldev->buckets = 1;
+
return ldev;
}
@@ -164,7 +288,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < ldev->ports; i++)
if (ldev->pf[i].netdev == ndev)
return i;
@@ -173,108 +297,278 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+ return ldev->mode == MLX5_LAG_MODE_ROCE;
}
static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
+ return ldev->mode == MLX5_LAG_MODE_SRIOV;
}
+/* Create a mapping between steering slots and active ports.
+ * As we have ldev->buckets slots per port, first assume the native
+ * mapping should be used.
+ * If there are ports that are disabled, fill the relevant slots
+ * with a mapping that points to active ports.
+ */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
- u8 *port1, u8 *port2)
+ u8 num_ports,
+ u8 buckets,
+ u8 *ports)
{
- bool p1en;
- bool p2en;
+ int disabled[MLX5_MAX_PORTS] = {};
+ int enabled[MLX5_MAX_PORTS] = {};
+ int disabled_ports_num = 0;
+ int enabled_ports_num = 0;
+ int idx;
+ u32 rand;
+ int i;
+ int j;
- p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
- tracker->netdev_state[MLX5_LAG_P1].link_up;
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ enabled[enabled_ports_num++] = i;
+ else
+ disabled[disabled_ports_num++] = i;
+ }
- p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
- tracker->netdev_state[MLX5_LAG_P2].link_up;
+ /* Use native mapping by default where each port's buckets
+ * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+ */
+ for (i = 0; i < num_ports; i++)
+ for (j = 0; j < buckets; j++) {
+ idx = i * buckets + j;
+ ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
+ }
- *port1 = 1;
- *port2 = 2;
- if ((!p1en && !p2en) || (p1en && p2en))
+ /* If all ports are disabled/enabled keep native mapping */
+ if (enabled_ports_num == num_ports ||
+ disabled_ports_num == num_ports)
return;
- if (p1en)
- *port2 = 1;
- else
- *port1 = 2;
+ /* Go over the disabled ports and for each assign a random active port */
+ for (i = 0; i < disabled_ports_num; i++) {
+ for (j = 0; j < buckets; j++) {
+ get_random_bytes(&rand, 4);
+ ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+ }
+ }
+}
+
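
Worked example for the mapping above: with four ports, two buckets and only port 3 down, the native map 1 1 2 2 3 3 4 4 becomes 1 1 2 2 r r 4 4, each r drawn independently from {1, 2, 4}. A self-contained sketch of the same algorithm on a plain array (get_random_u32() is the real kernel helper; the rest is illustrative):

#include <linux/random.h>
#include <linux/types.h>

/* Fill @map (num_ports * buckets entries, 1-based egress ports): native
 * slots first, then remap every bucket of a down port to a randomly
 * chosen up port. Keeps the native map when all ports are up (or down).
 */
static void build_tx_map(const bool *up, int num_ports, int buckets, u8 *map)
{
	int enabled[8];			/* assumes num_ports <= 8 */
	int num_enabled = 0;
	int i, j;

	for (i = 0; i < num_ports; i++) {
		if (up[i])
			enabled[num_enabled++] = i;
		for (j = 0; j < buckets; j++)
			map[i * buckets + j] = i + 1;	/* native mapping */
	}
	if (!num_enabled || num_enabled == num_ports)
		return;

	for (i = 0; i < num_ports; i++) {
		if (up[i])
			continue;
		for (j = 0; j < buckets; j++)
			map[i * buckets + j] =
				enabled[get_random_u32() % num_enabled] + 1;
	}
}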
+static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].has_drop)
+ return true;
+ return false;
}
-static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2)
+static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].has_drop)
+ continue;
+
+ mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
+ MLX5_VPORT_UPLINK);
+ ldev->pf[i].has_drop = false;
+ }
+}
+
+static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ u8 disabled_ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_core_dev *dev;
+ int disabled_index;
+ int num_disabled;
+ int err;
+ int i;
+
+ /* First delete the current drop rule so there won't be any dropped
+ * packets
+ */
+ mlx5_lag_drop_rule_cleanup(ldev);
+
+ if (!ldev->tracker.has_inactive)
+ return;
+
+ mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
+
+ for (i = 0; i < num_disabled; i++) {
+ disabled_index = disabled_ports[i];
+ dev = ldev->pf[disabled_index].dev;
+ err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
+ MLX5_VPORT_UPLINK);
+ if (!err)
+ ldev->pf[disabled_index].has_drop = true;
+ else
+ mlx5_core_err(dev,
+ "Failed to create lag drop rule, error: %d", err);
+ }
+}
+
+static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
+ void *lag_ctx;
+
+ lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+ MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+ MLX5_SET(modify_lag_in, in, field_select, 0x2);
+
+ MLX5_SET(lagc, lag_ctx, active_port, ports);
+
+ return mlx5_cmd_exec_in(dev, modify_lag, in);
+}
+
+static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ u8 active_ports;
+ int ret;
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
+ ret = mlx5_lag_port_sel_modify(ldev, ports);
+ if (ret ||
+ !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
+ return ret;
- if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED)
- return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2);
- return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ active_ports = lag_active_port_bits(ldev);
+
+ return mlx5_cmd_modify_active_port(dev0, active_ports);
+ }
+ return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker)
{
+ u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- u8 v2p_port1, v2p_port2;
+ int idx;
int err;
+ int i;
+ int j;
- mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
- &v2p_port2);
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
- if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
- v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
- err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2);
- if (err) {
- mlx5_core_err(dev0,
- "Failed to modify LAG (%d)\n",
- err);
- return;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ports[idx] == ldev->v2p_map[idx])
+ continue;
+ err = _mlx5_modify_lag(ldev, ports);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ return;
+ }
+ memcpy(ldev->v2p_map, ports, sizeof(ports));
+
+ mlx5_lag_print_mapping(dev0, ldev, tracker,
+ ldev->mode_flags);
+ break;
}
- ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
- ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
- mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
- ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
}
+
+ if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ !(ldev->mode == MLX5_LAG_MODE_ROCE))
+ mlx5_lag_drop_rule_setup(ldev, tracker);
+}
+
+static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
+ unsigned long *flags)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+
+ if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
+ if (ldev->ports > 2)
+ return -EINVAL;
+ return 0;
+ }
+
+ if (ldev->ports > 2)
+ ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
+
+ set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
+
+ return 0;
}
-static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
- struct lag_tracker *tracker, u8 *flags)
+static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ unsigned long *flags)
{
- bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
- if (roce_lag ||
- !MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) ||
- tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+ if (mode == MLX5_LAG_MODE_MPESW)
return;
- *flags |= MLX5_LAG_FLAG_HASH_BASED;
+
+ if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
+ set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
+}
+
+static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
+ struct lag_tracker *tracker, bool shared_fdb,
+ unsigned long *flags)
+{
+ bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
+
+ *flags = 0;
+ if (shared_fdb) {
+ set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+ }
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+
+ if (roce_lag)
+ return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
+
+ mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
+ return 0;
}
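
/*
 * Summary of the flag sets mlx5_lag_set_flags() can produce, derived
 * from the code above (a reading aid, not an authoritative matrix):
 *   - shared_fdb (any mode): SHARED_FDB | FDB_SEL_MODE_NATIVE
 *   - MLX5_LAG_MODE_MPESW:   FDB_SEL_MODE_NATIVE
 *   - MLX5_LAG_MODE_ROCE:    HASH_BASED when port_select_flow_table is
 *     supported; more than two ports require that capability (-EINVAL
 *     otherwise)
 *   - other modes:           HASH_BASED when the capability is present
 *     and the bond tx_type is hash
 */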
-static char *get_str_port_sel_mode(u8 flags)
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
- if (flags & MLX5_LAG_FLAG_HASH_BASED)
- return "hash";
- return "queue_affinity";
+ int port_sel_mode = get_port_sel_mode(mode, flags);
+
+ switch (port_sel_mode) {
+ case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
+ default: return "invalid";
+ }
}
static int mlx5_create_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- bool shared_fdb, u8 flags)
+ enum mlx5_lag_mode mode,
+ unsigned long flags)
{
+ bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
int err;
- mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d mode:%s",
- ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
- shared_fdb, get_str_port_sel_mode(flags));
+ if (tracker)
+ mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
+ mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
+ shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
- err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2], shared_fdb, flags);
+ err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
if (err) {
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
@@ -303,31 +597,35 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags,
+ enum mlx5_lag_mode mode,
bool shared_fdb)
{
- bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
+ bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ unsigned long flags = 0;
int err;
- mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
- &ldev->v2p_map[MLX5_LAG_P2]);
- mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
- if (flags & MLX5_LAG_FLAG_HASH_BASED) {
- err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
- ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
- if (err) {
- mlx5_core_err(dev0,
- "Failed to create LAG port selection(%d)\n",
- err);
- return err;
+ err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
+ if (err)
+ return err;
+
+ if (mode != MLX5_LAG_MODE_MPESW) {
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
+ err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
+ ldev->v2p_map);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to create LAG port selection(%d)\n",
+ err);
+ return err;
+ }
}
}
- err = mlx5_create_lag(ldev, tracker, shared_fdb, flags);
+ err = mlx5_create_lag(ldev, tracker, mode, flags);
if (err) {
- if (flags & MLX5_LAG_FLAG_HASH_BASED)
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
mlx5_lag_port_sel_destroy(ldev);
if (roce_lag)
mlx5_core_err(dev0,
@@ -339,26 +637,32 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
return err;
}
- ldev->flags |= flags;
- ldev->shared_fdb = shared_fdb;
+ if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ !roce_lag)
+ mlx5_lag_drop_rule_setup(ldev, tracker);
+
+ ldev->mode = mode;
+ ldev->mode_flags = flags;
return 0;
}
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
bool roce_lag = __mlx5_lag_is_roce(ldev);
- u8 flags = ldev->flags;
+ unsigned long flags = ldev->mode_flags;
int err;
- ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+ ldev->mode = MLX5_LAG_MODE_NONE;
+ ldev->mode_flags = 0;
mlx5_lag_mp_reset(ldev);
- if (ldev->shared_fdb) {
- mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
- ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
- ldev->shared_fdb = false;
+ if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
+ mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
}
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
@@ -372,32 +676,55 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
"Failed to deactivate VF LAG; driver restart required\n"
"Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
}
- } else if (flags & MLX5_LAG_FLAG_HASH_BASED) {
- mlx5_lag_port_sel_destroy(ldev);
+ return err;
}
- return err;
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ mlx5_lag_port_sel_destroy(ldev);
+ if (mlx5_lag_has_drop_rule(ldev))
+ mlx5_lag_drop_rule_cleanup(ldev);
+
+ return 0;
}
+#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
- if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
- return false;
+#ifdef CONFIG_MLX5_ESWITCH
+ struct mlx5_core_dev *dev;
+ u8 mode;
+#endif
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (!ldev->pf[i].dev)
+ return false;
#ifdef CONFIG_MLX5_ESWITCH
- return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
- ldev->pf[MLX5_LAG_P2].dev);
+ dev = ldev->pf[MLX5_LAG_P1].dev;
+ if (mlx5_sriov_is_enabled(dev) && !is_mdev_switchdev_mode(dev))
+ return false;
+
+ mode = mlx5_eswitch_mode(dev);
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
+ return false;
+
+ if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
+ return false;
#else
- return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
- !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
+ return false;
#endif
+ return true;
}
static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < ldev->ports; i++) {
if (!ldev->pf[i].dev)
continue;
@@ -414,7 +741,7 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < ldev->ports; i++) {
if (!ldev->pf[i].dev)
continue;
@@ -427,13 +754,14 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
}
}
-static void mlx5_disable_lag(struct mlx5_lag *ldev)
+void mlx5_disable_lag(struct mlx5_lag *ldev)
{
+ bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
- bool shared_fdb = ldev->shared_fdb;
bool roce_lag;
int err;
+ int i;
roce_lag = __mlx5_lag_is_roce(ldev);
@@ -444,7 +772,8 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
}
- mlx5_nic_vport_disable_roce(dev1);
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
}
err = mlx5_deactivate_lag(ldev);
@@ -462,7 +791,7 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
}
}
-static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
@@ -481,13 +810,42 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
return false;
}
+static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
+{
+ bool roce_lag = true;
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
+
+#ifdef CONFIG_MLX5_ESWITCH
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
+#endif
+
+ return roce_lag;
+}
+
+static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
+{
+ return do_bond && __mlx5_lag_is_active(ldev) &&
+ ldev->mode != MLX5_LAG_MODE_MPESW;
+}
+
+static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
+{
+ return !do_bond && __mlx5_lag_is_active(ldev) &&
+ ldev->mode != MLX5_LAG_MODE_MPESW;
+}
+
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
- struct lag_tracker tracker;
+ struct lag_tracker tracker = { };
bool do_bond, roce_lag;
int err;
+ int i;
if (!mlx5_lag_is_ready(ldev)) {
do_bond = false;
@@ -504,21 +862,14 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
if (do_bond && !__mlx5_lag_is_active(ldev)) {
bool shared_fdb = mlx5_shared_fdb_supported(ldev);
- roce_lag = !mlx5_sriov_is_enabled(dev0) &&
- !mlx5_sriov_is_enabled(dev1);
-
-#ifdef CONFIG_MLX5_ESWITCH
- roce_lag = roce_lag &&
- dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
-#endif
+ roce_lag = mlx5_lag_is_roce_lag(ldev);
if (shared_fdb || roce_lag)
mlx5_lag_remove_devices(ldev);
err = mlx5_activate_lag(ldev, &tracker,
- roce_lag ? MLX5_LAG_FLAG_ROCE :
- MLX5_LAG_FLAG_SRIOV,
+ roce_lag ? MLX5_LAG_MODE_ROCE :
+ MLX5_LAG_MODE_SRIOV,
shared_fdb);
if (err) {
if (shared_fdb || roce_lag)
@@ -528,7 +879,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
} else if (roce_lag) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
- mlx5_nic_vport_enable_roce(dev1);
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
} else if (shared_fdb) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
@@ -548,9 +900,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
return;
}
}
- } else if (do_bond && __mlx5_lag_is_active(ldev)) {
+ } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
mlx5_modify_lag(ldev, &tracker);
- } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
+ } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
mlx5_disable_lag(ldev);
}
}
@@ -560,31 +912,11 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
-static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1)
-{
- if (dev0)
- mlx5_esw_lock(dev0->priv.eswitch);
- if (dev1)
- mlx5_esw_lock(dev1->priv.eswitch);
-}
-
-static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1)
-{
- if (dev1)
- mlx5_esw_unlock(dev1->priv.eswitch);
- if (dev0)
- mlx5_esw_unlock(dev0->priv.eswitch);
-}
-
static void mlx5_do_bond_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
bond_work);
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
int status;
status = mlx5_dev_list_trylock();
@@ -593,27 +925,29 @@ static void mlx5_do_bond_work(struct work_struct *work)
return;
}
+ mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
mlx5_dev_list_unlock();
mlx5_queue_bond_work(ldev, HZ);
return;
}
- mlx5_lag_lock_eswitches(dev0, dev1);
mlx5_do_bond(ldev);
- mlx5_lag_unlock_eswitches(dev0, dev1);
+ mutex_unlock(&ldev->lock);
mlx5_dev_list_unlock();
}
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- struct net_device *ndev,
struct netdev_notifier_changeupper_info *info)
{
struct net_device *upper = info->upper_dev, *ndev_tmp;
struct netdev_lag_upper_info *lag_upper_info = NULL;
bool is_bonded, is_in_lag, mode_supported;
- int bond_status = 0;
+ bool has_inactive = false;
+ struct slave *slave;
+ u8 bond_status = 0;
int num_slaves = 0;
int changed = 0;
int idx;
@@ -632,15 +966,19 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
rcu_read_lock();
for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
- if (idx >= 0)
+ if (idx >= 0) {
+ slave = bond_slave_get_rcu(ndev_tmp);
+ if (slave)
+ has_inactive |= bond_is_slave_inactive(slave);
bond_status |= (1 << idx);
+ }
num_slaves++;
}
rcu_read_unlock();
/* None of this lagdev's netdevs are slaves of this master. */
- if (!(bond_status & 0x3))
+ if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
return 0;
if (lag_upper_info) {
@@ -648,11 +986,13 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
tracker->hash_type = lag_upper_info->hash_type;
}
+ tracker->has_inactive = has_inactive;
/* Determine bonding status:
* A device is considered bonded if all its physical ports are slaves
* of the same lag master, and only them.
*/
- is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
+ is_in_lag = num_slaves == ldev->ports &&
+ bond_status == GENMASK(ldev->ports - 1, 0);
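+ /* e.g. ldev->ports == 2 -> mask 0x3 (the old literal), 4 ports -> 0xf */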
/* Lag mode must be activebackup or hash. */
mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
@@ -704,6 +1044,39 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
return 1;
}
+static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev)
+{
+ struct net_device *ndev_tmp;
+ struct slave *slave;
+ bool has_inactive = false;
+ int idx;
+
+ if (!netif_is_lag_master(ndev))
+ return 0;
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+ if (idx < 0)
+ continue;
+
+ slave = bond_slave_get_rcu(ndev_tmp);
+ if (slave)
+ has_inactive |= bond_is_slave_inactive(slave);
+ }
+ rcu_read_unlock();
+
+ if (tracker->has_inactive == has_inactive)
+ return 0;
+
+ tracker->has_inactive = has_inactive;
+
+ return 1;
+}
+
+/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -712,7 +1085,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
struct mlx5_lag *ldev;
int changed = 0;
- if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
+ if (event != NETDEV_CHANGEUPPER &&
+ event != NETDEV_CHANGELOWERSTATE &&
+ event != NETDEV_CHANGEINFODATA)
return NOTIFY_DONE;
ldev = container_of(this, struct mlx5_lag, nb);
@@ -721,13 +1096,15 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
switch (event) {
case NETDEV_CHANGEUPPER:
- changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
- ptr);
+ changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
break;
case NETDEV_CHANGELOWERSTATE:
changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
ndev, ptr);
break;
+ case NETDEV_CHANGEINFODATA:
+ changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
+ break;
}
ldev->tracker = tracker;
@@ -743,30 +1120,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
unsigned int fn = mlx5_get_dev_index(dev);
+ unsigned long flags;
- if (fn >= MLX5_MAX_PORTS)
+ if (fn >= ldev->ports)
return;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev->pf[fn].netdev = netdev;
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
+ unsigned long flags;
int i;
- spin_lock(&lag_lock);
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ spin_lock_irqsave(&lag_lock, flags);
+ for (i = 0; i < ldev->ports; i++) {
if (ldev->pf[i].netdev == netdev) {
ldev->pf[i].netdev = NULL;
break;
}
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
@@ -774,24 +1153,23 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
{
unsigned int fn = mlx5_get_dev_index(dev);
- if (fn >= MLX5_MAX_PORTS)
+ if (fn >= ldev->ports)
return;
ldev->pf[fn].dev = dev;
dev->priv.lag = ldev;
}
-/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
struct mlx5_core_dev *dev)
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < ldev->ports; i++)
if (ldev->pf[i].dev == dev)
break;
- if (i == MLX5_MAX_PORTS)
+ if (i == ldev->ports)
return;
ldev->pf[i].dev = NULL;
@@ -804,12 +1182,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
- if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
- !MLX5_CAP_GEN(dev, lag_master) ||
- MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
- return 0;
-
- tmp_dev = mlx5_get_next_phys_dev(dev);
+ tmp_dev = mlx5_get_next_phys_dev_lag(dev);
if (tmp_dev)
ldev = tmp_dev->priv.lag;
@@ -819,13 +1192,18 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "Failed to alloc lag dev\n");
return 0;
}
- } else {
- if (ldev->mode_changes_in_progress)
- return -EAGAIN;
- mlx5_ldev_get(ldev);
+ mlx5_ldev_add_mdev(ldev, dev);
+ return 0;
}
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ return -EAGAIN;
+ }
+ mlx5_ldev_get(ldev);
mlx5_ldev_add_mdev(ldev, dev);
+ mutex_unlock(&ldev->lock);
return 0;
}
@@ -838,15 +1216,19 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev)
return;
+ /* mdev is being removed, might as well remove debugfs
+ * as early as possible.
+ */
+ mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
- mlx5_dev_list_lock();
+ mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) {
- mlx5_dev_list_unlock();
+ mutex_unlock(&ldev->lock);
msleep(100);
goto recheck;
}
mlx5_ldev_remove_mdev(ldev, dev);
- mlx5_dev_list_unlock();
+ mutex_unlock(&ldev->lock);
mlx5_ldev_put(ldev);
}
@@ -854,35 +1236,45 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
int err;
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
+ return;
+
recheck:
mlx5_dev_list_lock();
err = __mlx5_lag_dev_add_mdev(dev);
+ mlx5_dev_list_unlock();
+
if (err) {
- mlx5_dev_list_unlock();
msleep(100);
goto recheck;
}
- mlx5_dev_list_unlock();
+ mlx5_ldev_add_debugfs(dev);
}
-/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
struct net_device *netdev)
{
struct mlx5_lag *ldev;
+ bool lag_is_active;
ldev = mlx5_lag_dev(dev);
if (!ldev)
return;
+ mutex_lock(&ldev->lock);
mlx5_ldev_remove_netdev(ldev, netdev);
- ldev->flags &= ~MLX5_LAG_FLAG_READY;
+ clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
- if (__mlx5_lag_is_active(ldev))
+ lag_is_active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+
+ if (lag_is_active)
mlx5_queue_bond_work(ldev, 0);
}
-/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
struct net_device *netdev)
{
@@ -893,26 +1285,29 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
if (!ldev)
return;
+ mutex_lock(&ldev->lock);
mlx5_ldev_add_netdev(ldev, dev, netdev);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (!ldev->pf[i].dev)
+ for (i = 0; i < ldev->ports; i++)
+ if (!ldev->pf[i].netdev)
break;
- if (i >= MLX5_MAX_PORTS)
- ldev->flags |= MLX5_LAG_FLAG_READY;
+ if (i >= ldev->ports)
+ set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+ mutex_unlock(&ldev->lock);
mlx5_queue_bond_work(ldev, 0);
}
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_roce(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -921,27 +1316,45 @@ EXPORT_SYMBOL(mlx5_lag_is_roce);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);
+bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res = false;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev)
+ res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
+
bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev) &&
dev == ldev->pf[MLX5_LAG_P1].dev;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -950,12 +1363,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -964,12 +1378,14 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
- res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
- spin_unlock(&lag_lock);
+ res = ldev && __mlx5_lag_is_sriov(ldev) &&
+ test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -977,8 +1393,6 @@ EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
- struct mlx5_core_dev *dev0;
- struct mlx5_core_dev *dev1;
struct mlx5_lag *ldev;
ldev = mlx5_lag_dev(dev);
@@ -986,16 +1400,13 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
return;
mlx5_dev_list_lock();
-
- dev0 = ldev->pf[MLX5_LAG_P1].dev;
- dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ mutex_lock(&ldev->lock);
ldev->mode_changes_in_progress++;
- if (__mlx5_lag_is_active(ldev)) {
- mlx5_lag_lock_eswitches(dev0, dev1);
+ if (__mlx5_lag_is_active(ldev))
mlx5_disable_lag(ldev);
- mlx5_lag_unlock_eswitches(dev0, dev1);
- }
+
+ mutex_unlock(&ldev->lock);
mlx5_dev_list_unlock();
}
@@ -1007,9 +1418,9 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
if (!ldev)
return;
- mlx5_dev_list_lock();
+ mutex_lock(&ldev->lock);
ldev->mode_changes_in_progress--;
- mlx5_dev_list_unlock();
+ mutex_unlock(&ldev->lock);
mlx5_queue_bond_work(ldev, 0);
}
@@ -1017,17 +1428,21 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
struct net_device *ndev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
+ int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
- ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
- ldev->pf[MLX5_LAG_P1].netdev :
- ldev->pf[MLX5_LAG_P2].netdev;
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->tracker.netdev_state[i].tx_enabled)
+ ndev = ldev->pf[i].netdev;
+ if (!ndev)
+ ndev = ldev->pf[ldev->ports - 1].netdev;
} else {
ndev = ldev->pf[MLX5_LAG_P1].netdev;
}
@@ -1035,7 +1450,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
dev_hold(ndev);
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return ndev;
}
@@ -1045,32 +1460,49 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
u8 port = 0;
+ int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
- if (ldev->pf[MLX5_LAG_P1].netdev == slave)
- port = MLX5_LAG_P1;
- else
- port = MLX5_LAG_P2;
+ for (i = 0; i < ldev->ports; i++) {
+ if (ldev->pf[i].netdev == slave) {
+ port = i;
+ break;
+ }
+ }
- port = ldev->v2p_map[port];
+ port = ldev->v2p_map[port * ldev->buckets];
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return 0;
+
+ return ldev->ports;
+}
+EXPORT_SYMBOL(mlx5_lag_get_num_ports);
+
struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!ldev)
goto unlock;
@@ -1080,7 +1512,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
ldev->pf[MLX5_LAG_P1].dev;
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
@@ -1091,8 +1523,9 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
size_t *offsets)
{
int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
- struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+ struct mlx5_core_dev **mdev;
struct mlx5_lag *ldev;
+ unsigned long flags;
int num_ports;
int ret, i, j;
void *out;
@@ -1101,19 +1534,25 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
if (!out)
return -ENOMEM;
+ mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
+ if (!mdev) {
+ ret = -ENOMEM;
+ goto free_out;
+ }
+
memset(values, 0, sizeof(*values) * num_counters);
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (ldev && __mlx5_lag_is_active(ldev)) {
- num_ports = MLX5_MAX_PORTS;
- mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
- mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
+ num_ports = ldev->ports;
+ for (i = 0; i < ldev->ports; i++)
+ mdev[i] = ldev->pf[i].dev;
} else {
num_ports = 1;
mdev[MLX5_LAG_P1] = dev;
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
for (i = 0; i < num_ports; ++i) {
u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
@@ -1123,13 +1562,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
out);
if (ret)
- goto free;
+ goto free_mdev;
for (j = 0; j < num_counters; ++j)
values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
}
-free:
+free_mdev:
+ kvfree(mdev);
+free_out:
kvfree(out);
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index e5d231c31b54..ce2ce8ccbd70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -4,9 +4,13 @@
#ifndef __MLX5_LAG_H__
#define __MLX5_LAG_H__
+#include <linux/debugfs.h>
+
+#define MLX5_LAG_MAX_HASH_BUCKETS 16
#include "mlx5_core.h"
#include "mp.h"
#include "port_sel.h"
+#include "mpesw.h"
enum {
MLX5_LAG_P1,
@@ -14,20 +18,27 @@ enum {
};
enum {
- MLX5_LAG_FLAG_ROCE = 1 << 0,
- MLX5_LAG_FLAG_SRIOV = 1 << 1,
- MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
- MLX5_LAG_FLAG_READY = 1 << 3,
- MLX5_LAG_FLAG_HASH_BASED = 1 << 4,
+ MLX5_LAG_FLAG_NDEVS_READY,
};
-#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
- MLX5_LAG_FLAG_MULTIPATH | \
- MLX5_LAG_FLAG_HASH_BASED)
+enum {
+ MLX5_LAG_MODE_FLAG_HASH_BASED,
+ MLX5_LAG_MODE_FLAG_SHARED_FDB,
+ MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+};
+
+enum mlx5_lag_mode {
+ MLX5_LAG_MODE_NONE,
+ MLX5_LAG_MODE_ROCE,
+ MLX5_LAG_MODE_SRIOV,
+ MLX5_LAG_MODE_MULTIPATH,
+ MLX5_LAG_MODE_MPESW,
+};
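
The single mode plus orthogonal mode_flags replace the old one-bitmask encoding; state checks now read as in this sketch (mirroring the lag.c hunks above, not a helper added by the patch):

static inline bool lag_is_shared_fdb_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV &&
	       test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
}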
struct lag_func {
struct mlx5_core_dev *dev;
struct net_device *netdev;
+ bool has_drop;
};
/* Used for collection of netdev event info. */
@@ -35,6 +46,7 @@ struct lag_tracker {
enum netdev_lag_tx_type tx_type;
struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
unsigned int is_bonded:1;
+ unsigned int has_inactive:1;
enum netdev_lag_hash hash_type;
};
@@ -42,20 +54,37 @@ struct lag_tracker {
* It serves both its phys functions.
*/
struct mlx5_lag {
- u8 flags;
+ enum mlx5_lag_mode mode;
+ unsigned long mode_flags;
+ unsigned long state_flags;
+ u8 ports;
+ u8 buckets;
int mode_changes_in_progress;
- bool shared_fdb;
- u8 v2p_map[MLX5_MAX_PORTS];
+ u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
struct kref ref;
struct lag_func pf[MLX5_MAX_PORTS];
struct lag_tracker tracker;
struct workqueue_struct *wq;
struct delayed_work bond_work;
+ struct work_struct mpesw_work;
struct notifier_block nb;
struct lag_mp lag_mp;
struct mlx5_lag_port_sel port_sel;
+ /* Protect lag fields/state changes */
+ struct mutex lock;
+ struct lag_mpesw lag_mpesw;
};
+static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ MLX5_CAP_GEN(dev, num_lag_ports) < 2 ||
+ MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS)
+ return false;
+ return true;
+}
+
static inline struct mlx5_lag *
mlx5_lag_dev(struct mlx5_core_dev *dev)
{
@@ -65,22 +94,33 @@ mlx5_lag_dev(struct mlx5_core_dev *dev)
static inline bool
__mlx5_lag_is_active(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
+ return ldev->mode != MLX5_LAG_MODE_NONE;
}
static inline bool
mlx5_lag_is_ready(struct mlx5_lag *ldev)
{
- return ldev->flags & MLX5_LAG_FLAG_READY;
+ return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
}
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags,
+ enum mlx5_lag_mode mode,
bool shared_fdb);
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
+bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev);
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
+
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags);
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
+void mlx5_ldev_remove_debugfs(struct dentry *dbg);
+void mlx5_disable_lag(struct mlx5_lag *ldev);
#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index bf4d3cbefa63..0259a149a64c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -11,7 +11,7 @@
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
+ return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
@@ -50,7 +50,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
enum mlx5_lag_port_affinity port)
{
- struct lag_tracker tracker;
+ struct lag_tracker tracker = {};
if (!__mlx5_lag_is_multipath(ldev))
return;
@@ -100,6 +100,14 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
flush_workqueue(mp->wq);
}
+static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
+{
+ mp->fib.mfi = fi;
+ mp->fib.priority = fi->fib_priority;
+ mp->fib.dst = dst;
+ mp->fib.dst_len = dst_len;
+}
+
struct mlx5_fib_event_work {
struct work_struct work;
struct mlx5_lag *ldev;
@@ -110,10 +118,10 @@ struct mlx5_fib_event_work {
};
};
-static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
- unsigned long event,
- struct fib_info *fi)
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
+ struct fib_entry_notifier_info *fen_info)
{
+ struct fib_info *fi = fen_info->fi;
struct lag_mp *mp = &ldev->lag_mp;
struct fib_nh *fib_nh0, *fib_nh1;
unsigned int nhs;
@@ -121,11 +129,17 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
/* stop track */
- if (mp->mfi == fi)
- mp->mfi = NULL;
+ if (mp->fib.mfi == fi)
+ mp->fib.mfi = NULL;
return;
}
+ /* Handle multipath entry with lower priority value */
+ if (mp->fib.mfi && mp->fib.mfi != fi &&
+ (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
+ fi->fib_priority >= mp->fib.priority)
+ return;
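+ /* A different destination only preempts the tracked route when its
+ * fib_priority is strictly lower (a better route); updates to the
+ * tracked dst/dst_len itself always fall through and are handled.
+ */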
+
/* Handle add/replace event */
nhs = fib_info_num_path(fi);
if (nhs == 1) {
@@ -135,12 +149,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
if (i < 0)
- i = MLX5_LAG_NORMAL_AFFINITY;
- else
- ++i;
+ return;
+ i++;
mlx5_lag_set_port_affinity(ldev, i);
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
+
return;
}
@@ -160,15 +175,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
}
/* First time we see multipath route */
- if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+ if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
struct lag_tracker tracker;
tracker = ldev->tracker;
- mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
+ mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
- mp->mfi = fi;
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
@@ -179,7 +194,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
struct lag_mp *mp = &ldev->lag_mp;
/* Check the nh event is related to the route */
- if (!mp->mfi || mp->mfi != fi)
+ if (!mp->fib.mfi || mp->fib.mfi != fi)
return;
/* nh added/removed */
@@ -209,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work)
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_DEL:
mlx5_lag_fib_route_event(ldev, fib_work->event,
- fib_work->fen_info.fi);
+ &fib_work->fen_info);
fib_info_put(fib_work->fen_info.fi);
break;
case FIB_EVENT_NH_ADD:
@@ -268,10 +283,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
fi = fen_info->fi;
- if (fi->nh) {
- NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
- return notifier_from_errno(-EINVAL);
- }
+ if (fi->nh)
+ return NOTIFY_DONE;
fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
@@ -310,7 +323,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
/* Clear mfi, as it might become stale when a route delete event
* has been missed, see mlx5_lag_fib_route_event().
*/
- ldev->lag_mp.mfi = NULL;
+ ldev->lag_mp.fib.mfi = NULL;
}
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
@@ -321,7 +334,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
/* always clear mfi, as it might become stale when a route delete event
* has been missed
*/
- mp->mfi = NULL;
+ mp->fib.mfi = NULL;
if (mp->fib_nb.notifier_call)
return 0;
@@ -351,5 +364,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
unregister_fib_notifier(&init_net, &mp->fib_nb);
destroy_workqueue(mp->wq);
mp->fib_nb.notifier_call = NULL;
- mp->mfi = NULL;
+ mp->fib.mfi = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
index 57af962cad29..056a066da604 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
@@ -15,7 +15,12 @@ enum mlx5_lag_port_affinity {
struct lag_mp {
struct notifier_block fib_nb;
- struct fib_info *mfi; /* used in tracking fib events */
+ struct {
+ const void *mfi; /* used in tracking fib events */
+ u32 priority;
+ u32 dst;
+ int dst_len;
+ } fib;
struct workqueue_struct *wq;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
new file mode 100644
index 000000000000..f643202b29c6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <net/nexthop.h>
+#include "lag/lag.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+
+void mlx5_mpesw_work(struct work_struct *work)
+{
+ struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work);
+
+ mutex_lock(&ldev->lock);
+ mlx5_disable_lag(ldev);
+ mutex_unlock(&ldev->lock);
+}
+
+static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = dev->priv.lag;
+
+ if (!queue_work(ldev->wq, &ldev->mpesw_work))
+ mlx5_core_warn(dev, "failed to queue work\n");
+}
+
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = dev->priv.lag;
+
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
+ ldev->mode == MLX5_LAG_MODE_MPESW)
+ mlx5_lag_disable_mpesw(dev);
+ mutex_unlock(&ldev->lock);
+}
+
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = dev->priv.lag;
+ int err = 0;
+
+ if (!ldev)
+ return 0;
+
+ mutex_lock(&ldev->lock);
+ if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
+ goto out;
+
+ if (ldev->mode != MLX5_LAG_MODE_NONE) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
+ if (err)
+ mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
+
+out:
+ mutex_unlock(&ldev->lock);
+ return err;
+}
+
+int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
+{
+ struct mlx5_lag *ldev = mdev->priv.lag;
+
+ if (!netif_is_bond_master(out_dev) || !ldev)
+ return 0;
+
+ mutex_lock(&ldev->lock);
+ if (ldev->mode == MLX5_LAG_MODE_MPESW) {
+ mutex_unlock(&ldev->lock);
+ return -EOPNOTSUPP;
+ }
+ mutex_unlock(&ldev->lock);
+ return 0;
+}
+
+bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
+{
+ return dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW;
+}
+
+void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
+{
+ INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work);
+ atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);
+}
+
+void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
+{
+ cancel_delayed_work_sync(&ldev->bond_work);
+}
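
mpesw_rule_count above acts as a plain reference count: the first mlx5_lag_add_mpesw_rule() call switches the LAG into MPESW mode and the last mlx5_lag_del_mpesw_rule() schedules its teardown. A sketch of the expected caller pairing (install_hw_rule() is a hypothetical placeholder):

static int offload_rule_over_mpesw(struct mlx5_core_dev *mdev)
{
	int err;

	err = mlx5_lag_add_mpesw_rule(mdev);	/* first caller enables MPESW */
	if (err)
		return err;

	err = install_hw_rule(mdev);		/* hypothetical rule setup */
	if (err)
		mlx5_lag_del_mpesw_rule(mdev);	/* drop the reference on failure */
	return err;
}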
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
new file mode 100644
index 000000000000..be4abcb8fcd5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LAG_MPESW_H__
+#define __MLX5_LAG_MPESW_H__
+
+#include "lag.h"
+#include "mlx5_core.h"
+
+struct lag_mpesw {
+ struct work_struct mpesw_work;
+ atomic_t mpesw_rule_count;
+};
+
+void mlx5_mpesw_work(struct work_struct *work);
+int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
+bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+void mlx5_lag_mpesw_init(struct mlx5_lag *ldev);
+void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev);
+#else
+static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {}
+static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {}
+#endif
+
+#endif /* __MLX5_LAG_MPESW_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index a6592f9c3c05..d3a3fe4ce670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -12,7 +12,8 @@ enum {
static struct mlx5_flow_group *
mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
- struct mlx5_flow_definer *definer)
+ struct mlx5_flow_definer *definer,
+ u8 rules)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
@@ -25,7 +26,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
MLX5_SET(create_flow_group_in, in, match_definer_id,
mlx5_get_match_definer_id(definer));
MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_MAX_PORTS - 1);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1);
MLX5_SET(create_flow_group_in, in, group_type,
MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT);
@@ -36,7 +37,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
struct mlx5_lag_definer *lag_definer,
- u8 port1, u8 port2)
+ u8 *ports)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_flow_table_attr ft_attr = {};
@@ -44,8 +45,10 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_namespace *ns;
int err, i;
+ int idx;
+ int j;
- ft_attr.max_fte = MLX5_MAX_PORTS;
+ ft_attr.max_fte = ldev->ports * ldev->buckets;
ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER;
ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL);
@@ -61,7 +64,8 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
}
lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft,
- lag_definer->definer);
+ lag_definer->definer,
+ ft_attr.max_fte);
if (IS_ERR(lag_definer->fg)) {
err = PTR_ERR(lag_definer->fg);
goto destroy_ft;
@@ -70,19 +74,25 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.flags |= FLOW_ACT_NO_APPEND;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
- u8 affinity = i == 0 ? port1 : port2;
-
- dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
- vhca_id);
- lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft,
- NULL, &flow_act,
- &dest, 1);
- if (IS_ERR(lag_definer->rules[i])) {
- err = PTR_ERR(lag_definer->rules[i]);
- while (i--)
- mlx5_del_flow_rules(lag_definer->rules[i]);
- goto destroy_fg;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ u8 affinity;
+
+ idx = i * ldev->buckets + j;
+ affinity = ports[idx];
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
+ vhca_id);
+ lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
+ NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(lag_definer->rules[idx])) {
+ err = PTR_ERR(lag_definer->rules[idx]);
+ do {
+ while (j--) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ j = ldev->buckets;
+ } while (i--);
+ goto destroy_fg;
+ }
}
}
@@ -279,8 +289,7 @@ static int mlx5_lag_set_definer(u32 *match_definer_mask,
static struct mlx5_lag_definer *
mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
- enum mlx5_traffic_types tt, bool tunnel, u8 port1,
- u8 port2)
+ enum mlx5_traffic_types tt, bool tunnel, u8 *ports)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_lag_definer *lag_definer;
@@ -308,7 +317,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
goto free_mask;
}
- err = mlx5_lag_create_port_sel_table(ldev, lag_definer, port1, port2);
+ err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports);
if (err)
goto destroy_match_definer;
@@ -329,10 +338,16 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
struct mlx5_lag_definer *lag_definer)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ int idx;
int i;
+ int j;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- mlx5_del_flow_rules(lag_definer->rules[i]);
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ }
mlx5_destroy_flow_group(lag_definer->fg);
mlx5_destroy_flow_table(lag_definer->ft);
mlx5_destroy_match_definer(dev, lag_definer->definer);
@@ -356,7 +371,7 @@ static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev)
static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
enum netdev_lag_hash hash_type,
- u8 port1, u8 port2)
+ u8 *ports)
{
struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
struct mlx5_lag_definer *lag_definer;
@@ -364,7 +379,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt,
- false, port1, port2);
+ false, ports);
if (IS_ERR(lag_definer)) {
err = PTR_ERR(lag_definer);
goto destroy_definers;
@@ -376,7 +391,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
lag_definer =
mlx5_lag_create_definer(ldev, hash_type, tt,
- true, port1, port2);
+ true, ports);
if (IS_ERR(lag_definer)) {
err = PTR_ERR(lag_definer);
goto destroy_definers;
@@ -505,7 +520,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
struct ttc_params ttc_params = {};
mlx5_lag_set_inner_ttc_params(ldev, &ttc_params);
- port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params);
+ port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params);
if (IS_ERR(port_sel->inner.ttc))
return PTR_ERR(port_sel->inner.ttc);
@@ -513,13 +528,13 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
}
int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
- enum netdev_lag_hash hash_type, u8 port1, u8 port2)
+ enum netdev_lag_hash hash_type, u8 *ports)
{
struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
int err;
set_tt_map(port_sel, hash_type);
- err = mlx5_lag_create_definers(ldev, hash_type, port1, port2);
+ err = mlx5_lag_create_definers(ldev, hash_type, ports);
if (err)
return err;
@@ -543,52 +558,62 @@ destroy_definers:
return err;
}
-static int
-mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
- struct mlx5_lag_definer **definers,
- u8 port1, u8 port2)
+static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *def,
+ u8 *ports)
{
- struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
struct mlx5_flow_destination dest = {};
+ int idx;
int err;
- int tt;
+ int i;
+ int j;
dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
- struct mlx5_flow_handle **rules = definers[tt]->rules;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ldev->v2p_map[idx] == ports[idx])
+ continue;
- if (ldev->v2p_map[MLX5_LAG_P1] != port1) {
- dest.vport.vhca_id =
- MLX5_CAP_GEN(ldev->pf[port1 - 1].dev, vhca_id);
- err = mlx5_modify_rule_destination(rules[MLX5_LAG_P1],
- &dest, NULL);
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
+ vhca_id);
+ err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
if (err)
return err;
}
+ }
- if (ldev->v2p_map[MLX5_LAG_P2] != port2) {
- dest.vport.vhca_id =
- MLX5_CAP_GEN(ldev->pf[port2 - 1].dev, vhca_id);
- err = mlx5_modify_rule_destination(rules[MLX5_LAG_P2],
- &dest, NULL);
- if (err)
- return err;
- }
+ return 0;
+}
+
+static int
+mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer **definers,
+ u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+ int tt;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports);
+ if (err)
+ return err;
}
return 0;
}
-int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2)
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
{
struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
int err;
err = mlx5_lag_modify_definers_destinations(ldev,
port_sel->outer.definers,
- port1, port2);
+ ports);
if (err)
return err;
@@ -597,7 +622,7 @@ int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2)
return mlx5_lag_modify_definers_destinations(ldev,
port_sel->inner.definers,
- port1, port2);
+ ports);
}
void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
index 6d15b28a42fc..5ec3af2a3ecd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
@@ -10,7 +10,10 @@ struct mlx5_lag_definer {
struct mlx5_flow_definer *definer;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
- struct mlx5_flow_handle *rules[MLX5_MAX_PORTS];
+ /* Each port has ldev->buckets rules and they are arranged in
+ * [port * buckets .. port * buckets + buckets) locations
+ */
+ struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
};
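
As a worked example of the layout comment: with 4 ports and the maximum of 16 buckets (illustrative values), rules[] spans 64 live entries and bucket 3 of port 2 sits at index 2 * 16 + 3 = 35.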
struct mlx5_lag_ttc {
@@ -27,22 +30,20 @@ struct mlx5_lag_port_sel {
#ifdef CONFIG_MLX5_ESWITCH
-int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2);
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports);
void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev);
int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
- enum netdev_lag_hash hash_type, u8 port1,
- u8 port2);
+ enum netdev_lag_hash hash_type, u8 *ports);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
enum netdev_lag_hash hash_type,
- u8 port1, u8 port2)
+ u8 *ports)
{
return 0;
}
-static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1,
- u8 port2)
+static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
{
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
new file mode 100644
index 000000000000..c971ff04dd04
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/transobj.h>
+#include "clock.h"
+#include "aso.h"
+#include "wq.h"
+
+struct mlx5_aso_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per napi poll */
+ struct mlx5_core_cq mcq;
+
+ /* control */
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_ctrl wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5_aso {
+ /* data path */
+ u16 cc;
+ u16 pc;
+
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5_aso_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ void __iomem *uar_map;
+ u32 sqn;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+
+} ____cacheline_aligned_in_smp;
+
+static void mlx5_aso_free_cq(struct mlx5_aso_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_alloc_cq(struct mlx5_core_dev *mdev, int numa_node,
+ void *cqc_data, struct mlx5_aso_cq *cq)
+{
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ struct mlx5_wq_param param;
+ int err;
+ u32 i;
+
+ param.buf_numa_node = numa_node;
+ param.db_numa_node = numa_node;
+
+ err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
+ if (err)
+ return err;
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->mdev = mdev;
+
+ return 0;
+}
+
+static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data)
+{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_core_dev *mdev = cq->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ void *in, *cqc;
+ int inlen, eqn;
+ int err;
+
+ err = mlx5_vector2eqn(mdev, 0, &eqn);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
+
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5_aso_destroy_cq(struct mlx5_aso_cq *cq)
+{
+ mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node,
+ struct mlx5_aso_cq *cq)
+{
+ void *cqc_data;
+ int err;
+
+ cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
+ if (!cqc_data)
+ return -ENOMEM;
+
+ MLX5_SET(cqc, cqc_data, log_cq_size, 1);
+ MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD);
+
+ err = mlx5_aso_alloc_cq(mdev, numa_node, cqc_data, cq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc aso wq cq, err=%d\n", err);
+ goto err_out;
+ }
+
+ err = create_aso_cq(cq, cqc_data);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create aso wq cq, err=%d\n", err);
+ goto err_free_cq;
+ }
+
+ kvfree(cqc_data);
+ return 0;
+
+err_free_cq:
+ mlx5_aso_free_cq(cq);
+err_out:
+ kvfree(cqc_data);
+ return err;
+}
+
+static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wq_param param;
+ int err;
+
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+
+ param.db_numa_node = numa_node;
+ param.buf_numa_node = numa_node;
+ err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ return 0;
+}
+
+static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ void *in, *sqc, *wq;
+ int inlen, err;
+ u8 ts_format;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * sq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
+ MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
+
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ ts_format = mlx5_is_real_time_sq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_aso_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ void *in, *sqc;
+ int inlen, err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+
+ err = mlx5_core_modify_sq(mdev, sqn, in);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_aso_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ int err;
+
+ err = create_aso_sq(mdev, pdn, sqc_data, sq);
+ if (err)
+ return err;
+
+ err = mlx5_aso_set_sq_rdy(mdev, sq->sqn);
+ if (err)
+ mlx5_core_destroy_sq(mdev, sq->sqn);
+
+ return err;
+}
+
+static void mlx5_aso_free_sq(struct mlx5_aso *sq)
+{
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5_aso_destroy_sq(struct mlx5_aso *sq)
+{
+ mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn);
+ mlx5_aso_free_sq(sq);
+}
+
+static int mlx5_aso_create_sq(struct mlx5_core_dev *mdev, int numa_node,
+ u32 pdn, struct mlx5_aso *sq)
+{
+ void *sqc_data, *wq;
+ int err;
+
+ sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
+ if (!sqc_data)
+ return -ENOMEM;
+
+ wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, pdn);
+ MLX5_SET(wq, wq, log_wq_sz, 1);
+
+ err = mlx5_aso_alloc_sq(mdev, numa_node, sqc_data, sq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc aso wq sq, err=%d\n", err);
+ goto err_out;
+ }
+
+ err = mlx5_aso_create_sq_rdy(mdev, pdn, sqc_data, sq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to open aso wq sq, err=%d\n", err);
+ goto err_free_asosq;
+ }
+
+ mlx5_core_dbg(mdev, "aso sq->sqn = 0x%x\n", sq->sqn);
+
+ kvfree(sqc_data);
+ return 0;
+
+err_free_asosq:
+ mlx5_aso_free_sq(sq);
+err_out:
+ kvfree(sqc_data);
+ return err;
+}
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn)
+{
+ int numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ struct mlx5_aso *aso;
+ int err;
+
+ aso = kzalloc(sizeof(*aso), GFP_KERNEL);
+ if (!aso)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_aso_create_cq(mdev, numa_node, &aso->cq);
+ if (err)
+ goto err_cq;
+
+ err = mlx5_aso_create_sq(mdev, numa_node, pdn, aso);
+ if (err)
+ goto err_sq;
+
+ return aso;
+
+err_sq:
+ mlx5_aso_destroy_cq(&aso->cq);
+err_cq:
+ kfree(aso);
+ return ERR_PTR(err);
+}
+
+void mlx5_aso_destroy(struct mlx5_aso *aso)
+{
+ if (IS_ERR_OR_NULL(aso))
+ return;
+
+ mlx5_aso_destroy_sq(aso);
+ mlx5_aso_destroy_cq(&aso->cq);
+ kfree(aso);
+}
+
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+ struct mlx5_aso_wqe *aso_wqe,
+ u32 obj_id, u32 opc_mode)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &aso_wqe->ctrl;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((opc_mode << MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT) |
+ (aso->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_ACCESS_ASO);
+ cseg->qpn_ds = cpu_to_be32((aso->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt);
+ cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ cseg->general_id = cpu_to_be32(obj_id);
+}
+
+void *mlx5_aso_get_wqe(struct mlx5_aso *aso)
+{
+ u16 pi;
+
+ pi = mlx5_wq_cyc_ctr2ix(&aso->wq, aso->pc);
+ return mlx5_wq_cyc_get_wqe(&aso->wq, pi);
+}
+
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg)
+{
+ doorbell_cseg->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ if (with_data)
+ aso->pc += MLX5_ASO_WQEBBS_DATA;
+ else
+ aso->pc += MLX5_ASO_WQEBBS;
+ *aso->wq.db = cpu_to_be32(aso->pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)doorbell_cseg, aso->uar_map);
+
+ /* Ensure doorbell is written on uar_page before poll_cq */
+ WRITE_ONCE(doorbell_cseg, NULL);
+}
+
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data)
+{
+ struct mlx5_aso_cq *cq = &aso->cq;
+ struct mlx5_cqe64 *cqe;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return -ETIMEDOUT;
+
+	/* aso->cc must be updated only after mlx5_cqwq_update_db_record(),
+	 * otherwise a cq overrun may occur
+	 */
+ mlx5_cqwq_pop(&cq->wq);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ struct mlx5_err_cqe *err_cqe;
+
+ mlx5_core_err(cq->mdev, "Bad OP in ASOSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+
+ err_cqe = (struct mlx5_err_cqe *)cqe;
+ mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n",
+ err_cqe->vendor_err_synd);
+ mlx5_core_err(cq->mdev, "syndrome=%x\n",
+ err_cqe->syndrome);
+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
+ 16, 1, err_cqe,
+ sizeof(*err_cqe), false);
+ }
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ if (with_data)
+ aso->cc += MLX5_ASO_WQEBBS_DATA;
+ else
+ aso->cc += MLX5_ASO_WQEBBS;
+
+ return 0;
+}
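
For orientation, a minimal usage sketch of how a consumer (for example the flow-meter offload that builds on this series) would drive the API above. The object id, the opcode modifier and the unbounded poll loop are illustrative only; real callers bound the retries and pick the opc_mode matching their object type.

    /* Sketch, not part of the patch: one control-only ASO WQE, then poll. */
    static int aso_sketch(struct mlx5_core_dev *mdev, u32 pdn, u32 obj_id)
    {
    	struct mlx5_aso_wqe *wqe;
    	struct mlx5_aso *aso;
    	int err;

    	aso = mlx5_aso_create(mdev, pdn);
    	if (IS_ERR(aso))
    		return PTR_ERR(aso);

    	wqe = mlx5_aso_get_wqe(aso);
    	mlx5_aso_build_wqe(aso, DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS),
    			   wqe, obj_id, MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER);
    	mlx5_aso_post_wqe(aso, false, &wqe->ctrl);

    	do {	/* real callers bound this loop */
    		err = mlx5_aso_poll_cq(aso, false);
    	} while (err == -ETIMEDOUT);

    	mlx5_aso_destroy(aso);
    	return err;
    }
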
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
new file mode 100644
index 000000000000..2d40dcf9d42e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LIB_ASO_H__
+#define __MLX5_LIB_ASO_H__
+
+#include <linux/mlx5/qp.h>
+#include "mlx5_core.h"
+
+#define MLX5_ASO_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_BB))
+#define MLX5_ASO_WQEBBS_DATA \
+ (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_BB))
+#define ASO_CTRL_READ_EN BIT(0)
+#define MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT 24
+#define MLX5_MACSEC_ASO_DS_CNT (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_DS))
+
+struct mlx5_wqe_aso_ctrl_seg {
+ __be32 va_h;
+	__be32 va_l; /* includes read_enable */
+ __be32 l_key;
+ u8 data_mask_mode;
+ u8 condition_1_0_operand;
+ u8 condition_1_0_offset;
+ u8 data_offset_condition_operand;
+ __be32 condition_0_data;
+ __be32 condition_0_mask;
+ __be32 condition_1_data;
+ __be32 condition_1_mask;
+ __be64 bitwise_data;
+ __be64 data_mask;
+};
+
+struct mlx5_wqe_aso_data_seg {
+ __be32 bytewise_data[16];
+};
+
+struct mlx5_aso_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_aso_ctrl_seg aso_ctrl;
+};
+
+struct mlx5_aso_wqe_data {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_aso_ctrl_seg aso_ctrl;
+ struct mlx5_wqe_aso_data_seg aso_data;
+};
+
+enum {
+ MLX5_ASO_LOGICAL_AND,
+ MLX5_ASO_LOGICAL_OR,
+};
+
+enum {
+ MLX5_ASO_ALWAYS_FALSE,
+ MLX5_ASO_ALWAYS_TRUE,
+ MLX5_ASO_EQUAL,
+ MLX5_ASO_NOT_EQUAL,
+ MLX5_ASO_GREATER_OR_EQUAL,
+ MLX5_ASO_LESSER_OR_EQUAL,
+ MLX5_ASO_LESSER,
+ MLX5_ASO_GREATER,
+ MLX5_ASO_CYCLIC_GREATER,
+ MLX5_ASO_CYCLIC_LESSER,
+};
+
+enum {
+ MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT,
+ MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE,
+ MLX5_ASO_DATA_MASK_MODE_CALCULATED_64BYTE,
+};
+
+enum {
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER = 0x2,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC = 0x5,
+};
+
+struct mlx5_aso;
+
+void *mlx5_aso_get_wqe(struct mlx5_aso *aso);
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+ struct mlx5_aso_wqe *aso_wqe,
+ u32 obj_id, u32 opc_mode);
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg);
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data);
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn);
+void mlx5_aso_destroy(struct mlx5_aso *aso);
+#endif /* __MLX5_LIB_ASO_H__ */
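
A quick worked check of the sizing macros above, derived from the struct layouts in this header (MLX5_SEND_WQE_BB is 64 bytes, MLX5_SEND_WQE_DS is 16):

    /* sizeof(mlx5_wqe_ctrl_seg)      = 16
     * sizeof(mlx5_wqe_aso_ctrl_seg)  = 48
     *   -> sizeof(mlx5_aso_wqe)      = 64  -> MLX5_ASO_WQEBBS      = 1
     * sizeof(mlx5_wqe_aso_data_seg)  = 64
     *   -> sizeof(mlx5_aso_wqe_data) = 128 -> MLX5_ASO_WQEBBS_DATA = 2
     * MLX5_MACSEC_ASO_DS_CNT = DIV_ROUND_UP(64, 16) = 4 data segments
     */
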
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 91e806c1aa21..d3a9ae80fd30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -65,6 +65,8 @@ enum {
MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4),
MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
+ MLX5_MTPPS_FS_NPPS_PERIOD = BIT(0x9),
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS = BIT(0xa),
};
static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
@@ -72,6 +74,13 @@ static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev));
}
+static bool mlx5_npps_real_time_supported(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_real_time_mode(mdev) &&
+ MLX5_CAP_MCAM_FEATURE(mdev, npps_period) &&
+ MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns));
+}
+
static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
{
return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
@@ -459,9 +468,95 @@ static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec)
return find_target_cycles(mdev, target_ns);
}
-static u64 perout_conf_real_time(s64 sec)
+static u64 perout_conf_real_time(s64 sec, u32 nsec)
+{
+ return (u64)nsec | (u64)sec << 32;
+}
+
+static int perout_conf_1pps(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+ u64 *time_stamp, bool real_time)
+{
+ struct timespec64 ts;
+ s64 ns;
+
+ ts.tv_nsec = rq->perout.period.nsec;
+ ts.tv_sec = rq->perout.period.sec;
+ ns = timespec64_to_ns(&ts);
+
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+
+ *time_stamp = real_time ? perout_conf_real_time(rq->perout.start.sec, 0) :
+ perout_conf_internal_timer(mdev, rq->perout.start.sec);
+
+ return 0;
+}
+
+#define MLX5_MAX_PULSE_DURATION (BIT(__mlx5_bit_sz(mtpps_reg, out_pulse_duration_ns)) - 1)
+static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev,
+ struct ptp_clock_request *rq,
+ u32 *out_pulse_duration_ns)
{
- return (u64)sec << 32;
+ struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ u32 out_pulse_duration;
+ struct timespec64 ts;
+
+ if (rq->perout.flags & PTP_PEROUT_DUTY_CYCLE) {
+ ts.tv_sec = rq->perout.on.sec;
+ ts.tv_nsec = rq->perout.on.nsec;
+ out_pulse_duration = (u32)timespec64_to_ns(&ts);
+ } else {
+		/* Default out_pulse_duration_ns to 50% of the pulse period */
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ out_pulse_duration = (u32)timespec64_to_ns(&ts) >> 1;
+ }
+
+ if (out_pulse_duration < pps_info->min_out_pulse_duration_ns ||
+ out_pulse_duration > MLX5_MAX_PULSE_DURATION) {
+ mlx5_core_err(mdev, "NPPS pulse duration %u is not in [%llu, %lu]\n",
+ out_pulse_duration, pps_info->min_out_pulse_duration_ns,
+ MLX5_MAX_PULSE_DURATION);
+ return -EINVAL;
+ }
+ *out_pulse_duration_ns = out_pulse_duration;
+
+ return 0;
+}
+
+static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+ u32 *field_select, u32 *out_pulse_duration_ns,
+ u64 *period, u64 *time_stamp)
+{
+ struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ struct ptp_clock_time *time = &rq->perout.start;
+ struct timespec64 ts;
+
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ if (timespec64_to_ns(&ts) < pps_info->min_npps_period) {
+ mlx5_core_err(mdev, "NPPS period is lower than minimal npps period %llu\n",
+ pps_info->min_npps_period);
+ return -EINVAL;
+ }
+ *period = perout_conf_real_time(rq->perout.period.sec, rq->perout.period.nsec);
+
+ if (mlx5_perout_conf_out_pulse_duration(mdev, rq, out_pulse_duration_ns))
+ return -EINVAL;
+
+ *time_stamp = perout_conf_real_time(time->sec, time->nsec);
+ *field_select |= MLX5_MTPPS_FS_NPPS_PERIOD |
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS;
+
+ return 0;
+}
+
+static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags)
+{
+ return ((!mlx5_npps_real_time_supported(mdev) && flags) ||
+ (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE));
}
static int mlx5_perout_configure(struct ptp_clock_info *ptp,
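
Two details above are easy to miss: perout_conf_real_time() packs seconds into the high 32 bits and nanoseconds into the low 32 bits, and the shift in perout_conf_1pps() is a compact 1 Hz period check. A worked illustration (hypothetical values):

    u64 ts = perout_conf_real_time(5, 500000000);
    /* ts == (5ULL << 32) | 500000000 == 0x000000051DCD6500 */

    /* 1PPS check: (ns >> 1) == 500000000 holds for ns == 10^9
     * (and, with the dropped low bit, 10^9 + 1), i.e. a one-second period.
     */
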
@@ -474,20 +569,20 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
container_of(clock, struct mlx5_core_dev, clock);
bool rt_mode = mlx5_real_time_mode(mdev);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
- struct timespec64 ts;
+ u32 out_pulse_duration_ns = 0;
u32 field_select = 0;
+ u64 npps_period = 0;
u64 time_stamp = 0;
u8 pin_mode = 0;
u8 pattern = 0;
int pin = -1;
int err = 0;
- s64 ns;
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
/* Reject requests with unsupported flags */
- if (rq->perout.flags)
+ if (mlx5_perout_verify_flags(mdev, rq->perout.flags))
return -EOPNOTSUPP;
if (rq->perout.index >= clock->ptp_info.n_pins)
@@ -500,29 +595,25 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
if (on) {
bool rt_mode = mlx5_real_time_mode(mdev);
- s64 sec = rq->perout.start.sec;
-
- if (rq->perout.start.nsec)
- return -EINVAL;
pin_mode = MLX5_PIN_MODE_OUT;
pattern = MLX5_OUT_PATTERN_PERIODIC;
- ts.tv_sec = rq->perout.period.sec;
- ts.tv_nsec = rq->perout.period.nsec;
- ns = timespec64_to_ns(&ts);
- if ((ns >> 1) != 500000000LL)
+ if (rt_mode && rq->perout.start.sec > U32_MAX)
return -EINVAL;
- if (rt_mode && sec > U32_MAX)
- return -EINVAL;
-
- time_stamp = rt_mode ? perout_conf_real_time(sec) :
- perout_conf_internal_timer(mdev, sec);
-
field_select |= MLX5_MTPPS_FS_PIN_MODE |
MLX5_MTPPS_FS_PATTERN |
MLX5_MTPPS_FS_TIME_STAMP;
+
+ if (mlx5_npps_real_time_supported(mdev))
+ err = perout_conf_npps_real_time(mdev, rq, &field_select,
+ &out_pulse_duration_ns, &npps_period,
+ &time_stamp);
+ else
+ err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode);
+ if (err)
+ return err;
}
MLX5_SET(mtpps_reg, in, pin, pin);
@@ -531,7 +622,8 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
MLX5_SET(mtpps_reg, in, enable, on);
MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
MLX5_SET(mtpps_reg, in, field_select, field_select);
-
+ MLX5_SET64(mtpps_reg, in, npps_period, npps_period);
+ MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns);
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
return err;
@@ -687,6 +779,13 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
cap_max_num_of_pps_out_pins);
+ if (MLX5_CAP_MCAM_FEATURE(mdev, npps_period))
+ clock->pps_info.min_npps_period = 1 << MLX5_GET(mtpps_reg, out,
+ cap_log_min_npps_period);
+ if (MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns))
+ clock->pps_info.min_out_pulse_duration_ns = 1 << MLX5_GET(mtpps_reg, out,
+ cap_log_min_out_pulse_duration_ns);
+
clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
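
Note that the two new capability fields are log2-encoded, hence the `1 <<` above; for an illustrative readback value:

    /* cap_log_min_npps_period == 26
     *   -> min_npps_period = 1 << 26 = 67108864 ns (~67 ms, ~14.9 Hz max)
     */
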
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
index bced2efe9bef..adefde3ea941 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -14,7 +14,7 @@ static LIST_HEAD(devcom_list);
struct mlx5_devcom_component {
struct {
void *data;
- } device[MLX5_MAX_PORTS];
+ } device[MLX5_DEVCOM_PORTS_SUPPORTED];
mlx5_devcom_event_handler_t handler;
struct rw_semaphore sem;
@@ -25,7 +25,7 @@ struct mlx5_devcom_list {
struct list_head list;
struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
- struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+ struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED];
};
struct mlx5_devcom {
@@ -74,13 +74,15 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (!mlx5_core_is_pf(dev))
return NULL;
+ if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
+ return NULL;
sguid0 = mlx5_query_nic_system_image_guid(dev);
list_for_each_entry(iter, &devcom_list, list) {
struct mlx5_core_dev *tmp_dev = NULL;
idx = -1;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
if (iter->devs[i])
tmp_dev = iter->devs[i];
else
@@ -134,11 +136,11 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
kfree(devcom);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (priv->devs[i])
break;
- if (i != MLX5_MAX_PORTS)
+ if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
return;
list_del(&priv->list);
@@ -191,7 +193,7 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx && comp->device[i].data) {
err = comp->handler(event, comp->device[i].data,
event_data);
@@ -239,7 +241,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
return NULL;
}
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx)
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index 939d5bf1581b..94313c18bb64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -6,6 +6,8 @@
#include <linux/mlx5/driver.h>
+#define MLX5_DEVCOM_PORTS_SUPPORTED 2
+
enum mlx5_devcom_components {
MLX5_DEVCOM_ESW_OFFLOADS,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
index 3d5e57ff558c..9482e51ac82a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -12,13 +12,16 @@ struct mlx5_dm {
spinlock_t lock;
unsigned long *steering_sw_icm_alloc_blocks;
unsigned long *header_modify_sw_icm_alloc_blocks;
+ unsigned long *header_modify_pattern_sw_icm_alloc_blocks;
};
struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
{
+ u64 header_modify_pattern_icm_blocks = 0;
u64 header_modify_icm_blocks = 0;
u64 steering_icm_blocks = 0;
struct mlx5_dm *dm;
+ bool support_v2;
if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
return NULL;
@@ -35,8 +38,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
dm->steering_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(steering_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
+ bitmap_zalloc(steering_icm_blocks, GFP_KERNEL);
if (!dm->steering_sw_icm_alloc_blocks)
goto err_steering;
}
@@ -47,16 +49,33 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
dm->header_modify_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
+ bitmap_zalloc(header_modify_icm_blocks, GFP_KERNEL);
if (!dm->header_modify_sw_icm_alloc_blocks)
goto err_modify_hdr;
}
+ support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) &&
+ MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address);
+
+ if (support_v2) {
+ header_modify_pattern_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->header_modify_pattern_sw_icm_alloc_blocks =
+ bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL);
+ if (!dm->header_modify_pattern_sw_icm_alloc_blocks)
+ goto err_pattern;
+ }
+
return dm;
+err_pattern:
+ bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+
err_modify_hdr:
- kfree(dm->steering_sw_icm_alloc_blocks);
+ bitmap_free(dm->steering_sw_icm_alloc_blocks);
err_steering:
kfree(dm);
@@ -75,7 +94,7 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks,
BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
- kfree(dm->steering_sw_icm_alloc_blocks);
+ bitmap_free(dm->steering_sw_icm_alloc_blocks);
}
if (dm->header_modify_sw_icm_alloc_blocks) {
@@ -83,7 +102,15 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
BIT(MLX5_CAP_DEV_MEM(dev,
log_header_modify_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
- kfree(dm->header_modify_sw_icm_alloc_blocks);
+ bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+ }
+
+ if (dm->header_modify_pattern_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ bitmap_free(dm->header_modify_pattern_sw_icm_alloc_blocks);
}
kfree(dm);
@@ -130,6 +157,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
log_header_modify_sw_icm_size);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
@@ -203,6 +237,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type
icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
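
The block accounting used throughout this file, in isolation (a sketch with hypothetical log sizes; the real values come from MLX5_CAP_DEV_MEM()):

    unsigned int nblocks = BIT(21 - 12);	/* 2MB area, 4KB blocks -> 512 */
    unsigned long *map;

    map = bitmap_zalloc(nblocks, GFP_KERNEL);	/* replaces kcalloc of longs */
    if (!map)
    	return -ENOMEM;
    /* ... set/clear bits as blocks are allocated and freed ... */
    bitmap_free(map);
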
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 97e5845b4cfd..df58cba37930 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -124,6 +124,10 @@ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
if (mlx5_chains_ignore_flow_level_supported(chains))
return UINT_MAX;
+ if (!chains->dev->priv.eswitch ||
+ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
+ return 1;
+
/* We should get here only for eswitch case */
return FDB_TC_MAX_PRIO;
}
@@ -208,7 +212,7 @@ static int
create_chain_restore(struct fs_chain *chain)
{
struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
- char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
+ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
struct mlx5_fs_chains *chains = chain->chains;
enum mlx5e_tc_attr_to_reg chain_to_reg;
struct mlx5_modify_hdr *mod_hdr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
index b63dec24747a..b78f2ba25c19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
@@ -408,6 +408,8 @@ static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
for (tt = 0; tt < MLX5_NUM_TT; tt++) {
struct mlx5_ttc_rule *rule = &rules[tt];
+ if (test_bit(tt, params->ignore_dests))
+ continue;
rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
&params->dests[tt],
ttc_rules[tt].etype,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index 4bad6a5fde56..f240ffe5116c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -92,13 +92,6 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
{
}
-
-static inline int
-mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
- void *buf, int len)
-{
- return 0;
-}
#endif
#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 2f536c5d30b1..032adb21ad4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -83,6 +83,7 @@ int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, voi
enum {
MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS,
MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC,
+ MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC,
};
int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index 839a01da110f..8ff16318e32d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -122,7 +122,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_mpfs *mpfs = dev->priv.mpfs;
- if (!MLX5_ESWITCH_MANAGER(dev))
+ if (!mpfs)
return;
WARN_ON(!hlist_empty(mpfs->hash));
@@ -137,7 +137,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
int err = 0;
u32 index;
- if (!MLX5_ESWITCH_MANAGER(dev))
+ if (!mpfs)
return 0;
mutex_lock(&mpfs->lock);
@@ -185,7 +185,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
int err = 0;
u32 index;
- if (!MLX5_ESWITCH_MANAGER(dev))
+ if (!mpfs)
return 0;
mutex_lock(&mpfs->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
index e042e0924079..4571c56ec3c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/port.h>
#include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
new file mode 100644
index 000000000000..9b8c051ccf65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+#include "smfs.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec)
+{
+ struct mlx5dr_match_parameters matcher_mask = {};
+
+ matcher_mask.match_buf = (u64 *)&spec->match_criteria;
+ matcher_mask.match_sz = DR_SZ_MATCH_PARAM;
+
+ return mlx5dr_matcher_create(table, priority, spec->match_criteria_enable, &matcher_mask);
+}
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher)
+{
+ mlx5dr_matcher_destroy(matcher);
+}
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft)
+{
+ return mlx5dr_table_get_from_fs_ft(ft);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table)
+{
+ return mlx5dr_action_create_dest_table(table);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id)
+{
+ return mlx5dr_action_create_flow_counter(counter_id);
+}
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action)
+{
+ mlx5dr_action_destroy(action);
+}
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+ size_t num_actions, struct mlx5dr_action *actions[],
+ u32 flow_source)
+{
+ struct mlx5dr_match_parameters value = {};
+
+ value.match_buf = (u64 *)spec->match_value;
+ value.match_sz = DR_SZ_MATCH_PARAM;
+
+ return mlx5dr_rule_create(matcher, &value, num_actions, actions, flow_source);
+}
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule)
+{
+ mlx5dr_rule_destroy(rule);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
new file mode 100644
index 000000000000..452d0df339ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_LIB_SMFS_H__
+#define __MLX5_LIB_SMFS_H__
+
+#include "steering/mlx5dr.h"
+#include "steering/dr_types.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec);
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher);
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id);
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action);
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+ size_t num_actions, struct mlx5dr_action *actions[],
+ u32 flow_source);
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule);
+
+#endif /* __MLX5_LIB_SMFS_H__ */
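
A minimal sketch of how these thin wrappers chain together, assuming existing ft/dest_ft flow tables and a populated spec (error handling elided):

    struct mlx5dr_table *tbl = mlx5_smfs_table_get_from_fs_ft(ft);
    struct mlx5dr_matcher *matcher;
    struct mlx5dr_action *actions[1];
    struct mlx5dr_rule *rule;

    matcher = mlx5_smfs_matcher_create(tbl, 0 /* priority */, spec);
    actions[0] = mlx5_smfs_action_create_dest_table(
    			mlx5_smfs_table_get_from_fs_ft(dest_ft));
    rule = mlx5_smfs_rule_create(matcher, spec, 1, actions, 0 /* flow_source */);

    /* teardown in reverse order */
    mlx5_smfs_rule_destroy(rule);
    mlx5_smfs_action_destroy(actions[0]);
    mlx5_smfs_matcher_destroy(matcher);
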
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
index c1df0d3595d8..696e45e2bd06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
@@ -10,6 +10,7 @@ struct mlx5_timeouts {
static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
[MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000,
+ [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000,
[MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
[MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
[MLX5_TO_FW_INIT_MS] = 2000,
@@ -31,20 +32,17 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type
dev->timeouts->to[type] = val;
}
-void mlx5_tout_set_def_val(struct mlx5_core_dev *dev)
+int mlx5_tout_init(struct mlx5_core_dev *dev)
{
int i;
- for (i = 0; i < MAX_TIMEOUT_TYPES; i++)
- tout_set(dev, tout_def_sw_val[i], i);
-}
-
-int mlx5_tout_init(struct mlx5_core_dev *dev)
-{
dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL);
if (!dev->timeouts)
return -ENOMEM;
+ for (i = 0; i < MAX_TIMEOUT_TYPES; i++)
+ tout_set(dev, tout_def_sw_val[i], i);
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
index 1c42ead782fa..bc9e9aeda847 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
@@ -7,6 +7,7 @@
enum mlx5_timeouts_types {
/* pre init timeouts (not read from FW) */
MLX5_TO_FW_PRE_INIT_TIMEOUT_MS,
+ MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS,
MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS,
MLX5_TO_FW_PRE_INIT_WAIT_MS,
@@ -34,7 +35,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev);
void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
-void mlx5_tout_set_def_val(struct mlx5_core_dev *dev);
u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
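
The token-pasting macro keeps call sites terse; for example (illustrative):

    /* Expands to _mlx5_tout_ms(dev, MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS)
     * and returns the timeout in ms (7200000 by default, per tout_def_sw_val[]).
     */
    u64 t = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
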
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
index e3b0a131c3e1..d55e15c1f380 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/refcount.h>
#include <linux/mlx5/driver.h>
#include <net/vxlan.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7df9c7f8d9c8..283c4cc28944 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -62,9 +62,7 @@
#include "lib/mlx5.h"
#include "lib/tout.h"
#include "fpga/core.h"
-#include "fpga/ipsec.h"
-#include "accel/ipsec.h"
-#include "accel/tls.h"
+#include "en_accel/ipsec.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
#include "lib/geneve.h"
@@ -92,12 +90,16 @@ module_param_named(prof_sel, prof_sel, uint, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
static u32 sw_owner_id[4];
+#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1)
+static DEFINE_IDA(sw_vhca_ida);
enum {
MLX5_ATOMIC_REQ_MODE_BE = 0x0,
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};
+#define LOG_MAX_SUPPORTED_QPS 0xff
+
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
@@ -109,7 +111,7 @@ static struct mlx5_profile profile[] = {
[2] = {
.mask = MLX5_PROF_MASK_QP_SIZE |
MLX5_PROF_MASK_MR_CACHE,
- .log_max_qp = 18,
+ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
.mr_cache[0] = {
.size = 500,
.limit = 250
@@ -177,30 +179,30 @@ static struct mlx5_profile profile[] = {
},
};
-static int fw_initializing(struct mlx5_core_dev *dev)
-{
- return ioread32be(&dev->iseg->initializing) >> 31;
-}
-
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
u32 warn_time_mili)
{
unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
+ u32 fw_initializing;
int err = 0;
- while (fw_initializing(dev)) {
- if (time_after(jiffies, end)) {
+ do {
+ fw_initializing = ioread32be(&dev->iseg->initializing);
+ if (!(fw_initializing >> 31))
+ break;
+ if (time_after(jiffies, end) ||
+ test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
err = -EBUSY;
break;
}
if (warn_time_mili && time_after(jiffies, warn)) {
- mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
- jiffies_to_msecs(end - warn) / 1000);
+ mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n",
+ jiffies_to_msecs(end - warn) / 1000, fw_initializing);
warn = jiffies + msecs_to_jiffies(warn_time_mili);
}
msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT));
- }
+ } while (true);
return err;
}
@@ -314,13 +316,6 @@ struct mlx5_reg_host_endianness {
u8 rsvd[15];
};
-#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
-
-enum {
- MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
- MLX5_DEV_CAP_FLAG_DCT,
-};
-
static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
switch (size) {
@@ -484,10 +479,69 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
}
+static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
+ return err;
+}
+
+bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ &val);
+
+ if (!err)
+ return val.vbool;
+
+ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
+ return MLX5_CAP_GEN(dev, roce);
+}
+EXPORT_SYMBOL(mlx5_is_roce_on);
+
+static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
+ if (err)
+ return err;
+
+ if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) ||
+ !(dev->priv.sw_vhca_id > 0))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
+ capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur,
+ MLX5_ST_SZ_BYTES(cmd_hca_cap_2));
+ MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1);
+
+ return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2);
+}
+
static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
{
struct mlx5_profile *prof = &dev->profile;
void *set_hca_cap;
+ int max_uc_list;
int err;
err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
@@ -507,7 +561,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
to_fw_pkey_sz(dev, 128));
/* Check log_max_qp from HCA caps to set in current profile */
- if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
+ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
+ prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp));
+ } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
prof->log_max_qp,
MLX5_CAP_GEN_MAX(dev, log_max_qp));
@@ -559,7 +615,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
if (MLX5_CAP_GEN(dev, roce_rw_supported))
- MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
+ MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
+ mlx5_is_roce_on(dev));
+
+ max_uc_list = max_uc_list_get_devlink_param(dev);
+ if (max_uc_list > 0)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list,
+ ilog2(max_uc_list));
return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
}
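
To make the sentinel concrete (worked values, not code from the patch):

    /* prof->log_max_qp == LOG_MAX_SUPPORTED_QPS (0xff) now means
     * "as large as the HCA allows, capped at 18":
     *   MLX5_CAP_GEN_MAX(dev, log_max_qp) == 17 -> min_t(u8, 18, 17) == 17
     *   MLX5_CAP_GEN_MAX(dev, log_max_qp) == 24 -> min_t(u8, 18, 24) == 18
     */
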
@@ -580,7 +642,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
*/
static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
{
- return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) ||
+ return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
(!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
}
@@ -609,6 +671,33 @@ static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
return err;
}
+static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev,
+ void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, port_selection_cap))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) ||
+ !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur,
+ MLX5_ST_SZ_BYTES(port_selection_cap));
+ MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1);
+
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION);
+
+ return err;
+}
+
static int set_hca_cap(struct mlx5_core_dev *dev)
{
int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
@@ -646,6 +735,20 @@ static int set_hca_cap(struct mlx5_core_dev *dev)
goto out;
}
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_2(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_2 failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_port_selection(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n");
+ goto out;
+ }
+
out:
kfree(set_ctx);
return err;
@@ -711,10 +814,9 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out);
if (err) {
- u32 syndrome;
- u8 status;
+ u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome);
+ u8 status = MLX5_GET(query_issi_out, query_out, status);
- mlx5_cmd_mbox_status(query_out, &status, &syndrome);
if (!status || syndrome == MLX5_DRIVER_SYND) {
mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
err, status, syndrome);
@@ -914,6 +1016,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_sf_table_cleanup;
}
+ err = mlx5_fs_core_alloc(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc flow steering\n");
+ goto err_fs;
+ }
+
dev->dm = mlx5_dm_create(dev);
if (IS_ERR(dev->dm))
mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
@@ -924,6 +1032,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
return 0;
+err_fs:
+ mlx5_sf_table_cleanup(dev);
err_sf_table_cleanup:
mlx5_sf_hw_table_cleanup(dev);
err_sf_hw_table_cleanup:
@@ -961,6 +1071,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_dm_cleanup(dev);
+ mlx5_fs_core_free(dev);
mlx5_sf_table_cleanup(dev);
mlx5_sf_hw_table_cleanup(dev);
mlx5_vhca_event_cleanup(dev);
@@ -981,7 +1092,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_devcom_unregister_device(dev->priv.devcom);
}
-static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout)
{
int err;
@@ -992,15 +1103,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
if (mlx5_core_is_pf(dev))
pcie_print_link_status(dev->pdev);
- mlx5_tout_set_def_val(dev);
-
/* wait for firmware to accept initialization segments configurations
*/
- err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT),
+ err = wait_fw_init(dev, timeout,
mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
if (err) {
mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
- mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
+ timeout);
return err;
}
@@ -1021,10 +1130,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
+ mlx5_start_health_poll(dev);
+
err = mlx5_core_enable_hca(dev, 0);
if (err) {
mlx5_core_err(dev, "enable hca failed\n");
- goto err_cmd_cleanup;
+ goto stop_health_poll;
}
err = mlx5_core_set_issi(dev);
@@ -1076,8 +1187,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
mlx5_core_err(dev, "query hca failed\n");
goto reclaim_boot_pages;
}
-
- mlx5_start_health_poll(dev);
+ mlx5_start_health_fw_log_up(dev);
return 0;
@@ -1085,6 +1195,8 @@ reclaim_boot_pages:
mlx5_reclaim_startup_pages(dev);
err_disable_hca:
mlx5_core_disable_hca(dev, 0);
+stop_health_poll:
+ mlx5_stop_health_poll(dev, boot);
err_cmd_cleanup:
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
@@ -1096,7 +1208,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
{
int err;
- mlx5_stop_health_poll(dev, boot);
err = mlx5_cmd_teardown_hca(dev);
if (err) {
mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
@@ -1104,6 +1215,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
}
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
+ mlx5_stop_health_poll(dev, boot);
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
@@ -1159,15 +1271,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_fpga_start;
}
- mlx5_accel_ipsec_init(dev);
-
- err = mlx5_accel_tls_init(dev);
- if (err) {
- mlx5_core_err(dev, "TLS device start failed %d\n", err);
- goto err_tls_start;
- }
-
- err = mlx5_init_fs(dev);
+ err = mlx5_fs_core_init(dev);
if (err) {
mlx5_core_err(dev, "Failed to init flow steering\n");
goto err_fs;
@@ -1212,11 +1316,8 @@ err_ec:
err_vhca:
mlx5_vhca_event_stop(dev);
err_set_hca:
- mlx5_cleanup_fs(dev);
+ mlx5_fs_core_cleanup(dev);
err_fs:
- mlx5_accel_tls_cleanup(dev);
-err_tls_start:
- mlx5_accel_ipsec_cleanup(dev);
mlx5_fpga_device_stop(dev);
err_fpga_start:
mlx5_rsc_dump_cleanup(dev);
@@ -1237,13 +1338,12 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
{
mlx5_sf_dev_table_destroy(dev);
mlx5_sriov_detach(dev);
+ mlx5_eswitch_disable(dev->priv.eswitch);
mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
- mlx5_cleanup_fs(dev);
- mlx5_accel_ipsec_cleanup(dev);
- mlx5_accel_tls_cleanup(dev);
+ mlx5_fs_core_cleanup(dev);
mlx5_fpga_device_stop(dev);
mlx5_rsc_dump_cleanup(dev);
mlx5_hv_vhca_cleanup(dev->hv_vhca);
@@ -1258,12 +1358,14 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
int mlx5_init_one(struct mlx5_core_dev *dev)
{
+ struct devlink *devlink = priv_to_devlink(dev);
int err = 0;
+ devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
dev->state = MLX5_DEVICE_STATE_UP;
- err = mlx5_function_setup(dev, true);
+ err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
if (err)
goto err_function;
@@ -1288,6 +1390,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
goto err_register;
mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
return 0;
err_register:
@@ -1302,11 +1405,15 @@ function_teardown:
err_function:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
return err;
}
void mlx5_uninit_one(struct mlx5_core_dev *dev)
{
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
mlx5_unregister_device(dev);
@@ -1325,12 +1432,15 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mlx5_function_teardown(dev, true);
out:
mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
}
-int mlx5_load_one(struct mlx5_core_dev *dev)
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
{
int err = 0;
+ u64 timeout;
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
mlx5_core_warn(dev, "interface is up, NOP\n");
@@ -1339,7 +1449,11 @@ int mlx5_load_one(struct mlx5_core_dev *dev)
/* remove any previous indication of internal error */
dev->state = MLX5_DEVICE_STATE_UP;
- err = mlx5_function_setup(dev, false);
+ if (recovery)
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
+ else
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
+ err = mlx5_function_setup(dev, false, timeout);
if (err)
goto err_function;
@@ -1368,8 +1482,20 @@ out:
return err;
}
-void mlx5_unload_one(struct mlx5_core_dev *dev)
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ int ret;
+
+ devl_lock(devlink);
+ ret = mlx5_load_one_devl_locked(dev, recovery);
+ devl_unlock(devlink);
+ return ret;
+}
+
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev)
{
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&dev->intf_state_mutex);
mlx5_detach_device(dev);
@@ -1387,6 +1513,15 @@ out:
mutex_unlock(&dev->intf_state_mutex);
}
+void mlx5_unload_one(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ devl_lock(devlink);
+ mlx5_unload_one_devl_locked(dev);
+ devl_unlock(devlink);
+}
+
static const int types[] = {
MLX5_CAP_GENERAL,
MLX5_CAP_GENERAL_2,
@@ -1409,6 +1544,8 @@ static const int types[] = {
MLX5_CAP_IPSEC,
MLX5_CAP_PORT_SELECTION,
MLX5_CAP_DEV_SHAMPO,
+ MLX5_CAP_MACSEC,
+ MLX5_CAP_ADV_VIRTUALIZATION,
};
static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
@@ -1451,7 +1588,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
+ lockdep_register_key(&dev->lock_key);
mutex_init(&dev->intf_state_mutex);
+ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1463,8 +1602,8 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
INIT_LIST_HEAD(&priv->pgdir_list);
priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
- priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
- mlx5_debugfs_root);
+ priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device),
+ mlx5_debugfs_root);
INIT_LIST_HEAD(&priv->traps);
err = mlx5_tout_init(dev);
@@ -1489,6 +1628,18 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
if (err)
goto err_hca_caps;
+	/* The conjunction of sw_vhca_id with sw_owner_id is a globally
+	 * unique id per function which uses mlx5_core.
+	 * Those values are supplied to FW as part of the init HCA command and
+	 * are used by both driver and FW where applicable.
+	 */
+ dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1,
+ MAX_SW_VHCA_ID,
+ GFP_KERNEL);
+ if (dev->priv.sw_vhca_id < 0)
+ mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n",
+ dev->priv.sw_vhca_id);
+
return 0;
err_hca_caps:
@@ -1500,12 +1651,13 @@ err_pagealloc_init:
err_health_init:
mlx5_tout_cleanup(dev);
err_timeout_init:
- debugfs_remove(dev->priv.dbg_root);
+ debugfs_remove(dev->priv.dbg.dbg_root);
mutex_destroy(&priv->pgdir_mutex);
mutex_destroy(&priv->alloc_mutex);
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
return err;
}
@@ -1513,17 +1665,21 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
+ if (priv->sw_vhca_id > 0)
+ ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id);
+
mlx5_hca_caps_free(dev);
mlx5_adev_cleanup(dev);
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_tout_cleanup(dev);
- debugfs_remove_recursive(dev->priv.dbg_root);
+ debugfs_remove_recursive(dev->priv.dbg.dbg_root);
mutex_destroy(&priv->pgdir_mutex);
mutex_destroy(&priv->alloc_mutex);
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
}
static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -1594,7 +1750,13 @@ static void remove_one(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
+ /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
+ * fw_reset before unregistering the devlink.
+ */
+ mlx5_drain_fw_reset(dev);
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
devlink_unregister(devlink);
+ mlx5_sriov_disable(pdev);
mlx5_crdump_disable(dev);
mlx5_drain_health_wq(dev);
mlx5_uninit_one(dev);
@@ -1604,12 +1766,28 @@ static void remove_one(struct pci_dev *pdev)
mlx5_devlink_free(devlink);
}
+#define mlx5_pci_trace(dev, fmt, ...) ({ \
+ struct mlx5_core_dev *__dev = (dev); \
+ mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \
+ __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \
+ __dev->pci_status, ##__VA_ARGS__); \
+})
+
+static const char *result2str(enum pci_ers_result result)
+{
+ return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" :
+ result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" :
+ result == PCI_ERS_RESULT_RECOVERED ? "recovered" :
+ "unknown";
+}
+
static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ enum pci_ers_result res;
- mlx5_core_info(dev, "%s was called\n", __func__);
+ mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state);
mlx5_enter_error_state(dev, false);
mlx5_error_sw_reset(dev);
@@ -1617,8 +1795,11 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
- return state == pci_channel_io_perm_failure ?
+ res = state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+
+ mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res));
+ return res;
}
/* wait for the device to show vital signs by waiting
@@ -1652,28 +1833,34 @@ static int wait_vital(struct pci_dev *pdev)
static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
+ enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
- mlx5_core_info(dev, "%s was called\n", __func__);
+ mlx5_pci_trace(dev, "Enter\n");
err = mlx5_pci_enable_device(dev);
if (err) {
mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
__func__, err);
- return PCI_ERS_RESULT_DISCONNECT;
+ goto out;
}
pci_set_master(pdev);
pci_restore_state(pdev);
pci_save_state(pdev);
- if (wait_vital(pdev)) {
- mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
- return PCI_ERS_RESULT_DISCONNECT;
+ err = wait_vital(pdev);
+ if (err) {
+ mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
+ __func__, err);
+ goto out;
}
- return PCI_ERS_RESULT_RECOVERED;
+ res = PCI_ERS_RESULT_RECOVERED;
+out:
+ mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res));
+ return res;
}
static void mlx5_pci_resume(struct pci_dev *pdev)
@@ -1681,14 +1868,16 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
- mlx5_core_info(dev, "%s was called\n", __func__);
+ mlx5_pci_trace(dev, "Enter, loading driver..\n");
- err = mlx5_load_one(dev);
- if (err)
- mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
- __func__, err);
- else
- mlx5_core_info(dev, "%s: device recovered\n", __func__);
+ err = mlx5_load_one(dev, false);
+
+ if (!err)
+ devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+
+ mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
+ !err ? "recovered" : "Failed");
}
static const struct pci_error_handlers mlx5_err_handler = {
@@ -1717,7 +1906,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
}
/* Panic tear down fw command will stop the PCI bus communication
- * with the HCA, so the health polll is no longer needed.
+ * with the HCA, so the health poll is no longer needed.
*/
mlx5_drain_health_wq(dev);
mlx5_stop_health_poll(dev, false);
@@ -1753,6 +1942,7 @@ static void shutdown(struct pci_dev *pdev)
int err;
mlx5_core_info(dev, "Shutdown was called\n");
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
err = mlx5_try_fast_unload(dev);
if (err)
mlx5_unload_one(dev);
@@ -1772,7 +1962,7 @@ static int mlx5_resume(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- return mlx5_load_one(dev);
+ return mlx5_load_one(dev, false);
}
static const struct pci_device_id mlx5_core_pci_table[] = {
@@ -1792,10 +1982,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
{ PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
+ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */
{ 0, }
};
@@ -1804,17 +1996,18 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
void mlx5_disable_device(struct mlx5_core_dev *dev)
{
mlx5_error_sw_reset(dev);
- mlx5_unload_one(dev);
+ mlx5_unload_one_devl_locked(dev);
}
int mlx5_recover_device(struct mlx5_core_dev *dev)
{
- int ret = -EIO;
+ if (!mlx5_core_is_sf(dev)) {
+ mlx5_pci_disable_device(dev);
+ if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
+ return -EIO;
+ }
- mlx5_pci_disable_device(dev);
- if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
- ret = mlx5_load_one(dev);
- return ret;
+ return mlx5_load_one_devl_locked(dev, true);
}
static struct pci_driver mlx5_core_driver = {
@@ -1831,6 +2024,48 @@ static struct pci_driver mlx5_core_driver = {
.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
};
+/**
+ * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
+ * mlx5_core is its driver.
+ * @pdev: The associated PCI device.
+ *
+ * Upon return the interface state lock stays held so the caller can use it
+ * safely. The caller must use the returned mlx5 device only for a narrow
+ * window and put it back with mlx5_vf_put_core_dev() as soon as usage is over.
+ *
+ * Return: Pointer to the associated mlx5_core_dev or NULL.
+ */
+struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
+ if (IS_ERR(mdev))
+ return NULL;
+
+ mutex_lock(&mdev->intf_state_mutex);
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
+ mutex_unlock(&mdev->intf_state_mutex);
+ return NULL;
+ }
+
+ return mdev;
+}
+EXPORT_SYMBOL(mlx5_vf_get_core_dev);
+
+/**
+ * mlx5_vf_put_core_dev - Put the mlx5 core device back.
+ * @mdev: The mlx5 core device.
+ *
+ * Upon return the interface state lock is unlocked and the caller must not
+ * access the mdev anymore.
+ */
+void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
+{
+ mutex_unlock(&mdev->intf_state_mutex);
+}
+EXPORT_SYMBOL(mlx5_vf_put_core_dev);
+
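
A usage sketch for the pair above, e.g. from a vfio/auxiliary driver holding a VF pci_dev (illustrative; vf_pdev is an assumed variable):

    struct mlx5_core_dev *mdev;

    mdev = mlx5_vf_get_core_dev(vf_pdev);	/* NULL if PF driver not bound/up */
    if (!mdev)
    	return -ENODEV;

    /* ... short window of work against the PF's mdev ... */

    mlx5_vf_put_core_dev(mdev);			/* drops intf_state_mutex */
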
static void mlx5_core_verify_params(void)
{
if (prof_sel >= ARRAY_SIZE(profile)) {
@@ -1852,7 +2087,6 @@ static int __init init(void)
get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
mlx5_core_verify_params();
- mlx5_fpga_ipsec_build_fs_cmds();
mlx5_register_debugfs();
err = pci_register_driver(&mlx5_core_driver);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index e019d68062d8..495cca58dccc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <rdma/ib_verbs.h>
#include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index bb677329ea08..a806e3de7b7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -143,6 +143,36 @@ enum mlx5_semaphore_space_address {
#define MLX5_DEFAULT_PROF 2
+static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed,
+ size_t item_size, size_t num_items,
+ const char *func, int line)
+{
+ int inlen;
+
+ if (fixed > INT_MAX || item_size > INT_MAX || num_items > INT_MAX) {
+ mlx5_core_err(dev, "%s: %s:%d: input values too big: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ if (check_mul_overflow((int)item_size, (int)num_items, &inlen)) {
+ mlx5_core_err(dev, "%s: %s:%d: multiplication overflow: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ if (check_add_overflow((int)fixed, inlen, &inlen)) {
+ mlx5_core_err(dev, "%s: %s:%d: addition overflow: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ return inlen;
+}
+
+#define MLX5_FLEXIBLE_INLEN(dev, fixed, item_size, num_items) \
+ mlx5_flexible_inlen(dev, fixed, item_size, num_items, __func__, __LINE__)
+
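As a hedged illustration of the helper above, a caller sizing a
variable-length command buffer might look like this (the manage_pages layout
is borrowed purely for the example; the surrounding function is assumed):

	int inlen;
	u32 *in;

	inlen = MLX5_FLEXIBLE_INLEN(dev, MLX5_ST_SZ_BYTES(manage_pages_in),
				    MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]),
				    num_entries);
	if (inlen < 0)
		return inlen;	/* -ENOMEM; caller and line already logged */

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;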
int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
int mlx5_query_board_id(struct mlx5_core_dev *dev);
int mlx5_cmd_init(struct mlx5_core_dev *dev);
@@ -164,6 +194,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
int mlx5_sriov_attach(struct mlx5_core_dev *dev);
void mlx5_sriov_detach(struct mlx5_core_dev *dev);
int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+void mlx5_sriov_disable(struct pci_dev *pdev);
int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
@@ -176,7 +207,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
-void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
void mlx5_cmd_flush(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
@@ -209,7 +239,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev);
void mlx5_detach_device(struct mlx5_core_dev *dev);
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
int mlx5_dev_list_trylock(void);
@@ -290,7 +320,9 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
int mlx5_init_one(struct mlx5_core_dev *dev);
void mlx5_uninit_one(struct mlx5_core_dev *dev);
void mlx5_unload_one(struct mlx5_core_dev *dev);
-int mlx5_load_one(struct mlx5_core_dev *dev);
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev);
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
@@ -305,5 +337,6 @@ static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
bool mlx5_eth_supported(struct mlx5_core_dev *dev);
bool mlx5_rdma_supported(struct mlx5_core_dev *dev);
bool mlx5_vnet_supported(struct mlx5_core_dev *dev);
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev);
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 8116815663a7..23cb63fa4588 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -22,12 +22,40 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
int msix_vec_count);
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct cpumask *affinity);
-void mlx5_irq_release(struct mlx5_irq *irq);
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs);
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
int mlx5_irq_get_index(struct mlx5_irq *irq);
+struct mlx5_irq_pool;
+#ifdef CONFIG_MLX5_SF
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs);
+struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask);
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs);
+#else
+static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev,
+ struct mlx5_irq **irqs, int num_irqs) {}
+#endif
#endif /* __MLX5_IRQ_H__ */
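The !CONFIG_MLX5_SF stubs above follow the usual kernel pattern: callers may
use the affinity API unconditionally and treat -EOPNOTSUPP as "no SF IRQ pool
in this build, fall back". A hedged sketch (example_request_plain_vectors()
is a hypothetical fallback path):

	err = mlx5_irq_affinity_irqs_request_auto(dev, nirqs, irqs);
	if (err == -EOPNOTSUPP)
		/* SF support compiled out; use the plain per-vector API */
		err = example_request_plain_vectors(dev, nirqs, irqs);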
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index f099a087400e..9d735c343a3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index f6b5451328fc..60596357bfc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -32,7 +32,6 @@
#include <linux/highmem.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/delay.h>
#include <linux/mlx5/driver.h>
#include <linux/xarray.h>
@@ -327,11 +326,12 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
}
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
- int notify_fail, bool ec_function)
+ int event, bool ec_function)
{
u32 function = get_function(func_id, ec_function);
u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
+ int notify_fail = event;
u64 addr;
int err;
u32 *in;
@@ -351,8 +351,10 @@ retry:
if (err) {
if (err == -ENOMEM)
err = alloc_system_page(dev, function);
- if (err)
+ if (err) {
+ dev->priv.fw_pages_alloc_failed += (npages - i);
goto out_4k;
+ }
goto retry;
}
@@ -365,11 +367,20 @@ retry:
MLX5_SET(manage_pages_in, in, input_num_entries, npages);
MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out));
+ if (err == -EREMOTEIO) {
+ notify_fail = 0;
+		/* if the request was triggered by FW and also failed by FW, ignore it */
+ if (event) {
+ err = 0;
+ goto out_dropped;
+ }
+ }
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
func_id, npages, err);
- goto out_4k;
+ goto out_dropped;
}
dev->priv.fw_pages += npages;
@@ -384,6 +395,8 @@ retry:
kvfree(in);
return 0;
+out_dropped:
+ dev->priv.give_pages_dropped += npages;
out_4k:
for (i--; i >= 0; i--)
free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function);
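The error handling above relies on the mlx5_cmd_do()/mlx5_cmd_check() split:
mlx5_cmd_do() returns -EREMOTEIO when the command reached the device but was
failed by FW, leaving it to the caller to decide whether that is worth
logging. A minimal sketch of the pattern (buffers and the fw_initiated flag
are whatever the surrounding command uses):

	err = mlx5_cmd_do(dev, in, inlen, out, outlen);
	if (err == -EREMOTEIO && fw_initiated)
		return 0;	/* FW requested it, FW refused it: drop quietly */

	/* print/translate the FW status for every other failure */
	return mlx5_cmd_check(dev, err, in, out);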
@@ -455,7 +468,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
u32 i = 0;
if (!mlx5_cmd_is_down(dev))
- return mlx5_cmd_exec(dev, in, in_size, out, out_size);
+ return mlx5_cmd_do(dev, in, in_size, out, out_size);
/* No hard feelings, we want our pages back! */
npages = MLX5_GET(manage_pages_in, in, input_num_entries);
@@ -479,7 +492,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
}
static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
- int *nclaimed, bool ec_function)
+ int *nclaimed, bool event, bool ec_function)
{
u32 function = get_function(func_id, ec_function);
int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
@@ -507,6 +520,17 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
func_id, npages, outlen);
err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
if (err) {
+ npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+ dev->priv.reclaim_pages_discard += npages;
+ }
+	/* if the reclaim was triggered by a FW event and also failed by FW, ignore it */
+ if (event && err == -EREMOTEIO) {
+ err = 0;
+ goto out_free;
+ }
+
+ err = mlx5_cmd_check(dev, err, in, out);
+ if (err) {
mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
@@ -546,7 +570,7 @@ static void pages_work_handler(struct work_struct *work)
release_all_pages(dev, req->func_id, req->ec_function);
else if (req->npages < 0)
err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
- req->ec_function);
+ true, req->ec_function);
else if (req->npages > 0)
err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
@@ -645,7 +669,7 @@ static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
int err;
err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(),
- &nclaimed, mlx5_core_is_ecpf(dev));
+ &nclaimed, false, mlx5_core_is_ecpf(dev));
if (err) {
mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n",
err, func_id);
@@ -700,12 +724,14 @@ int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
return -ENOMEM;
xa_init(&dev->priv.page_root_xa);
+ mlx5_pages_debugfs_init(dev);
return 0;
}
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
+ mlx5_pages_debugfs_cleanup(dev);
xa_destroy(&dev->priv.page_root_xa);
destroy_workqueue(dev->priv.pg_wq);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 830444f927d4..662f1d55e30e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -3,19 +3,15 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
+#include "pci_irq.h"
#include "lib/sf.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
-#define MLX5_MAX_IRQ_NAME (32)
-/* max irq_index is 2047, so four chars */
-#define MLX5_MAX_IRQ_IDX_CHARS (4)
-
#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
@@ -25,7 +21,6 @@
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
-#define MLX5_EQ_REFS_PER_IRQ (2)
struct mlx5_irq {
struct atomic_notifier_head nh;
@@ -37,16 +32,6 @@ struct mlx5_irq {
int irqn;
};
-struct mlx5_irq_pool {
- char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
- struct xa_limit xa_num_irqs;
- struct mutex lock; /* sync IRQs creations */
- struct xarray irqs;
- u32 max_threshold;
- u32 min_threshold;
- struct mlx5_core_dev *dev;
-};
-
struct mlx5_irq_table {
struct mlx5_irq_pool *pf_pool;
struct mlx5_irq_pool *sf_ctrl_pool;
@@ -109,8 +94,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
if (msix_vec_count > max_msix)
return -EOVERFLOW;
- query_cap = kzalloc(query_sz, GFP_KERNEL);
- hca_cap = kzalloc(set_sz, GFP_KERNEL);
+ query_cap = kvzalloc(query_sz, GFP_KERNEL);
+ hca_cap = kvzalloc(set_sz, GFP_KERNEL);
if (!hca_cap || !query_cap) {
ret = -ENOMEM;
goto out;
@@ -133,8 +118,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
- kfree(hca_cap);
- kfree(query_cap);
+ kvfree(hca_cap);
+ kvfree(query_cap);
return ret;
}
@@ -143,28 +128,38 @@ static void irq_release(struct mlx5_irq *irq)
struct mlx5_irq_pool *pool = irq->pool;
xa_erase(&pool->irqs, irq->index);
- /* free_irq requires that affinity and rmap will be cleared
+	/* free_irq requires that affinity_hint and rmap be cleared
* before calling it. This is why there is asymmetry with set_rmap
* which should be called after alloc_irq but before request_irq.
*/
- irq_set_affinity_hint(irq->irqn, NULL);
+ irq_update_affinity_hint(irq->irqn, NULL);
free_cpumask_var(irq->mask);
free_irq(irq->irqn, &irq->nh);
kfree(irq);
}
-static void irq_put(struct mlx5_irq *irq)
+int mlx5_irq_put(struct mlx5_irq *irq)
{
struct mlx5_irq_pool *pool = irq->pool;
+ int ret = 0;
mutex_lock(&pool->lock);
irq->refcount--;
- if (!irq->refcount)
+ if (!irq->refcount) {
irq_release(irq);
+ ret = 1;
+ }
mutex_unlock(&pool->lock);
+ return ret;
+}
+
+int mlx5_irq_read_locked(struct mlx5_irq *irq)
+{
+ lockdep_assert_held(&irq->pool->lock);
+ return irq->refcount;
}
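mlx5_irq_put() now reports whether the final reference was dropped, which
lets a caller that tracks per-CPU IRQ usage decrement its counters only when
the IRQ is really freed. A hedged sketch (the bookkeeping shown is
illustrative, not part of this hunk):

	int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));

	if (mlx5_irq_put(irq))
		pool->irqs_per_cpu[cpu]--;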
-static int irq_get_locked(struct mlx5_irq *irq)
+int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
lockdep_assert_held(&irq->pool->lock);
if (WARN_ON_ONCE(!irq->refcount))
@@ -178,7 +173,7 @@ static int irq_get(struct mlx5_irq *irq)
int err;
mutex_lock(&irq->pool->lock);
- err = irq_get_locked(irq);
+ err = mlx5_irq_get_locked(irq);
mutex_unlock(&irq->pool->lock);
return err;
}
@@ -210,12 +205,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
-static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
-{
- return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
-}
-
-static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity)
{
struct mlx5_core_dev *dev = pool->dev;
char name[MLX5_MAX_IRQ_NAME];
@@ -226,7 +217,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
if (!irq)
return ERR_PTR(-ENOMEM);
irq->irqn = pci_irq_vector(dev->pdev, i);
- if (!irq_pool_is_sf_pool(pool))
+ if (!mlx5_irq_pool_is_sf_pool(pool))
irq_set_name(pool, name, i);
else
irq_sf_set_name(pool, name, i);
@@ -244,6 +235,10 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
err = -ENOMEM;
goto err_cpumask;
}
+ if (affinity) {
+ cpumask_copy(irq->mask, affinity);
+ irq_set_affinity_and_hint(irq->irqn, irq->mask);
+ }
irq->pool = pool;
irq->refcount = 1;
irq->index = i;
@@ -255,6 +250,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
}
return irq;
err_xa:
+ irq_update_affinity_hint(irq->irqn, NULL);
free_cpumask_var(irq->mask);
err_cpumask:
free_irq(irq->irqn, &irq->nh);
@@ -275,7 +271,7 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
return -ENOENT;
ret = atomic_notifier_chain_register(&irq->nh, nb);
if (ret)
- irq_put(irq);
+ mlx5_irq_put(irq);
return ret;
}
@@ -284,7 +280,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
int err = 0;
err = atomic_notifier_chain_unregister(&irq->nh, nb);
- irq_put(irq);
+ mlx5_irq_put(irq);
return err;
}
@@ -300,131 +296,121 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
/* irq_pool API */
-/* creating an irq from irq_pool */
-static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+/* requesting an irq from a given pool according to a given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+ struct cpumask *affinity)
{
struct mlx5_irq *irq;
- u32 irq_index;
- int err;
- err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs,
- GFP_KERNEL);
- if (err)
- return ERR_PTR(err);
- irq = irq_request(pool, irq_index);
- if (IS_ERR(irq))
- return irq;
- cpumask_copy(irq->mask, affinity);
- irq_set_affinity_hint(irq->irqn, irq->mask);
+ mutex_lock(&pool->lock);
+ irq = xa_load(&pool->irqs, vecidx);
+ if (irq) {
+ mlx5_irq_get_locked(irq);
+ goto unlock;
+ }
+ irq = mlx5_irq_alloc(pool, vecidx, affinity);
+unlock:
+ mutex_unlock(&pool->lock);
return irq;
}
-/* looking for the irq with the smallest refcount and the same affinity */
-static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
- int start = pool->xa_num_irqs.min;
- int end = pool->xa_num_irqs.max;
- struct mlx5_irq *irq = NULL;
- struct mlx5_irq *iter;
- unsigned long index;
+ return irq_table->sf_ctrl_pool;
+}
- lockdep_assert_held(&pool->lock);
- xa_for_each_range(&pool->irqs, index, iter, start, end) {
- if (!cpumask_equal(iter->mask, affinity))
- continue;
- if (iter->refcount < pool->min_threshold)
- return iter;
- if (!irq || iter->refcount < irq->refcount)
- irq = iter;
- }
- return irq;
+static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->sf_comp_pool;
}
-/* requesting an irq from a given pool according to given affinity */
-static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
- struct mlx5_irq *least_loaded_irq, *new_irq;
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
- mutex_lock(&pool->lock);
- least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
- if (least_loaded_irq &&
- least_loaded_irq->refcount < pool->min_threshold)
- goto out;
- new_irq = irq_pool_create_irq(pool, affinity);
- if (IS_ERR(new_irq)) {
- if (!least_loaded_irq) {
- mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n",
- cpumask_first(affinity));
- mutex_unlock(&pool->lock);
- return new_irq;
- }
- /* We failed to create a new IRQ for the requested affinity,
- * sharing existing IRQ.
- */
- goto out;
- }
- least_loaded_irq = new_irq;
- goto unlock;
-out:
- irq_get_locked(least_loaded_irq);
- if (least_loaded_irq->refcount > pool->max_threshold)
- mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
- least_loaded_irq->irqn, pool->name,
- least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ);
-unlock:
- mutex_unlock(&pool->lock);
- return least_loaded_irq;
+ if (mlx5_core_is_sf(dev))
+ pool = sf_irq_pool_get(irq_table);
+
+	/* In some configs, there won't be a pool of SF IRQs. Hence, return
+	 * the PF IRQ pool when the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
}
-/* requesting an irq from a given pool according to given index */
-static struct mlx5_irq *
-irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
- struct cpumask *affinity)
+static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
- struct mlx5_irq *irq;
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
- mutex_lock(&pool->lock);
- irq = xa_load(&pool->irqs, vecidx);
- if (irq) {
- irq_get_locked(irq);
- goto unlock;
+ if (mlx5_core_is_sf(dev))
+ pool = sf_ctrl_irq_pool_get(irq_table);
+
+	/* In some configs, there won't be a pool of SF IRQs. Hence, return
+	 * the PF IRQ pool when the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
+}
+
+/**
+ * mlx5_irqs_release - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
+{
+ int i;
+
+ for (i = 0; i < nirqs; i++) {
+ synchronize_irq(irqs[i]->irqn);
+ mlx5_irq_put(irqs[i]);
}
- irq = irq_request(pool, vecidx);
- if (IS_ERR(irq) || !affinity)
- goto unlock;
- cpumask_copy(irq->mask, affinity);
- if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max &&
- cpumask_empty(irq->mask))
- cpumask_set_cpu(0, irq->mask);
- irq_set_affinity_hint(irq->irqn, irq->mask);
-unlock:
- mutex_unlock(&pool->lock);
- return irq;
}
-static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table,
- int i, struct cpumask *affinity)
+/**
+ * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
+ * @ctrl_irq: ctrl IRQ to be released.
+ */
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
- if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL)
- return irq_table->sf_ctrl_pool;
- return irq_table->sf_comp_pool;
+ mlx5_irqs_release(&ctrl_irq, 1);
}
/**
- * mlx5_irq_release - release an IRQ back to the system.
- * @irq: irq to be released.
+ * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQ.
+ *
+ * This function returns a pointer to the IRQ, or an ERR_PTR in case of error.
*/
-void mlx5_irq_release(struct mlx5_irq *irq)
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
- synchronize_irq(irq->irqn);
- irq_put(irq);
+ struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ cpumask_copy(req_mask, cpu_online_mask);
+ if (!mlx5_irq_pool_is_sf_pool(pool)) {
+ /* In case we are allocating a control IRQ for PF/VF */
+ if (!pool->xa_num_irqs.max) {
+ cpumask_clear(req_mask);
+ /* In case we only have a single IRQ for PF/VF */
+ cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
+ }
+ /* Allocate the IRQ in the last index of the pool */
+ irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+ } else {
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ }
+
+ free_cpumask_var(req_mask);
+ return irq;
}
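Usage is symmetric with mlx5_ctrl_irq_release() above; a minimal sketch:

	struct mlx5_irq *ctrl_irq;

	ctrl_irq = mlx5_ctrl_irq_request(dev);
	if (IS_ERR(ctrl_irq))
		return PTR_ERR(ctrl_irq);

	/* ... attach an EQ notifier and run ... */

	mlx5_ctrl_irq_release(ctrl_irq);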
/**
- * mlx5_irq_request - request an IRQ for mlx5 device.
+ * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
@@ -439,23 +425,8 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct mlx5_irq_pool *pool;
struct mlx5_irq *irq;
- if (mlx5_core_is_sf(dev)) {
- pool = find_sf_irq_pool(irq_table, vecidx, affinity);
- if (!pool)
- /* we don't have IRQs for SFs, using the PF IRQs */
- goto pf_irq;
- if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp"))
- /* In case an SF user request IRQ with vecidx */
- irq = irq_pool_request_vector(pool, vecidx, NULL);
- else
- irq = irq_pool_request_affinity(pool, affinity);
- goto out;
- }
-pf_irq:
pool = irq_table->pf_pool;
- vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx;
irq = irq_pool_request_vector(pool, vecidx, affinity);
-out:
if (IS_ERR(irq))
return irq;
mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
@@ -464,6 +435,51 @@ out:
return irq;
}
+/**
+ * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
+{
+ mlx5_irqs_release(irqs, nirqs);
+}
+
+/**
+ * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @cpus: array of CPUs to bind the IRQs to
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bound to at most 1 CPU.
+ * This function requests @nirqs IRQs, starting from vector index 0.
+ *
+ * This function returns the number of IRQs requested (which might be smaller
+ * than @nirqs) on success, or a negative error code in case of an error.
+ */
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ for (i = 0; i < nirqs; i++) {
+ cpumask_set_cpu(cpus[i], req_mask);
+ irq = mlx5_irq_request(dev, i, req_mask);
+ if (IS_ERR(irq))
+ break;
+ cpumask_clear(req_mask);
+ irqs[i] = irq;
+ }
+
+ free_cpumask_var(req_mask);
+ return i ? i : PTR_ERR(irq);
+}
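A short caller sketch for the batched API (the CPU choices are illustrative):

	u16 cpus[2] = { 0, 1 };		/* one IRQ bound per listed CPU */
	struct mlx5_irq *irqs[2];
	int got;

	got = mlx5_irqs_request_vectors(dev, cpus, 2, irqs);
	if (got < 0)
		return got;
	/* got may be smaller than requested; use irqs[0..got - 1] */

	mlx5_irqs_release_vectors(irqs, got);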
+
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
u32 min_threshold, u32 max_threshold)
@@ -479,7 +495,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
pool->xa_num_irqs.max = start + size - 1;
if (name)
snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
- name);
+ "%s", name);
pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
@@ -500,6 +516,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
irq_release(irq);
xa_destroy(&pool->irqs);
mutex_destroy(&pool->lock);
+ kfree(pool->irqs_per_cpu);
kvfree(pool);
}
@@ -547,7 +564,17 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
err = PTR_ERR(table->sf_comp_pool);
goto err_sf_ctrl;
}
+
+ table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
+ if (!table->sf_comp_pool->irqs_per_cpu) {
+ err = -ENOMEM;
+ goto err_irqs_per_cpu;
+ }
+
return 0;
+
+err_irqs_per_cpu:
+ irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
irq_pool_free(table->sf_ctrl_pool);
err_pf:
@@ -573,7 +600,8 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev)
if (mlx5_core_is_sf(dev))
return 0;
- irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+ irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
+ dev->priv.numa_node);
if (!irq_table)
return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
new file mode 100644
index 000000000000..5c7e68bee43a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __PCI_IRQ_H__
+#define __PCI_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+/* max irq_index is 2047, so four chars */
+#define MLX5_MAX_IRQ_IDX_CHARS (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
+
+struct mlx5_irq;
+
+struct mlx5_irq_pool {
+ char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
+ struct xa_limit xa_num_irqs;
+ struct mutex lock; /* sync IRQs creations */
+ struct xarray irqs;
+ u32 max_threshold;
+ u32 min_threshold;
+ u16 *irqs_per_cpu;
+ struct mlx5_core_dev *dev;
+};
+
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
+static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+{
+ return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+}
+
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity);
+int mlx5_irq_get_locked(struct mlx5_irq *irq);
+int mlx5_irq_read_locked(struct mlx5_irq *irq);
+int mlx5_irq_put(struct mlx5_irq *irq);
+
+#endif /* __PCI_IRQ_H__ */
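Together with mlx5_irq_pool_get(), the helper above lets IRQ consumers pick
the right request path without knowing whether they run on an SF; a hedged
sketch (req_mask and vecidx are assumed to come from the caller):

	struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
	struct mlx5_irq *irq;

	if (mlx5_irq_pool_is_sf_pool(pool))
		/* SF pool: let the affinity logic spread the load */
		irq = mlx5_irq_affinity_request(pool, req_mask);
	else
		irq = mlx5_irq_request(dev, vecidx, req_mask);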
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index aabc53ad8bdd..ee5ffdeb9015 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 1ef2b6a848c1..a1548e6bfb35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -33,9 +33,10 @@
#include <linux/mlx5/port.h>
#include "mlx5_core.h"
-int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
- int size_in, void *data_out, int size_out,
- u16 reg_id, int arg, int write)
+/* calling with verbose set to false will not print errors to the log */
+int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in,
+ void *data_out, int size_out, u16 reg_id, int arg,
+ int write, bool verbose)
{
int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
@@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
MLX5_SET(access_register_in, in, argument, arg);
MLX5_SET(access_register_in, in, register_id, reg_id);
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ err = mlx5_cmd_do(dev, in, inlen, out, outlen);
+ if (verbose)
+ err = mlx5_cmd_check(dev, err, in, out);
if (err)
goto out;
@@ -69,6 +72,15 @@ out:
kvfree(in);
return err;
}
+EXPORT_SYMBOL_GPL(mlx5_access_reg);
+
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+ int size_in, void *data_out, int size_out,
+ u16 reg_id, int arg, int write)
+{
+ return mlx5_access_reg(dev, data_in, size_in, data_out, size_out,
+ reg_id, arg, write, true);
+}
EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
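Callers probing for optional registers can now stay quiet on failure by
passing verbose=false; a hedged sketch (the probed register and the
"treat as unsupported" policy are illustrative, not from this patch):

	err = mlx5_access_reg(dev, in, sz, out, sz,
			      MLX5_REG_PDDR, 0, 0 /* read */, false);
	if (err)
		return 0;	/* not supported on this device; no log noise */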
int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
@@ -263,7 +275,6 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
{
u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0};
u32 out[MLX5_ST_SZ_DW(pmlp_reg)];
- int module_mapping;
int err;
MLX5_SET(pmlp_reg, in, local_port, 1);
@@ -272,8 +283,9 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
if (err)
return err;
- module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping);
- *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK;
+ *module_num = MLX5_GET(lane_2_module_mapping,
+ MLX5_ADDR_OF(pmlp_reg, out, lane0_module_mapping),
+ module);
return 0;
}
@@ -353,6 +365,12 @@ static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset
*offset -= MLX5_EEPROM_PAGE_LENGTH;
}
+static int mlx5_mcia_max_bytes(struct mlx5_core_dev *dev)
+{
+ /* mcia supports either 12 dwords or 32 dwords */
+ return (MLX5_CAP_MCAM_FEATURE(dev, mcia_32dwords) ? 32 : 12) * sizeof(u32);
+}
+
static int mlx5_query_mcia(struct mlx5_core_dev *dev,
struct mlx5_module_eeprom_query_params *params, u8 *data)
{
@@ -362,7 +380,7 @@ static int mlx5_query_mcia(struct mlx5_core_dev *dev,
void *ptr;
u16 size;
- size = min_t(int, params->size, MLX5_EEPROM_MAX_BYTES);
+ size = min_t(int, params->size, mlx5_mcia_max_bytes(dev));
MLX5_SET(mcia_reg, in, l, 0);
MLX5_SET(mcia_reg, in, size, size);
@@ -406,23 +424,24 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
switch (module_id) {
case MLX5_MODULE_ID_SFP:
- mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
+ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
break;
case MLX5_MODULE_ID_QSFP:
case MLX5_MODULE_ID_QSFP_PLUS:
case MLX5_MODULE_ID_QSFP28:
- mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
+ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
break;
default:
mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
return -EINVAL;
}
- if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
		/* Cross-page read; read until offset 256 in the low page */
- size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
+ size = MLX5_EEPROM_PAGE_LENGTH - offset;
query.size = size;
+ query.offset = offset;
return mlx5_query_mcia(dev, &query, data);
}
@@ -432,35 +451,12 @@ int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev,
struct mlx5_module_eeprom_query_params *params,
u8 *data)
{
- u8 module_id;
int err;
err = mlx5_query_module_num(dev, &params->module_number);
if (err)
return err;
- err = mlx5_query_module_id(dev, params->module_number, &module_id);
- if (err)
- return err;
-
- switch (module_id) {
- case MLX5_MODULE_ID_SFP:
- if (params->page > 0)
- return -EINVAL;
- break;
- case MLX5_MODULE_ID_QSFP:
- case MLX5_MODULE_ID_QSFP28:
- case MLX5_MODULE_ID_QSFP_PLUS:
- if (params->page > 3)
- return -EINVAL;
- break;
- case MLX5_MODULE_ID_DSFP:
- break;
- default:
- mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
- return -EINVAL;
- }
-
if (params->i2c_address != MLX5_I2C_ADDR_HIGH &&
params->i2c_address != MLX5_I2C_ADDR_LOW) {
mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address);
@@ -497,29 +493,6 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap);
-int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
- u8 port_num, void *out, size_t sz)
-{
- u32 *in;
- int err;
-
- in = kvzalloc(sz, GFP_KERNEL);
- if (!in) {
- err = -ENOMEM;
- return err;
- }
-
- MLX5_SET(ppcnt_reg, in, local_port, port_num);
-
- MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
- err = mlx5_core_access_reg(dev, in, sz, out,
- sz, MLX5_REG_PPCNT, 0, 0);
-
- kvfree(in);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
-
static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
u32 out_size)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 7161220afe30..9f8b4005f4bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index f37db7cc32a6..7da012ff0d41 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -30,10 +30,7 @@ bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
{
struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
- if (!mlx5_sf_dev_supported(dev))
- return false;
-
- return !xa_empty(&table->devices);
+ return table && !xa_empty(&table->devices);
}
static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 3be659cd91f1..7d955a4d9f14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -501,7 +501,7 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
case MLX5_ESWITCH_OFFLOADS:
mlx5_sf_table_enable(table);
break;
- case MLX5_ESWITCH_NONE:
+ case MLX5_ESWITCH_LEGACY:
mlx5_sf_table_disable(table);
break;
default:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index 252d6017387d..17aa348989cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -247,7 +247,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
{
struct mlx5_sf_hw_table *table;
u16 max_ext_fn = 0;
- u16 ext_base_id;
+ u16 ext_base_id = 0;
u16 max_fn = 0;
u16 base_id;
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index e8185b69ac6c..c0e6c487c63c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -87,6 +87,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
enable_vfs_hca:
num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
for (vf = 0; vf < num_vfs; vf++) {
+		/* Notify the VF before its enablement to let it set up
+		 * any state it needs.
+ */
+ blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+ MLX5_PF_NOTIFY_ENABLE_VF, dev);
err = mlx5_core_enable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
@@ -127,6 +132,11 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
for (vf = num_vfs - 1; vf >= 0; vf--) {
if (!sriov->vfs_ctx[vf].enabled)
continue;
+		/* Notify the VF before its disablement to let it clean
+		 * up its resources.
+ */
+ blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+ MLX5_PF_NOTIFY_DISABLE_VF, dev);
err = mlx5_core_disable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
@@ -135,8 +145,7 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
sriov->vfs_ctx[vf].enabled = 0;
}
- if (MLX5_ESWITCH_MANAGER(dev))
- mlx5_eswitch_disable(dev->priv.eswitch, clear_vf);
+ mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf);
if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
@@ -145,9 +154,12 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
int err;
+ devl_lock(devlink);
err = mlx5_device_enable_sriov(dev, num_vfs);
+ devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
return err;
@@ -161,13 +173,16 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
return err;
}
-static void mlx5_sriov_disable(struct pci_dev *pdev)
+void mlx5_sriov_disable(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
int num_vfs = pci_num_vf(dev->pdev);
pci_disable_sriov(pdev);
+ devl_lock(devlink);
mlx5_device_disable_sriov(dev, num_vfs, true);
+ devl_unlock(devlink);
}
int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
@@ -205,19 +220,8 @@ int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));
sriov = &dev->priv.sriov;
-
- /* Reversed translation of PCI VF function number to the internal
- * function_id, which exists in the name of virtfn symlink.
- */
- for (id = 0; id < pci_num_vf(pf); id++) {
- if (!sriov->vfs_ctx[id].enabled)
- continue;
-
- if (vf->devfn == pci_iov_virtfn_devfn(pf, id))
- break;
- }
-
- if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled)
+ id = pci_iov_vf_id(vf);
+ if (id < 0 || !sriov->vfs_ctx[id].enabled)
return -EINVAL;
return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
@@ -268,7 +272,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
struct pci_dev *pdev = dev->pdev;
- int total_vfs;
+ int total_vfs, i;
if (!mlx5_core_is_pf(dev))
return 0;
@@ -280,6 +284,9 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
if (!sriov->vfs_ctx)
return -ENOMEM;
+ for (i = 0; i < total_vfs; i++)
+ BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier);
+
return 0;
}
@@ -292,3 +299,53 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
kfree(sriov->vfs_ctx);
}
+
+/**
+ * mlx5_sriov_blocking_notifier_unregister - Unregister a notifier block
+ * from a VF's notification chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be unregistered.
+ */
+void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
+ int vf_id,
+ struct notifier_block *nb)
+{
+ struct mlx5_vf_context *vfs_ctx;
+ struct mlx5_core_sriov *sriov;
+
+ sriov = &mdev->priv.sriov;
+ if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs))
+ return;
+
+ vfs_ctx = &sriov->vfs_ctx[vf_id];
+ blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister);
+
+/**
+ * mlx5_sriov_blocking_notifier_register - Register a notifier block on
+ * a VF's notification chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be called upon the VF events.
+ *
+ * Returns 0 on success or an error code.
+ */
+int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
+ int vf_id,
+ struct notifier_block *nb)
+{
+ struct mlx5_vf_context *vfs_ctx;
+ struct mlx5_core_sriov *sriov;
+
+ sriov = &mdev->priv.sriov;
+ if (vf_id < 0 || vf_id >= sriov->num_vfs)
+ return -EINVAL;
+
+ vfs_ctx = &sriov->vfs_ctx[vf_id];
+ return blocking_notifier_chain_register(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register);
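A hedged sketch of a consumer reacting to the events published through the
chains above (the callback body and registration context are illustrative):

	static int example_vf_event(struct notifier_block *nb,
				    unsigned long event, void *data)
	{
		switch (event) {
		case MLX5_PF_NOTIFY_ENABLE_VF:
			/* VF is about to be enabled: prepare state */
			break;
		case MLX5_PF_NOTIFY_DISABLE_VF:
			/* VF is about to be disabled: quiesce, clean up */
			break;
		}
		return NOTIFY_OK;
	}

	/* typically done once per VF at the consumer's probe time: */
	err = mlx5_sriov_blocking_notifier_register(mdev, vf_id, &nb);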
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 07936841ce99..b1dfad274a39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -22,6 +22,7 @@ enum dr_action_valid_state {
DR_ACTION_STATE_PUSH_VLAN,
DR_ACTION_STATE_NON_TERM,
DR_ACTION_STATE_TERM,
+ DR_ACTION_STATE_ASO,
DR_ACTION_STATE_MAX,
};
@@ -42,6 +43,7 @@ static const char * const action_type_to_str[] = {
[DR_ACTION_TYP_SAMPLER] = "DR_ACTION_TYP_SAMPLER",
[DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR",
[DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR",
+ [DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER",
[DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN",
};
@@ -71,6 +73,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -85,6 +88,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -93,6 +97,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -105,6 +110,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -118,6 +124,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
@@ -128,6 +135,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -145,6 +153,13 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -163,18 +178,21 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -185,6 +203,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
@@ -196,6 +215,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -206,6 +226,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -219,6 +240,16 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -240,6 +271,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -253,6 +285,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -261,6 +294,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -272,6 +306,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -284,6 +319,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -296,6 +332,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -312,6 +349,13 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -331,6 +375,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -338,6 +383,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -345,6 +391,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -356,6 +403,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
@@ -368,6 +416,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -379,6 +428,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
@@ -393,6 +443,17 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -530,6 +591,37 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
return 0;
}
+static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_actions_attr *attr,
+ bool rx_rule,
+ bool *recalc_cs_required)
+{
+ *recalc_cs_required = false;
+
+ /* if device supports csum recalculation - no adjustment needed */
+ if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps))
+ return;
+
+ /* no adjustment needed on TX rules */
+ if (!rx_rule)
+ return;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) {
+ /* Ignore the modify TTL action.
+		 * It is always kept as the last HW action.
+ */
+ attr->modify_actions--;
+ return;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+ /* Due to a HW bug on some devices, modifying TTL on RX flows
+ * will cause an incorrect checksum calculation. In such cases
+ * we will use a FW table to recalculate the checksum.
+ */
+ *recalc_cs_required = true;
+}
+
static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
struct mlx5dr_action *actions[],
int last_idx)
@@ -570,6 +662,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
for (i = 0; i < num_actions; i++) {
struct mlx5dr_action_dest_tbl *dest_tbl;
+ struct mlx5dr_icm_chunk *chunk;
struct mlx5dr_action *action;
int max_actions_type = 1;
u32 action_type;
@@ -598,9 +691,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
matcher->tbl->level,
dest_tbl->tbl->level);
}
- attr.final_icm_addr = rx_rule ?
- dest_tbl->tbl->rx.s_anchor->chunk->icm_addr :
- dest_tbl->tbl->tx.s_anchor->chunk->icm_addr;
+ chunk = rx_rule ? dest_tbl->tbl->rx.s_anchor->chunk :
+ dest_tbl->tbl->tx.s_anchor->chunk;
+ attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk);
} else {
struct mlx5dr_cmd_query_flow_table_details output;
int ret;
@@ -649,8 +742,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
case DR_ACTION_TYP_MODIFY_HDR:
attr.modify_index = action->rewrite->index;
attr.modify_actions = action->rewrite->num_of_actions;
- recalc_cs_required = action->rewrite->modify_ttl &&
- !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps);
+ if (action->rewrite->modify_ttl)
+ dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
+ &recalc_cs_required);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
case DR_ACTION_TYP_L2_TO_TNL_L3:
@@ -669,15 +763,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
case DR_ACTION_TYP_VPORT:
attr.hit_gvmi = action->vport->caps->vhca_gvmi;
dest_action = action;
- if (rx_rule) {
- if (action->vport->caps->num == MLX5_VPORT_UPLINK) {
- mlx5dr_dbg(dmn, "Device doesn't support Loopback on WIRE vport\n");
- return -EOPNOTSUPP;
- }
- attr.final_icm_addr = action->vport->caps->icm_address_rx;
- } else {
- attr.final_icm_addr = action->vport->caps->icm_address_tx;
- }
+ attr.final_icm_addr = rx_rule ?
+ action->vport->caps->icm_address_rx :
+ action->vport->caps->icm_address_tx;
break;
case DR_ACTION_TYP_POP_VLAN:
if (!rx_rule && !(dmn->ste_ctx->actions_caps &
@@ -711,6 +799,12 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
attr.reformat.param_0 = action->reformat->param_0;
attr.reformat.param_1 = action->reformat->param_1;
break;
+ case DR_ACTION_TYP_ASO_FLOW_METER:
+ attr.aso_flow_meter.obj_id = action->aso->obj_id;
+ attr.aso_flow_meter.offset = action->aso->offset;
+ attr.aso_flow_meter.dest_reg_id = action->aso->dest_reg_id;
+ attr.aso_flow_meter.init_color = action->aso->init_color;
+ break;
default:
mlx5dr_err(dmn, "Unsupported action type %d\n", action_type);
return -EINVAL;
@@ -737,12 +831,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
*new_hw_ste_arr_sz = nic_matcher->num_of_builders;
last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
- /* Due to a HW bug in some devices, modifying TTL on RX flows will
- * cause an incorrect checksum calculation. In this case we will
- * use a FW table to recalculate.
- */
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
- rx_rule && recalc_cs_required && dest_action) {
+ if (recalc_cs_required && dest_action) {
ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
if (ret) {
mlx5dr_err(dmn,
@@ -776,6 +865,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = {
[DR_ACTION_TYP_INSERT_HDR] = sizeof(struct mlx5dr_action_reformat),
[DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat),
[DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler),
+ [DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter),
};
static struct mlx5dr_action *
@@ -847,7 +937,8 @@ struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
u32 num_of_dests,
- bool ignore_flow_level)
+ bool ignore_flow_level,
+ u32 flow_source)
{
struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
struct mlx5dr_action **ref_actions;
@@ -919,7 +1010,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
reformat_req,
&action->dest_tbl->fw_tbl.id,
&action->dest_tbl->fw_tbl.group_id,
- ignore_flow_level);
+ ignore_flow_level,
+ flow_source);
if (ret)
goto free_action;
@@ -1129,7 +1221,8 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
}
action->rewrite->data = (void *)hw_actions;
- action->rewrite->index = (action->rewrite->chunk->icm_addr -
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr
+ (action->rewrite->chunk) -
dmn->info.caps.hdr_modify_icm_addr) /
ACTION_CACHE_LINE_SIZE;
@@ -1571,6 +1664,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ __be64 *modify_ttl_sw_action = NULL;
int ret, i, hw_idx = 0;
__be64 *sw_action;
__be64 hw_action;
@@ -1583,16 +1677,26 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
action->rewrite->allow_rx = 1;
action->rewrite->allow_tx = 1;
- for (i = 0; i < num_sw_actions; i++) {
- sw_action = &sw_actions[i];
+ for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) {
+ /* modify TTL is handled separately, as a last action */
+ if (i == num_sw_actions) {
+ sw_action = modify_ttl_sw_action;
+ modify_ttl_sw_action = NULL;
+ } else {
+ sw_action = &sw_actions[i];
+ }
ret = dr_action_modify_check_field_limitation(action,
sw_action);
if (ret)
return ret;
- if (!(*modify_ttl))
- *modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action);
+ if (!(*modify_ttl) &&
+ dr_action_modify_check_is_ttl_modify(sw_action)) {
+ modify_ttl_sw_action = sw_action;
+ *modify_ttl = true;
+ continue;
+ }
/* Convert SW action to HW action */
ret = dr_action_modify_sw_to_hw(dmn,
@@ -1631,7 +1735,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
* modify actions doesn't exceed the limit
*/
hw_idx++;
- if ((num_sw_actions + hw_idx - i) >= max_hw_actions) {
+ if (hw_idx >= max_hw_actions) {
mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n");
return -EINVAL;
}
@@ -1642,6 +1746,10 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
hw_idx++;
}
+ /* if the resulting HW actions list is empty, add NOP action */
+ if (!hw_idx)
+ hw_idx++;
+
*num_hw_actions = hw_idx;
return 0;
@@ -1693,7 +1801,7 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
action->rewrite->modify_ttl = modify_ttl;
action->rewrite->data = (u8 *)hw_actions;
action->rewrite->num_of_actions = num_hw_actions;
- action->rewrite->index = (chunk->icm_addr -
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) -
dmn->info.caps.hdr_modify_icm_addr) /
ACTION_CACHE_LINE_SIZE;
@@ -1790,9 +1898,37 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
return action;
}
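+/* ASO flow-meter action. Only MLX5_EXE_ASO_FLOW_METER is currently
+ * accepted, and init_color must not exceed MLX5_FLOW_METER_COLOR_UNDEFINED;
+ * meter_id is stored as the offset within the ASO object. Returns NULL on
+ * unsupported input.
+ */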
+struct mlx5dr_action *
+mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id,
+ u8 dest_reg_id, u8 aso_type,
+ u8 init_color, u8 meter_id)
+{
+ struct mlx5dr_action *action;
+
+ if (aso_type != MLX5_EXE_ASO_FLOW_METER)
+ return NULL;
+
+ if (init_color > MLX5_FLOW_METER_COLOR_UNDEFINED)
+ return NULL;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_ASO_FLOW_METER);
+ if (!action)
+ return NULL;
+
+ action->aso->obj_id = obj_id;
+ action->aso->offset = meter_id;
+ action->aso->dest_reg_id = dest_reg_id;
+ action->aso->init_color = init_color;
+ action->aso->dmn = dmn;
+
+ refcount_inc(&dmn->refcount);
+
+ return action;
+}
+
int mlx5dr_action_destroy(struct mlx5dr_action *action)
{
- if (refcount_read(&action->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1))
return -EBUSY;
switch (action->action_type) {
@@ -1841,6 +1977,9 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
case DR_ACTION_TYP_SAMPLER:
refcount_dec(&action->sampler->dmn->refcount);
break;
+ case DR_ACTION_TYP_ASO_FLOW_METER:
+ refcount_dec(&action->aso->dmn->refcount);
+ break;
default:
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 1d8febed0d76..16d65fe4f654 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -132,6 +132,13 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new);
+ /* geneve_tlv_option_0_exist indicates STE support
+ * for the flex_parser_ok lookup type
+ */
+ caps->flex_parser_ok_bits_supp =
+ MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist);
+
if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) {
caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1);
@@ -152,7 +159,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->flex_parser_id_mpls_over_gre =
MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre);
- if (caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED)
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED)
caps->flex_parser_id_mpls_over_udp =
MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label);
@@ -304,7 +311,7 @@ int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
MLX5_SET(dest_format_struct, in_dests, destination_type,
- MLX5_FLOW_DESTINATION_TYPE_VPORT);
+ MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT);
MLX5_SET(dest_format_struct, in_dests, destination_id, vport);
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
@@ -432,6 +439,7 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
MLX5_SET(create_flow_table_in, in, table_type, attr->table_type);
+ MLX5_SET(create_flow_table_in, in, uid, attr->uid);
ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl);
@@ -597,9 +605,11 @@ static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev,
if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return 0;
for (i = 0; i < fte->dests_size; i++) {
- if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ||
+ fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_NONE)
continue;
- if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ if ((fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
num_encap++;
num_fwd_destinations++;
@@ -711,25 +721,40 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
int list_size = 0;
for (i = 0; i < fte->dests_size; i++) {
- unsigned int id, type = fte->dest_arr[i].type;
+ enum mlx5_flow_destination_type type = fte->dest_arr[i].type;
+ enum mlx5_ifc_flow_destination_type ifc_type;
+ unsigned int id;
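+ /* Translate the driver-level destination type into the mlx5_ifc
+ * value the device expects in dest_format_struct.
+ */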
if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
continue;
switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ continue;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
id = fte->dest_arr[i].ft_num;
- type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
id = fte->dest_arr[i].ft_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
- id = fte->dest_arr[i].vport.num;
- MLX5_SET(dest_format_struct, in_dests,
- destination_eswitch_owner_vhca_id_valid,
- !!(fte->dest_arr[i].vport.flags &
- MLX5_FLOW_DEST_VPORT_VHCA_ID));
+ if (type == MLX5_FLOW_DESTINATION_TYPE_VPORT) {
+ id = fte->dest_arr[i].vport.num;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid,
+ !!(fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT;
+ } else {
+ id = 0;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid, 1);
+ }
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
fte->dest_arr[i].vport.vhca_id);
@@ -746,13 +771,15 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
id = fte->dest_arr[i].sampler_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
break;
default:
id = fte->dest_arr[i].tir_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
}
MLX5_SET(dest_format_struct, in_dests, destination_type,
- type);
+ ifc_type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
in_dests += dst_cnt_size;
list_size++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
new file mode 100644
index 000000000000..7adcf0eec13b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include "dr_types.h"
+
+#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL)
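+/* Records reference objects by the low 32 bits of their kernel pointer,
+ * which is enough to correlate records within a single dump.
+ */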
+
+enum dr_dump_rec_type {
+ DR_DUMP_REC_TYPE_DOMAIN = 3000,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER = 3001,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_DEV_ATTR = 3002,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT = 3003,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS = 3004,
+ DR_DUMP_REC_TYPE_DOMAIN_SEND_RING = 3005,
+
+ DR_DUMP_REC_TYPE_TABLE = 3100,
+ DR_DUMP_REC_TYPE_TABLE_RX = 3101,
+ DR_DUMP_REC_TYPE_TABLE_TX = 3102,
+
+ DR_DUMP_REC_TYPE_MATCHER = 3200,
+ DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201,
+ DR_DUMP_REC_TYPE_MATCHER_RX = 3202,
+ DR_DUMP_REC_TYPE_MATCHER_TX = 3203,
+ DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204,
+ DR_DUMP_REC_TYPE_MATCHER_MASK = 3205,
+
+ DR_DUMP_REC_TYPE_RULE = 3300,
+ DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301,
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0 = 3302,
+ DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 = 3303,
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1 = 3304,
+
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L2 = 3400,
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L3 = 3401,
+ DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR = 3402,
+ DR_DUMP_REC_TYPE_ACTION_DROP = 3403,
+ DR_DUMP_REC_TYPE_ACTION_QP = 3404,
+ DR_DUMP_REC_TYPE_ACTION_FT = 3405,
+ DR_DUMP_REC_TYPE_ACTION_CTR = 3406,
+ DR_DUMP_REC_TYPE_ACTION_TAG = 3407,
+ DR_DUMP_REC_TYPE_ACTION_VPORT = 3408,
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L2 = 3409,
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L3 = 3410,
+ DR_DUMP_REC_TYPE_ACTION_DEVX_TIR = 3411,
+ DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN = 3412,
+ DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413,
+ DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415,
+ DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420,
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421
+};
+
+void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl)
+{
+ mutex_lock(&tbl->dmn->dump_info.dbg_mutex);
+ list_add_tail(&tbl->dbg_node, &tbl->dmn->dbg_tbl_list);
+ mutex_unlock(&tbl->dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl)
+{
+ mutex_lock(&tbl->dmn->dump_info.dbg_mutex);
+ list_del(&tbl->dbg_node);
+ mutex_unlock(&tbl->dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ list_add_tail(&rule->dbg_node, &rule->matcher->dbg_rule_list);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ list_del(&rule->dbg_node);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+}
+
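+/* STEs are 64 bytes, so shifting an ICM address right by 6 yields an
+ * STE-granularity index; it is truncated to 32 bits for the dump format.
+ */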
+static u64 dr_dump_icm_to_idx(u64 icm_addr)
+{
+ return (icm_addr >> 6) & 0xffffffff;
+}
+
+#define DR_HEX_SIZE 256
+
+static void
+dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
+{
+ if (WARN_ON_ONCE(DR_HEX_SIZE < 2 * size + 1))
+ size = DR_HEX_SIZE / 2 - 1; /* truncate */
+
+ bin2hex(hex, src, size);
+ hex[2 * size] = 0; /* NULL-terminate */
+}
+
+static int
+dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+ struct mlx5dr_rule_action_member *action_mem)
+{
+ struct mlx5dr_action *action = action_mem->action;
+ const u64 action_id = DR_DBG_PTR_TO_ID(action);
+
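+ /* Each record is one CSV line: rec_type,action_id,rule_id plus
+ * type-specific fields, e.g. "3403,0x1234abcd,0x5678ef01" for a
+ * DROP action (values illustrative).
+ */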
+ switch (action->action_type) {
+ case DR_ACTION_TYP_DROP:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id);
+ break;
+ case DR_ACTION_TYP_FT:
+ if (action->dest_tbl->is_fw_tbl)
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->fw_tbl.id,
+ -1);
+ else
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->tbl->table_id,
+ DR_DBG_PTR_TO_ID(action->dest_tbl->tbl));
+
+ break;
+ case DR_ACTION_TYP_CTR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id,
+ action->ctr->ctr_id + action->ctr->offset);
+ break;
+ case DR_ACTION_TYP_TAG:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id,
+ action->flow_tag->flow_tag);
+ break;
+ case DR_ACTION_TYP_MODIFY_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id,
+ rule_id, action->rewrite->index);
+ break;
+ case DR_ACTION_TYP_VPORT:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id,
+ action->vport->caps->num);
+ break;
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id,
+ rule_id);
+ break;
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id,
+ rule_id, action->rewrite->index);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id,
+ rule_id, action->reformat->id);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id,
+ rule_id, action->reformat->id);
+ break;
+ case DR_ACTION_TYP_POP_VLAN:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id,
+ rule_id);
+ break;
+ case DR_ACTION_TYP_PUSH_VLAN:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id,
+ rule_id, action->push_vlan->vlan_hdr);
+ break;
+ case DR_ACTION_TYP_INSERT_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ break;
+ case DR_ACTION_TYP_REMOVE_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ break;
+ case DR_ACTION_TYP_SAMPLER:
+ seq_printf(file,
+ "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id,
+ 0, 0, action->sampler->sampler_id,
+ action->sampler->rx_icm_addr,
+ action->sampler->tx_icm_addr);
+ break;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+ bool is_rx, const u64 rule_id, u8 format_ver)
+{
+ char hw_ste_dump[DR_HEX_SIZE];
+ u32 mem_rec_type;
+
+ if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) {
+ mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 :
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0;
+ } else {
+ mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 :
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1;
+ }
+
+ dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste),
+ DR_STE_SIZE_REDUCED);
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type,
+ dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id,
+ hw_ste_dump);
+
+ return 0;
+}
+
+static int
+dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+ bool is_rx, const u64 rule_id, u8 format_ver)
+{
+ struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
+ struct mlx5dr_ste *curr_ste = rule_rx_tx->last_rule_ste;
+ int ret, i;
+
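+ /* Gather the rule's STEs starting from the last STE in the chain,
+ * then dump each collected entry.
+ */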
+ if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i))
+ return 0;
+
+ while (i--) {
+ ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+ format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_rule_action_member *action_mem;
+ const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
+ struct mlx5dr_rule_rx_tx *rx = &rule->rx;
+ struct mlx5dr_rule_rx_tx *tx = &rule->tx;
+ u8 format_ver;
+ int ret;
+
+ format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver;
+
+ seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id,
+ DR_DBG_PTR_TO_ID(rule->matcher));
+
+ if (rx->nic_matcher) {
+ ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_matcher) {
+ ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
+ ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+ u8 criteria, const u64 matcher_id)
+{
+ char dump[DR_HEX_SIZE];
+
+ seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK,
+ matcher_id);
+
+ if (criteria & DR_MATCHER_CRITERIA_OUTER) {
+ dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_INNER) {
+ dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC) {
+ dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC2) {
+ dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC3) {
+ dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3));
+ seq_printf(file, "%s\n", dump);
+ } else {
+ seq_puts(file, ",\n");
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+ u32 index, bool is_rx, const u64 matcher_id)
+{
+ seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n",
+ DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx,
+ builder->lu_type);
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+ struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
+ const u64 matcher_id)
+{
+ enum dr_dump_rec_type rec_type;
+ u64 s_icm_addr, e_icm_addr;
+ int i, ret;
+
+ rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX :
+ DR_DUMP_REC_TYPE_MATCHER_TX;
+
+ s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk);
+ e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk);
+ seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n",
+ rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx),
+ matcher_id, matcher_rx_tx->num_of_builders,
+ dr_dump_icm_to_idx(s_icm_addr),
+ dr_dump_icm_to_idx(e_icm_addr));
+
+ for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
+ ret = dr_dump_matcher_builder(file,
+ &matcher_rx_tx->ste_builder[i],
+ i, is_rx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
+ struct mlx5dr_matcher_rx_tx *tx = &matcher->tx;
+ u64 matcher_id;
+ int ret;
+
+ matcher_id = DR_DBG_PTR_TO_ID(matcher);
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER,
+ matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio);
+
+ ret = dr_dump_matcher_mask(file, &matcher->mask,
+ matcher->match_criteria, matcher_id);
+ if (ret < 0)
+ return ret;
+
+ if (rx->nic_tbl) {
+ ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_tbl) {
+ ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_rule *rule;
+ int ret;
+
+ ret = dr_dump_matcher(file, matcher);
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(rule, &matcher->dbg_rule_list, dbg_node) {
+ ret = dr_dump_rule(file, rule);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+ struct mlx5dr_table_rx_tx *table_rx_tx,
+ const u64 table_id)
+{
+ enum dr_dump_rec_type rec_type;
+ u64 s_icm_addr;
+
+ rec_type = is_rx ? DR_DUMP_REC_TYPE_TABLE_RX :
+ DR_DUMP_REC_TYPE_TABLE_TX;
+
+ s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk);
+ seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id,
+ dr_dump_icm_to_idx(s_icm_addr));
+
+ return 0;
+}
+
+static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+{
+ struct mlx5dr_table_rx_tx *rx = &table->rx;
+ struct mlx5dr_table_rx_tx *tx = &table->tx;
+ int ret;
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE,
+ DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn),
+ table->table_type, table->level);
+
+ if (rx->nic_dmn) {
+ ret = dr_dump_table_rx_tx(file, true, rx,
+ DR_DBG_PTR_TO_ID(table));
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_dmn) {
+ ret = dr_dump_table_rx_tx(file, false, tx,
+ DR_DBG_PTR_TO_ID(table));
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
+{
+ struct mlx5dr_matcher *matcher;
+ int ret;
+
+ ret = dr_dump_table(file, tbl);
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(matcher, &tbl->matcher_list, list_node) {
+ ret = dr_dump_matcher_all(file, matcher);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+ const u64 domain_id)
+{
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring),
+ domain_id, ring->cq->mcq.cqn, ring->qp->qpn);
+ return 0;
+}
+
+static int
+dr_dump_domain_info_flex_parser(struct seq_file *file,
+ const char *flex_parser_name,
+ const u8 flex_parser_value,
+ const u64 domain_id)
+{
+ seq_printf(file, "%d,0x%llx,%s,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id,
+ flex_parser_name, flex_parser_value);
+ return 0;
+}
+
+static int
+dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+ const u64 domain_id)
+{
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+ unsigned long i, vports_num;
+
+ xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps)
+ ; /* count the number of vports in xarray */
+
+ seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi,
+ caps->nic_rx_drop_address, caps->nic_tx_drop_address,
+ caps->flex_protocols, vports_num, caps->eswitch_manager);
+
+ xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) {
+ vport_caps = xa_load(&caps->vports.vports_caps_xa, i);
+
+ seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i,
+ vport_caps->vport_gvmi, vport_caps->icm_address_rx,
+ vport_caps->icm_address_tx);
+ }
+ return 0;
+}
+
+static int
+dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+ const u64 domain_id)
+{
+ int ret;
+
+ ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+ info->caps.flex_parser_id_icmp_dw0,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+ info->caps.flex_parser_id_icmp_dw1,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+ info->caps.flex_parser_id_icmpv6_dw0,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+ info->caps.flex_parser_id_icmpv6_dw1,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int
+dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
+{
+ u64 domain_id = DR_DBG_PTR_TO_ID(dmn);
+ int ret;
+
+ seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN,
+ domain_id, dmn->type, dmn->info.caps.gvmi,
+ dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev));
+
+ ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+ if (ret < 0)
+ return ret;
+
+ if (dmn->info.supp_sw_steering) {
+ ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_table *tbl;
+ int ret;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ mlx5dr_domain_lock(dmn);
+
+ ret = dr_dump_domain(file, dmn);
+ if (ret < 0)
+ goto unlock_mutex;
+
+ list_for_each_entry(tbl, &dmn->dbg_tbl_list, dbg_node) {
+ ret = dr_dump_table_all(file, tbl);
+ if (ret < 0)
+ break;
+ }
+
+unlock_mutex:
+ mlx5dr_domain_unlock(dmn);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+ return ret;
+}
+
+static int dr_dump_show(struct seq_file *file, void *priv)
+{
+ return dr_dump_domain_all(file, file->private);
+}
+DEFINE_SHOW_ATTRIBUTE(dr_dump);
+
+void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn)
+{
+ struct mlx5_core_dev *dev = dmn->mdev;
+ char file_name[128];
+
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+ mlx5_core_warn(dev,
+ "Steering dump is not supported for NIC RX/TX domains\n");
+ return;
+ }
+
+ dmn->dump_info.steering_debugfs =
+ debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev));
+ dmn->dump_info.fdb_debugfs =
+ debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs);
+
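+ /* The dump file ends up at <dev debugfs root>/steering/fdb/dmn_<ptr>,
+ * one file per FDB domain.
+ */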
+ sprintf(file_name, "dmn_%p", dmn);
+ debugfs_create_file(file_name, 0444, dmn->dump_info.fdb_debugfs,
+ dmn, &dr_dump_fops);
+
+ INIT_LIST_HEAD(&dmn->dbg_tbl_list);
+ mutex_init(&dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn)
+{
+ debugfs_remove_recursive(dmn->dump_info.steering_debugfs);
+ mutex_destroy(&dmn->dump_info.dbg_mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
new file mode 100644
index 000000000000..def6cf853eea
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+struct mlx5dr_dbg_dump_info {
+ struct mutex dbg_mutex; /* protect dbg lists */
+ struct dentry *steering_debugfs;
+ struct dentry *fdb_debugfs;
+};
+
+void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn);
+void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn);
+void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl);
+void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl);
+void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule);
+void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 8cbd36c82b3b..fc6ae49b5ecc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -2,12 +2,13 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/mlx5/eswitch.h>
+#include <linux/err.h>
#include "dr_types.h"
#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \
((dmn)->info.caps.dmn_type##_sw_owner || \
((dmn)->info.caps.dmn_type##_sw_owner_v2 && \
- (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_6DX))
+ (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7))
static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn)
{
@@ -72,9 +73,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
}
dmn->uar = mlx5_get_uars_page(dmn->mdev);
- if (!dmn->uar) {
+ if (IS_ERR(dmn->uar)) {
mlx5dr_err(dmn, "Couldn't allocate UAR\n");
- ret = -ENOMEM;
+ ret = PTR_ERR(dmn->uar);
goto clean_pd;
}
@@ -163,9 +164,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn)
{
- return dr_domain_query_vport(dmn,
- dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0,
- false,
+ return dr_domain_query_vport(dmn, 0, false,
&dmn->info.caps.vports.esw_manager_caps);
}
@@ -396,7 +395,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
}
dr_domain_init_csum_recalc_fts(dmn);
-
+ mlx5dr_dbg_init_dump(dmn);
return dmn;
uninit_caps:
@@ -432,11 +431,12 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
{
- if (refcount_read(&dmn->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1))
return -EBUSY;
/* make sure resources are not used by the hardware */
mlx5dr_cmd_sync_steering(dmn->mdev);
+ mlx5dr_dbg_uninit_dump(dmn);
dr_domain_uninit_csum_recalc_fts(dmn);
dr_domain_uninit_resources(dmn);
dr_domain_caps_uninit(dmn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
index 68a4c32d5f34..f05ef0cd54ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
bool reformat_req,
u32 *tbl_id,
u32 *group_id,
- bool ignore_flow_level)
+ bool ignore_flow_level,
+ u32 flow_source)
{
struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
struct mlx5dr_cmd_fte_info fte_info = {};
@@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
fte_info.val = val;
fte_info.dest_arr = dest;
fte_info.ignore_flow_level = ignore_flow_level;
+ fte_info.flow_context.flow_source = flow_source;
ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
if (ret) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index 7f6fd9c5e371..4ca67fa24cc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -4,7 +4,6 @@
#include "dr_types.h"
#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
-#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024)
struct mlx5dr_icm_pool {
enum mlx5dr_icm_type icm_type;
@@ -58,6 +57,36 @@ static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
return mlx5_core_create_mkey(mdev, mkey, in, inlen);
}
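+/* Chunk attributes (MR offset, rkey, ICM address, byte size, number of
+ * entries) are now computed on demand from the chunk's buddy coordinates
+ * (buddy_mem, seg, size) instead of being cached in the chunk.
+ */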
+u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk)
+{
+ u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);
+
+ return (u64)offset * chunk->seg;
+}
+
+u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk)
+{
+ return chunk->buddy_mem->icm_mr->mkey;
+}
+
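+/* For example, with 64B STE entries a chunk at seg 3 starts at
+ * icm_start_addr + 192 (illustrative arithmetic for DR_ICM_TYPE_STE).
+ */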
+u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk)
+{
+ u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);
+
+ return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg;
+}
+
+u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk)
+{
+ return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size,
+ chunk->buddy_mem->pool->icm_type);
+}
+
+u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk)
+{
+ return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size);
+}
+
static struct mlx5dr_icm_mr *
dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
{
@@ -136,37 +165,36 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
kvfree(icm_mr);
}
-static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk)
+static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
{
- chunk->ste_arr = kvzalloc(chunk->num_of_entries *
- sizeof(chunk->ste_arr[0]), GFP_KERNEL);
- if (!chunk->ste_arr)
- return -ENOMEM;
-
- chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries *
- DR_STE_SIZE_REDUCED, GFP_KERNEL);
- if (!chunk->hw_ste_arr)
- goto out_free_ste_arr;
-
- chunk->miss_list = kvmalloc(chunk->num_of_entries *
- sizeof(chunk->miss_list[0]), GFP_KERNEL);
- if (!chunk->miss_list)
- goto out_free_hw_ste_arr;
+ /* We support only one type of STE size, both for ConnectX-5 and later
+ * devices. Once the support for match STE which has a larger tag is
+ * added (32B instead of 16B), the STE size for devices later than
+ * ConnectX-5 needs to account for that.
+ */
+ return DR_STE_SIZE_REDUCED;
+}
- return 0;
+static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
+{
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+ int index = offset / DR_STE_SIZE;
-out_free_hw_ste_arr:
- kvfree(chunk->hw_ste_arr);
-out_free_ste_arr:
- kvfree(chunk->ste_arr);
- return -ENOMEM;
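+ /* Point the chunk's STE bookkeeping into the buddy's preallocated
+ * arrays at this chunk's offset; no per-chunk allocation is needed.
+ */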
+ chunk->ste_arr = &buddy->ste_arr[index];
+ chunk->miss_list = &buddy->miss_list[index];
+ chunk->hw_ste_arr = buddy->hw_ste_arr +
+ index * dr_icm_buddy_get_ste_size(buddy);
}
static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
{
- kvfree(chunk->miss_list);
- kvfree(chunk->hw_ste_arr);
- kvfree(chunk->ste_arr);
+ int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+
+ memset(chunk->hw_ste_arr, 0,
+ num_of_entries * dr_icm_buddy_get_ste_size(buddy));
+ memset(chunk->ste_arr, 0,
+ num_of_entries * sizeof(chunk->ste_arr[0]));
}
static enum mlx5dr_icm_type
@@ -180,7 +208,7 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
{
enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk);
- buddy->used_memory -= chunk->byte_size;
+ buddy->used_memory -= mlx5dr_icm_pool_get_chunk_byte_size(chunk);
list_del(&chunk->chunk_list);
if (icm_type == DR_ICM_TYPE_STE)
@@ -189,6 +217,44 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
kvfree(chunk);
}
+static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);
+
+ buddy->ste_arr = kvcalloc(num_of_entries,
+ sizeof(struct mlx5dr_ste), GFP_KERNEL);
+ if (!buddy->ste_arr)
+ return -ENOMEM;
+
+ /* Preallocate full STE size on non-ConnectX-5 devices since
+ * we need to support both full and reduced with the same cache.
+ */
+ buddy->hw_ste_arr = kvcalloc(num_of_entries,
+ dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
+ if (!buddy->hw_ste_arr)
+ goto free_ste_arr;
+
+ buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
+ if (!buddy->miss_list)
+ goto free_hw_ste_arr;
+
+ return 0;
+
+free_hw_ste_arr:
+ kvfree(buddy->hw_ste_arr);
+free_ste_arr:
+ kvfree(buddy->ste_arr);
+ return -ENOMEM;
+}
+
+static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ kvfree(buddy->ste_arr);
+ kvfree(buddy->hw_ste_arr);
+ kvfree(buddy->miss_list);
+}
+
static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
{
struct mlx5dr_icm_buddy_mem *buddy;
@@ -208,11 +274,19 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
buddy->icm_mr = icm_mr;
buddy->pool = pool;
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ /* Reduce allocations by preallocating and reusing the STE structures */
+ if (dr_icm_buddy_init_ste_cache(buddy))
+ goto err_cleanup_buddy;
+ }
+
/* add it to the -start- of the list in order to search in it first */
list_add(&buddy->list_node, &pool->buddy_mem_list);
return 0;
+err_cleanup_buddy:
+ mlx5dr_buddy_cleanup(buddy);
err_free_buddy:
kvfree(buddy);
free_mr:
@@ -234,6 +308,9 @@ static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
mlx5dr_buddy_cleanup(buddy);
+ if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_buddy_cleanup_ste_cache(buddy);
+
kvfree(buddy);
}
@@ -252,48 +329,38 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;
- chunk->rkey = buddy_mem_pool->icm_mr->mkey;
- chunk->mr_addr = offset;
- chunk->icm_addr =
- (uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset;
- chunk->num_of_entries =
- mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
- chunk->byte_size =
- mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type);
chunk->seg = seg;
+ chunk->size = chunk_size;
+ chunk->buddy_mem = buddy_mem_pool;
- if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) {
- mlx5dr_err(pool->dmn,
- "Failed to init ste arrays (order: %d)\n",
- chunk_size);
- goto out_free_chunk;
- }
+ if (pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_init(chunk, offset);
- buddy_mem_pool->used_memory += chunk->byte_size;
- chunk->buddy_mem = buddy_mem_pool;
+ buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk);
INIT_LIST_HEAD(&chunk->chunk_list);
/* chunk is now part of the used_list */
list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);
return chunk;
-
-out_free_chunk:
- kvfree(chunk);
- return NULL;
}
static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
{
- if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL)
- return true;
+ int allow_hot_size;
+
+ /* sync when hot memory reaches half of the pool size */
+ allow_hot_size =
+ mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) / 2;
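+ /* e.g. a 2^21-entry 64B STE pool gives a 64MB threshold, matching
+ * the removed DR_ICM_SYNC_THRESHOLD_POOL (illustrative sizing).
+ */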
- return false;
+ return pool->hot_memory_size > allow_hot_size;
}
static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
{
struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
+ u32 num_entries;
int err;
err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
@@ -306,9 +373,9 @@ static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
struct mlx5dr_icm_chunk *chunk, *tmp_chunk;
list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) {
- mlx5dr_buddy_free_mem(buddy, chunk->seg,
- ilog2(chunk->num_of_entries));
- pool->hot_memory_size -= chunk->byte_size;
+ num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
+ mlx5dr_buddy_free_mem(buddy, chunk->seg, ilog2(num_entries));
+ pool->hot_memory_size -= mlx5dr_icm_pool_get_chunk_byte_size(chunk);
dr_icm_chunk_destroy(chunk, buddy);
}
@@ -406,7 +473,7 @@ void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
/* move the memory to the waiting list AKA "hot" */
mutex_lock(&pool->mutex);
list_move_tail(&chunk->chunk_list, &buddy->hot_list);
- pool->hot_memory_size += chunk->byte_size;
+ pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk);
/* Check if we have chunks that are waiting for sync-ste */
if (dr_icm_pool_is_sync_required(pool))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 793365242e85..0726848eb3ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec)
return (spec->dmac_47_16 || spec->dmac_15_0);
}
-static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec)
-{
- return (spec->src_ip_127_96 || spec->src_ip_95_64 ||
- spec->src_ip_63_32 || spec->src_ip_31_0);
-}
-
-static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec)
-{
- return (spec->dst_ip_127_96 || spec->dst_ip_95_64 ||
- spec->dst_ip_63_32 || spec->dst_ip_31_0);
-}
-
static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec)
{
return (spec->ip_protocol || spec->frag || spec->tcp_flags ||
@@ -59,6 +47,11 @@ static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec)
return spec->ttl_hoplimit;
}
+static bool dr_mask_is_ipv4_ihl_set(struct mlx5dr_match_spec *spec)
+{
+ return spec->ipv4_ihl;
+}
+
#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \
(_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \
(_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \
@@ -115,7 +108,7 @@ dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3)
static bool
dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps)
{
- return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
(caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED);
}
@@ -141,9 +134,22 @@ static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3)
}
static bool
+dr_matcher_supp_flex_parser_ok(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_parser_ok_bits_supp;
+}
+
+static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *misc,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_matcher_supp_flex_parser_ok(&dmn->info.caps) &&
+ misc->geneve_tlv_option_0_exist;
+}
+
+static bool
dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps)
{
- return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
(caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED);
}
@@ -260,13 +266,13 @@ static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask,
static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps)
{
- return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
(caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED);
}
static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps)
{
- return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
(caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED);
}
@@ -359,7 +365,7 @@ static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask,
static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps)
{
- return caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED;
+ return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED;
}
static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask,
@@ -368,6 +374,12 @@ static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask,
return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) &&
dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps);
}
+
+static bool dr_mask_is_tnl_header_0_1_set(struct mlx5dr_match_misc5 *misc5)
+{
+ return misc5->tunnel_header_0 || misc5->tunnel_header_1;
+}
+
int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
enum mlx5dr_ipv outer_ipv,
@@ -424,6 +436,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4)
mask.misc4 = matcher->mask.misc4;
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC5)
+ mask.misc5 = matcher->mask.misc5;
+
ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
&matcher->mask, NULL);
if (ret)
@@ -443,7 +458,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER |
DR_MATCHER_CRITERIA_MISC |
DR_MATCHER_CRITERIA_MISC2 |
- DR_MATCHER_CRITERIA_MISC3)) {
+ DR_MATCHER_CRITERIA_MISC3 |
+ DR_MATCHER_CRITERIA_MISC5)) {
inner = false;
if (dr_mask_is_wqe_metadata_set(&mask.misc2))
@@ -480,11 +496,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
&mask, inner, rx);
if (outer_ipv == DR_RULE_IPV6) {
- if (dr_mask_is_dst_addr_set(&mask.outer))
+ if (DR_MASK_IS_DST_IP_SET(&mask.outer))
mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
&mask, inner, rx);
- if (dr_mask_is_src_addr_set(&mask.outer))
+ if (DR_MASK_IS_SRC_IP_SET(&mask.outer))
mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
&mask, inner, rx);
@@ -496,7 +512,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++],
&mask, inner, rx);
- if (dr_mask_is_ttl_set(&mask.outer))
+ if (dr_mask_is_ttl_set(&mask.outer) ||
+ dr_mask_is_ipv4_ihl_set(&mask.outer))
mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++],
&mask, inner, rx);
}
@@ -511,6 +528,10 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++],
&mask, &dmn->info.caps,
inner, rx);
+ if (dr_mask_is_tnl_geneve_tlv_opt_exist_set(&mask.misc, dmn))
+ mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
} else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) {
if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn))
mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++],
@@ -525,6 +546,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
if (dr_mask_is_tnl_gtpu(&mask, dmn))
mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++],
&mask, inner, rx);
+ } else if (dr_mask_is_tnl_header_0_1_set(&mask.misc5)) {
+ mlx5dr_ste_build_tnl_header_0_1(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
}
if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer))
@@ -580,11 +604,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
&mask, inner, rx);
if (inner_ipv == DR_RULE_IPV6) {
- if (dr_mask_is_dst_addr_set(&mask.inner))
+ if (DR_MASK_IS_DST_IP_SET(&mask.inner))
mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
&mask, inner, rx);
- if (dr_mask_is_src_addr_set(&mask.inner))
+ if (DR_MASK_IS_SRC_IP_SET(&mask.inner))
mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
&mask, inner, rx);
@@ -596,7 +620,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++],
&mask, inner, rx);
- if (dr_mask_is_ttl_set(&mask.inner))
+ if (dr_mask_is_ttl_set(&mask.inner) ||
+ dr_mask_is_ipv4_ihl_set(&mask.inner))
mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++],
&mask, inner, rx);
}
@@ -653,10 +678,10 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
return 0;
}
-static int dr_matcher_connect(struct mlx5dr_domain *dmn,
- struct mlx5dr_matcher_rx_tx *curr_nic_matcher,
- struct mlx5dr_matcher_rx_tx *next_nic_matcher,
- struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *curr_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
{
struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl;
struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
@@ -680,7 +705,7 @@ static int dr_matcher_connect(struct mlx5dr_domain *dmn,
/* Connect start hash table to end anchor */
info.type = CONNECT_MISS;
- info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr;
+ info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(curr_nic_matcher->e_anchor->chunk);
ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
curr_nic_matcher->s_htbl,
&info, false);
@@ -701,69 +726,63 @@ static int dr_matcher_connect(struct mlx5dr_domain *dmn,
return ret;
/* Update the pointing ste and next hash table */
- curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr;
- prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl;
+ curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->chunk->ste_arr;
+ prev_htbl->chunk->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl;
if (next_nic_matcher) {
- next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr;
- curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+ next_nic_matcher->s_htbl->pointing_ste =
+ curr_nic_matcher->e_anchor->chunk->ste_arr;
+ curr_nic_matcher->e_anchor->chunk->ste_arr[0].next_htbl =
+ next_nic_matcher->s_htbl;
}
return 0;
}
-static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
+int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
{
- struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher;
- struct mlx5dr_table *tbl = matcher->tbl;
- struct mlx5dr_domain *dmn = tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher, *prev_nic_matcher, *tmp_nic_matcher;
+ struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl;
bool first = true;
int ret;
- next_matcher = NULL;
- list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) {
- if (tmp_matcher->prio >= matcher->prio) {
- next_matcher = tmp_matcher;
+ /* If the nic matcher is already on its parent nic table list,
+ * then it is already connected to the chain of nic matchers.
+ */
+ if (!list_empty(&nic_matcher->list_node))
+ return 0;
+
+ next_nic_matcher = NULL;
+ list_for_each_entry(tmp_nic_matcher, &nic_tbl->nic_matcher_list, list_node) {
+ if (tmp_nic_matcher->prio >= nic_matcher->prio) {
+ next_nic_matcher = tmp_nic_matcher;
break;
}
first = false;
}
- prev_matcher = NULL;
- if (next_matcher && !first)
- prev_matcher = list_prev_entry(next_matcher, matcher_list);
+ prev_nic_matcher = NULL;
+ if (next_nic_matcher && !first)
+ prev_nic_matcher = list_prev_entry(next_nic_matcher, list_node);
else if (!first)
- prev_matcher = list_last_entry(&tbl->matcher_list,
- struct mlx5dr_matcher,
- matcher_list);
-
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
- ret = dr_matcher_connect(dmn, &matcher->rx,
- next_matcher ? &next_matcher->rx : NULL,
- prev_matcher ? &prev_matcher->rx : NULL);
- if (ret)
- return ret;
- }
+ prev_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list,
+ struct mlx5dr_matcher_rx_tx,
+ list_node);
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
- ret = dr_matcher_connect(dmn, &matcher->tx,
- next_matcher ? &next_matcher->tx : NULL,
- prev_matcher ? &prev_matcher->tx : NULL);
- if (ret)
- return ret;
- }
+ ret = dr_nic_matcher_connect(dmn, nic_matcher,
+ next_nic_matcher, prev_nic_matcher);
+ if (ret)
+ return ret;
- if (prev_matcher)
- list_add(&matcher->matcher_list, &prev_matcher->matcher_list);
- else if (next_matcher)
- list_add_tail(&matcher->matcher_list,
- &next_matcher->matcher_list);
+ if (prev_nic_matcher)
+ list_add(&nic_matcher->list_node, &prev_nic_matcher->list_node);
+ else if (next_nic_matcher)
+ list_add_tail(&nic_matcher->list_node, &next_nic_matcher->list_node);
else
- list_add(&matcher->matcher_list, &tbl->matcher_list);
+ list_add(&nic_matcher->list_node, &nic_matcher->nic_tbl->nic_matcher_list);
- return 0;
+ return ret;
}
static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher)
@@ -822,6 +841,9 @@ static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
int ret;
+ nic_matcher->prio = matcher->prio;
+ INIT_LIST_HEAD(&nic_matcher->list_node);
+
ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher);
if (ret)
return ret;
@@ -872,13 +894,12 @@ uninit_nic_rx:
return ret;
}
-static int dr_matcher_init(struct mlx5dr_matcher *matcher,
- struct mlx5dr_match_parameters *mask)
+static int dr_matcher_copy_param(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *mask)
{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_match_parameters consumed_mask;
- struct mlx5dr_table *tbl = matcher->tbl;
- struct mlx5dr_domain *dmn = tbl->dmn;
- int i, ret;
+ int i, ret = 0;
if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) {
mlx5dr_err(dmn, "Invalid match criteria attribute\n");
@@ -898,10 +919,36 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher,
consumed_mask.match_sz = mask->match_sz;
memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz);
mlx5dr_ste_copy_param(matcher->match_criteria,
- &matcher->mask, &consumed_mask,
- true);
+ &matcher->mask, &consumed_mask, true);
+
+ /* Check that all mask data was consumed */
+ for (i = 0; i < consumed_mask.match_sz; i++) {
+ if (!((u8 *)consumed_mask.match_buf)[i])
+ continue;
+
+ mlx5dr_dbg(dmn,
+ "Match param mask contains unsupported parameters\n");
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ kfree(consumed_mask.match_buf);
}
+ return ret;
+}
+
+static int dr_matcher_init(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *mask)
+{
+ struct mlx5dr_table *tbl = matcher->tbl;
+ struct mlx5dr_domain *dmn = tbl->dmn;
+ int ret;
+
+ ret = dr_matcher_copy_param(matcher, mask);
+ if (ret)
+ return ret;
+
switch (dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
matcher->rx.nic_tbl = &tbl->rx;
@@ -919,23 +966,23 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher,
default:
WARN_ON(true);
ret = -EINVAL;
- goto free_consumed_mask;
}
- /* Check that all mask data was consumed */
- for (i = 0; i < consumed_mask.match_sz; i++) {
- if (!((u8 *)consumed_mask.match_buf)[i])
- continue;
+ return ret;
+}
- mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n");
- ret = -EOPNOTSUPP;
- goto free_consumed_mask;
- }
+static void dr_matcher_add_to_dbg_list(struct mlx5dr_matcher *matcher)
+{
+ mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+ list_add(&matcher->list_node, &matcher->tbl->matcher_list);
+ mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+}
- ret = 0;
-free_consumed_mask:
- kfree(consumed_mask.match_buf);
- return ret;
+static void dr_matcher_remove_from_dbg_list(struct mlx5dr_matcher *matcher)
+{
+ mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+ list_del(&matcher->list_node);
+ mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex);
}
struct mlx5dr_matcher *
@@ -957,7 +1004,8 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
matcher->prio = priority;
matcher->match_criteria = match_criteria_enable;
refcount_set(&matcher->refcount, 1);
- INIT_LIST_HEAD(&matcher->matcher_list);
+ INIT_LIST_HEAD(&matcher->list_node);
+ INIT_LIST_HEAD(&matcher->dbg_rule_list);
mlx5dr_domain_lock(tbl->dmn);
@@ -965,16 +1013,12 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
if (ret)
goto free_matcher;
- ret = dr_matcher_add_to_tbl(matcher);
- if (ret)
- goto matcher_uninit;
+ dr_matcher_add_to_dbg_list(matcher);
mlx5dr_domain_unlock(tbl->dmn);
return matcher;
-matcher_uninit:
- dr_matcher_uninit(matcher);
free_matcher:
mlx5dr_domain_unlock(tbl->dmn);
kfree(matcher);
@@ -983,10 +1027,10 @@ dec_ref:
return NULL;
}
-static int dr_matcher_disconnect(struct mlx5dr_domain *dmn,
- struct mlx5dr_table_rx_tx *nic_tbl,
- struct mlx5dr_matcher_rx_tx *next_nic_matcher,
- struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
{
struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
struct mlx5dr_htbl_connect_info info;
@@ -1001,55 +1045,46 @@ static int dr_matcher_disconnect(struct mlx5dr_domain *dmn,
if (next_nic_matcher) {
info.type = CONNECT_HIT;
info.hit_next_htbl = next_nic_matcher->s_htbl;
- next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr;
- prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+ next_nic_matcher->s_htbl->pointing_ste = prev_anchor->chunk->ste_arr;
+ prev_anchor->chunk->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
} else {
info.type = CONNECT_MISS;
info.miss_icm_addr = nic_tbl->default_icm_addr;
- prev_anchor->ste_arr[0].next_htbl = NULL;
+ prev_anchor->chunk->ste_arr[0].next_htbl = NULL;
}
return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor,
&info, true);
}
-static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher)
+int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
{
- struct mlx5dr_matcher *prev_matcher, *next_matcher;
- struct mlx5dr_table *tbl = matcher->tbl;
- struct mlx5dr_domain *dmn = tbl->dmn;
- int ret = 0;
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher, *next_nic_matcher;
+ struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl;
+ int ret;
- if (list_is_last(&matcher->matcher_list, &tbl->matcher_list))
- next_matcher = NULL;
- else
- next_matcher = list_next_entry(matcher, matcher_list);
+ /* If the nic matcher is not on its parent nic table list,
+ * then it is detached, so there is no need to disconnect it.
+ */
+ if (list_empty(&nic_matcher->list_node))
+ return 0;
- if (matcher->matcher_list.prev == &tbl->matcher_list)
- prev_matcher = NULL;
+ if (list_is_last(&nic_matcher->list_node, &nic_tbl->nic_matcher_list))
+ next_nic_matcher = NULL;
else
- prev_matcher = list_prev_entry(matcher, matcher_list);
-
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
- ret = dr_matcher_disconnect(dmn, &tbl->rx,
- next_matcher ? &next_matcher->rx : NULL,
- prev_matcher ? &prev_matcher->rx : NULL);
- if (ret)
- return ret;
- }
+ next_nic_matcher = list_next_entry(nic_matcher, list_node);
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
- ret = dr_matcher_disconnect(dmn, &tbl->tx,
- next_matcher ? &next_matcher->tx : NULL,
- prev_matcher ? &prev_matcher->tx : NULL);
- if (ret)
- return ret;
- }
+ if (nic_matcher->list_node.prev == &nic_tbl->nic_matcher_list)
+ prev_nic_matcher = NULL;
+ else
+ prev_nic_matcher = list_prev_entry(nic_matcher, list_node);
- list_del(&matcher->matcher_list);
+ ret = dr_matcher_disconnect_nic(dmn, nic_tbl, next_nic_matcher, prev_nic_matcher);
+ if (ret)
+ return ret;
+ list_del_init(&nic_matcher->list_node);
return 0;
}
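
For readers less used to <linux/list.h>: the early return above works because list_del_init() leaves the removed node self-linked, so list_empty() on the node itself reports whether the nic matcher is still attached. A minimal user-space sketch of that idiom (the list implementation is a stripped-down stand-in, not the kernel one):

/* Detached-node idiom: list_del_init() re-initializes the node, so a
 * later list_empty(&node) check tells whether it is still linked.
 */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *n, struct list_head *head)
{
	n->next = head->next; n->prev = head;
	head->next->prev = n; head->next = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next; n->next->prev = n->prev;
	INIT_LIST_HEAD(n); /* self-linked == "detached" */
}

static int list_empty(const struct list_head *n) { return n->next == n; }

int main(void)
{
	struct list_head tbl_list, matcher_node;

	INIT_LIST_HEAD(&tbl_list);
	INIT_LIST_HEAD(&matcher_node);
	printf("detached: %d\n", list_empty(&matcher_node)); /* 1 */

	list_add(&matcher_node, &tbl_list);
	printf("detached: %d\n", list_empty(&matcher_node)); /* 0 */

	list_del_init(&matcher_node);
	printf("detached: %d\n", list_empty(&matcher_node)); /* 1 */
	return 0;
}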
@@ -1057,12 +1092,12 @@ int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher)
{
struct mlx5dr_table *tbl = matcher->tbl;
- if (refcount_read(&matcher->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&matcher->refcount) > 1))
return -EBUSY;
mlx5dr_domain_lock(tbl->dmn);
- dr_matcher_remove_from_tbl(matcher);
+ dr_matcher_remove_from_dbg_list(matcher);
dr_matcher_uninit(matcher);
refcount_dec(&matcher->tbl->refcount);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 6a390e981b09..91ff19f67695 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -5,11 +5,6 @@
#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES)
-struct mlx5dr_rule_action_member {
- struct mlx5dr_action *action;
- struct list_head list;
-};
-
static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_ste *new_last_ste,
struct list_head *miss_list,
@@ -26,12 +21,12 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx,
if (!ste_info_last)
return -ENOMEM;
- mlx5dr_ste_set_miss_addr(ste_ctx, last_ste->hw_ste,
+ mlx5dr_ste_set_miss_addr(ste_ctx, mlx5dr_ste_get_hw_ste(last_ste),
mlx5dr_ste_get_icm_addr(new_last_ste));
list_add_tail(&new_last_ste->miss_list_node, miss_list);
mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL,
- 0, last_ste->hw_ste,
+ 0, mlx5dr_ste_get_hw_ste(last_ste),
ste_info_last, send_list, true);
return 0;
@@ -46,6 +41,7 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_ste_htbl *new_htbl;
struct mlx5dr_ste *ste;
+ u64 icm_addr;
/* Create new table for miss entry */
new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
@@ -58,9 +54,9 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
}
/* One and only entry, never grows */
- ste = new_htbl->ste_arr;
- mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste,
- nic_matcher->e_anchor->chunk->icm_addr);
+ ste = new_htbl->chunk->ste_arr;
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr);
mlx5dr_htbl_get(new_htbl);
return ste;
@@ -84,7 +80,7 @@ dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher,
ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste;
/* In collision entry, all members share the same miss_list_head */
- ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
+ ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
/* Next table */
if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste,
@@ -112,9 +108,11 @@ dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info,
* is already written to the hw.
*/
if (ste_info->size == DR_STE_SIZE_CTRL)
- memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_CTRL);
+ memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste),
+ ste_info->data, DR_STE_SIZE_CTRL);
else
- memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED);
+ memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste),
+ ste_info->data, DR_STE_SIZE_REDUCED);
ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data,
ste_info->size, ste_info->offset);
@@ -164,7 +162,7 @@ dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste)
/* Check if hw_ste is present in the list */
list_for_each_entry(ste, miss_list, miss_list_node) {
- if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste))
+ if (mlx5dr_ste_equal_tag(mlx5dr_ste_get_hw_ste(ste), hw_ste))
return ste;
}
@@ -190,7 +188,7 @@ dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste;
/* In collision entry, all members share the same miss_list_head */
- new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste);
+ new_ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(col_ste);
/* Update the previous from the list */
ret = dr_rule_append_to_miss_list(dmn->ste_ctx, new_ste,
@@ -240,6 +238,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
bool use_update_list = false;
u8 hw_ste[DR_STE_SIZE] = {};
struct mlx5dr_ste *new_ste;
+ u64 icm_addr;
int new_idx;
u8 sb_idx;
@@ -248,12 +247,12 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask);
/* Copy STE control and tag */
- memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED);
- mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste,
- nic_matcher->e_anchor->chunk->icm_addr);
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED);
+ mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl);
- new_ste = &new_htbl->ste_arr[new_idx];
+ new_ste = &new_htbl->chunk->ste_arr[new_idx];
if (mlx5dr_ste_is_not_used(new_ste)) {
mlx5dr_htbl_get(new_htbl);
@@ -274,7 +273,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
use_update_list = true;
}
- memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED);
+ memcpy(mlx5dr_ste_get_hw_ste(new_ste), hw_ste, DR_STE_SIZE_REDUCED);
new_htbl->ctrl.num_of_valid_entries++;
@@ -339,7 +338,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
int err = 0;
int i;
- cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size);
+ cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk->size);
if (cur_entries < 1) {
mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n");
@@ -347,7 +346,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
}
for (i = 0; i < cur_entries; i++) {
- cur_ste = &cur_htbl->ste_arr[i];
+ cur_ste = &cur_htbl->chunk->ste_arr[i];
if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */
continue;
@@ -403,7 +402,7 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
/* Write new table to HW */
info.type = CONNECT_MISS;
- info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
+ info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
dmn->info.caps.gvmi,
nic_dmn->type,
@@ -451,21 +450,21 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
* (48B len) which works only on first 32B
*/
mlx5dr_ste_set_hit_addr(dmn->ste_ctx,
- prev_htbl->ste_arr[0].hw_ste,
- new_htbl->chunk->icm_addr,
- new_htbl->chunk->num_of_entries);
+ prev_htbl->chunk->hw_ste_arr,
+ mlx5dr_icm_pool_get_chunk_icm_addr(new_htbl->chunk),
+ mlx5dr_icm_pool_get_chunk_num_of_entries(new_htbl->chunk));
- ste_to_update = &prev_htbl->ste_arr[0];
+ ste_to_update = &prev_htbl->chunk->ste_arr[0];
} else {
mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx,
- cur_htbl->pointing_ste->hw_ste,
+ mlx5dr_ste_get_hw_ste(cur_htbl->pointing_ste),
new_htbl);
ste_to_update = cur_htbl->pointing_ste;
}
mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL,
- 0, ste_to_update->hw_ste, ste_info,
- update_list, false);
+ 0, mlx5dr_ste_get_hw_ste(ste_to_update),
+ ste_info, update_list, false);
return new_htbl;
@@ -494,10 +493,10 @@ static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule,
struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
enum mlx5dr_icm_chunk_size new_size;
- new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size);
+ new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk->size);
new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz);
- if (new_size == cur_htbl->chunk_size)
+ if (new_size == cur_htbl->chunk->size)
return NULL; /* Skip rehash, we already at the max size */
return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location,
@@ -664,13 +663,13 @@ static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
int threshold;
- if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size)
+ if (dmn->info.max_log_sw_icm_sz <= htbl->chunk->size)
return false;
if (!mlx5dr_ste_htbl_may_grow(htbl))
return false;
- if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size)
+ if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk->size)
return false;
threshold = mlx5dr_ste_htbl_increase_threshold(htbl);
@@ -760,6 +759,7 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
{
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_ste_send_info *ste_info;
+ u64 icm_addr;
/* Take ref on table, only on first time this ste is used */
mlx5dr_htbl_get(cur_htbl);
@@ -767,8 +767,8 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
/* new entry -> new branch */
list_add_tail(&ste->miss_list_node, miss_list);
- mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste,
- nic_matcher->e_anchor->chunk->icm_addr);
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
ste->ste_chain_location = ste_location;
@@ -827,7 +827,7 @@ dr_rule_handle_ste_branch(struct mlx5dr_rule *rule,
again:
index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl);
miss_list = &cur_htbl->chunk->miss_list[index];
- ste = &cur_htbl->ste_arr[index];
+ ste = &cur_htbl->chunk->ste_arr[index];
if (mlx5dr_ste_is_not_used(ste)) {
if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl,
@@ -863,7 +863,7 @@ again:
ste_location, send_ste_list);
if (!new_htbl) {
mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n",
- cur_htbl->chunk_size);
+ cur_htbl->chunk->size);
mlx5dr_htbl_put(cur_htbl);
} else {
cur_htbl = new_htbl;
@@ -979,14 +979,36 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
return false;
}
}
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC5) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc5);
+ e_idx = min(s_idx + sizeof(param->misc5), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc5 parameters contain a value not specified by mask\n");
+ return false;
+ }
+ }
return true;
}
static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule,
struct mlx5dr_rule_rx_tx *nic_rule)
{
+ /* Check whether this nic rule was actually created, or whether it
+ * was skipped and only the other (RX or TX) nic rule was created.
+ */
+ if (!nic_rule->last_rule_ste)
+ return 0;
+
mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
dr_rule_clean_rule_members(rule, nic_rule);
+
+ nic_rule->nic_matcher->rules--;
+ if (!nic_rule->nic_matcher->rules)
+ mlx5dr_matcher_remove_from_tbl_nic(rule->matcher->tbl->dmn,
+ nic_rule->nic_matcher);
+
mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
return 0;
@@ -1003,6 +1025,8 @@ static int dr_rule_destroy_rule(struct mlx5dr_rule *rule)
{
struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+ mlx5dr_dbg_rule_del(rule);
+
switch (dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
dr_rule_destroy_rule_nic(rule, &rule->rx);
@@ -1091,24 +1115,28 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
mlx5dr_domain_nic_lock(nic_dmn);
+ ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher);
+ if (ret)
+ goto free_hw_ste;
+
ret = mlx5dr_matcher_select_builders(matcher,
nic_matcher,
dr_rule_get_ipv(&param->outer),
dr_rule_get_ipv(&param->inner));
if (ret)
- goto free_hw_ste;
+ goto remove_from_nic_tbl;
/* Set the tag values inside the ste array */
ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr);
if (ret)
- goto free_hw_ste;
+ goto remove_from_nic_tbl;
/* Set the actions values/addresses inside the ste array */
ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions,
num_actions, hw_ste_arr,
&new_hw_ste_arr_sz);
if (ret)
- goto free_hw_ste;
+ goto remove_from_nic_tbl;
cur_htbl = nic_matcher->s_htbl;
@@ -1155,6 +1183,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
if (htbl)
mlx5dr_htbl_put(htbl);
+ nic_matcher->rules++;
+
mlx5dr_domain_nic_unlock(nic_dmn);
kfree(hw_ste_arr);
@@ -1168,6 +1198,11 @@ free_rule:
list_del(&ste_info->send_list);
kfree(ste_info);
}
+
+remove_from_nic_tbl:
+ if (!nic_matcher->rules)
+ mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher);
+
free_hw_ste:
mlx5dr_domain_nic_unlock(nic_dmn);
kfree(hw_ste_arr);
@@ -1257,6 +1292,8 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher,
if (ret)
goto remove_action_members;
+ INIT_LIST_HEAD(&rule->dbg_node);
+ mlx5dr_dbg_rule_add(rule);
return rule;
remove_action_members:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 00aef47d7682..ef19a66f5233 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -407,17 +407,17 @@ static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
int *iterations,
int *num_stes)
{
+ u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
int alloc_size;
- if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
- *iterations = htbl->chunk->byte_size /
- dmn->send_ring->max_post_send_size;
+ if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
+ *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
*byte_size = dmn->send_ring->max_post_send_size;
alloc_size = *byte_size;
*num_stes = *byte_size / DR_STE_SIZE;
} else {
*iterations = 1;
- *num_stes = htbl->chunk->num_of_entries;
+ *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
alloc_size = *num_stes * DR_STE_SIZE;
}
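
A quick standalone sketch of the sizing math above (values hypothetical; the driver tracks alloc_size separately, folded into byte_size here for brevity). Tables larger than the send ring's max post size are split into whole iterations of max_post_send_size; smaller tables go out in one shot:

#include <stdio.h>
#define DR_STE_SIZE 64 /* matches the driver's full STE size */

static void get_copy_details(unsigned int chunk_byte_size,
			     unsigned int max_post_send_size,
			     unsigned int num_entries,
			     int *iterations, int *byte_size, int *num_stes)
{
	if (chunk_byte_size > max_post_send_size) {
		/* both sizes are powers of two, so this divides evenly */
		*iterations = chunk_byte_size / max_post_send_size;
		*byte_size = max_post_send_size;
		*num_stes = *byte_size / DR_STE_SIZE;
	} else {
		*iterations = 1;
		*num_stes = num_entries;
		*byte_size = *num_stes * DR_STE_SIZE;
	}
}

int main(void)
{
	int it, bs, n;

	get_copy_details(64 * 1024, 8 * 1024, 1024, &it, &bs, &n);
	printf("%d iterations x %d bytes (%d STEs each)\n", it, bs, n);
	/* -> 8 iterations x 8192 bytes (128 STEs each) */
	return 0;
}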
@@ -453,7 +453,7 @@ int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
send_info.write.length = size;
send_info.write.lkey = 0;
send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
- send_info.rkey = ste->htbl->chunk->rkey;
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
return dr_postsend_icm_data(dmn, &send_info);
}
@@ -462,7 +462,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
struct mlx5dr_ste_htbl *htbl,
u8 *formatted_ste, u8 *mask)
{
- u32 byte_size = htbl->chunk->byte_size;
+ u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
int num_stes_per_iter;
int iterations;
u8 *data;
@@ -486,7 +486,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
* need to add the bit_mask
*/
for (j = 0; j < num_stes_per_iter; j++) {
- struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j];
+ struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
u32 ste_off = j * DR_STE_SIZE;
if (mlx5dr_ste_is_not_used(ste)) {
@@ -495,7 +495,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
} else {
/* Copy data */
memcpy(data + ste_off,
- htbl->ste_arr[ste_index + j].hw_ste,
+ htbl->chunk->hw_ste_arr +
+ DR_STE_SIZE_REDUCED * (ste_index + j),
DR_STE_SIZE_REDUCED);
/* Copy bit_mask */
memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
@@ -511,8 +512,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
send_info.write.length = byte_size;
send_info.write.lkey = 0;
send_info.remote_addr =
- mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
- send_info.rkey = htbl->chunk->rkey;
+ mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
ret = dr_postsend_icm_data(dmn, &send_info);
if (ret)
@@ -530,7 +531,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
u8 *ste_init_data,
bool update_hw_ste)
{
- u32 byte_size = htbl->chunk->byte_size;
+ u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
int iterations;
int num_stes;
u8 *copy_dst;
@@ -546,7 +547,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
if (update_hw_ste) {
/* Copy the reduced STE to hash table ste_arr */
for (i = 0; i < num_stes; i++) {
- copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
+ copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
}
}
@@ -568,8 +569,8 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
send_info.write.length = byte_size;
send_info.write.lkey = 0;
send_info.remote_addr =
- mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
- send_info.rkey = htbl->chunk->rkey;
+ mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
ret = dr_postsend_icm_data(dmn, &send_info);
if (ret)
@@ -591,8 +592,9 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
send_info.write.length = action->rewrite->num_of_actions *
DR_MODIFY_ACTION_SIZE;
send_info.write.lkey = 0;
- send_info.remote_addr = action->rewrite->chunk->mr_addr;
- send_info.rkey = action->rewrite->chunk->rkey;
+ send_info.remote_addr =
+ mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
ret = dr_postsend_icm_data(dmn, &send_info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 219a5474a8a4..09ebd3088857 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -25,6 +25,7 @@ bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
{
+ u32 num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
u8 masked[DR_STE_SIZE_TAG] = {};
u32 crc32, index;
@@ -32,7 +33,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
int i;
/* Don't calculate CRC if the result is predicted */
- if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0)
+ if (num_entries == 1 || htbl->byte_mask == 0)
return 0;
/* Mask tag using byte mask, bit per byte */
@@ -45,7 +46,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
}
crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG);
- index = crc32 & (htbl->chunk->num_of_entries - 1);
+ index = crc32 & (num_entries - 1);
return index;
}
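
A worked example of the index math above: the tag is masked bit-per-byte, then the CRC is folded into a slot with `crc & (n - 1)`, a cheap modulo that is valid only because chunk sizes are powers of two. The CRC value and the mask bit ordering are illustrative stand-ins:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t tag[4] = { 0x11, 0x22, 0x33, 0x44 }, masked[4] = { 0 };
	uint16_t byte_mask = 0x5;    /* bit i selects tag byte i (simplified) */
	uint32_t crc32 = 0xdeadbeef; /* stand-in for dr_ste_crc32_calc() */
	uint32_t num_entries = 256;  /* always a power of two */
	int i;

	for (i = 0; i < 4; i++)
		if (byte_mask & (1 << i))
			masked[i] = tag[i];

	printf("masked: %02x %02x %02x %02x\n",
	       masked[0], masked[1], masked[2], masked[3]); /* 11 00 33 00 */
	printf("index: %u\n", crc32 & (num_entries - 1));   /* 239 */
	return 0;
}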
@@ -96,13 +97,11 @@ void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
}
static void dr_ste_always_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
- struct mlx5dr_ste *ste, u64 miss_addr)
+ u8 *hw_ste, u64 miss_addr)
{
- u8 *hw_ste_p = ste->hw_ste;
-
- ste_ctx->set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
- ste_ctx->set_miss_addr(hw_ste_p, miss_addr);
- dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste);
+ ste_ctx->set_next_lu_type(hw_ste, MLX5DR_STE_LU_TYPE_DONT_CARE);
+ ste_ctx->set_miss_addr(hw_ste, miss_addr);
+ dr_ste_set_always_miss((struct dr_hw_ste_format *)hw_ste);
}
void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx,
@@ -113,37 +112,45 @@ void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx,
u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste)
{
- u32 index = ste - ste->htbl->ste_arr;
+ u64 base_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(ste->htbl->chunk);
+ u32 index = ste - ste->htbl->chunk->ste_arr;
- return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index;
+ return base_icm_addr + DR_STE_SIZE * index;
}
u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste)
{
- u32 index = ste - ste->htbl->ste_arr;
+ u32 index = ste - ste->htbl->chunk->ste_arr;
+
+ return mlx5dr_icm_pool_get_chunk_mr_addr(ste->htbl->chunk) + DR_STE_SIZE * index;
+}
+
+u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste)
+{
+ u64 index = ste - ste->htbl->chunk->ste_arr;
- return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index;
+ return ste->htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * index;
}
struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste)
{
- u32 index = ste - ste->htbl->ste_arr;
+ u32 index = ste - ste->htbl->chunk->ste_arr;
- return &ste->htbl->miss_list[index];
+ return &ste->htbl->chunk->miss_list[index];
}
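
These accessors all share one pattern, worth spelling out: an STE's slot inside its chunk is recovered by pointer subtraction, then scaled by the per-entry stride, a full 64B for ICM/MR addresses but the reduced 48B for the cached hw_ste copy. A standalone sketch with stripped-down stand-in structs:

#include <stdint.h>
#include <stdio.h>

#define DR_STE_SIZE	    64
#define DR_STE_SIZE_REDUCED 48
#define NUM_ENTRIES	    8

struct ste { int used; };

struct chunk {
	struct ste ste_arr[NUM_ENTRIES];
	uint8_t hw_ste_arr[NUM_ENTRIES * DR_STE_SIZE_REDUCED];
	uint64_t icm_addr;
};

static uint64_t ste_icm_addr(struct chunk *c, struct ste *s)
{
	uint32_t index = s - c->ste_arr; /* pointer difference -> slot */

	return c->icm_addr + (uint64_t)DR_STE_SIZE * index;
}

static uint8_t *ste_hw_ste(struct chunk *c, struct ste *s)
{
	uint32_t index = s - c->ste_arr;

	return c->hw_ste_arr + DR_STE_SIZE_REDUCED * index; /* 48B stride */
}

int main(void)
{
	struct chunk c = { .icm_addr = 0x100000 };
	struct ste *s = &c.ste_arr[3];

	printf("icm 0x%llx, hw_ste offset %td\n",
	       (unsigned long long)ste_icm_addr(&c, s),
	       ste_hw_ste(&c, s) - c.hw_ste_arr); /* 0x1000c0, 144 */
	return 0;
}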
static void dr_ste_always_hit_htbl(struct mlx5dr_ste_ctx *ste_ctx,
- struct mlx5dr_ste *ste,
+ u8 *hw_ste,
struct mlx5dr_ste_htbl *next_htbl)
{
struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
- u8 *hw_ste = ste->hw_ste;
ste_ctx->set_byte_mask(hw_ste, next_htbl->byte_mask);
ste_ctx->set_next_lu_type(hw_ste, next_htbl->lu_type);
- ste_ctx->set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+ ste_ctx->set_hit_addr(hw_ste, mlx5dr_icm_pool_get_chunk_icm_addr(chunk),
+ mlx5dr_icm_pool_get_chunk_num_of_entries(chunk));
- dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste);
+ dr_ste_set_always_hit((struct dr_hw_ste_format *)hw_ste);
}
bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
@@ -166,7 +173,8 @@ bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
*/
static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
{
- memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED);
+ memcpy(mlx5dr_ste_get_hw_ste(dst), mlx5dr_ste_get_hw_ste(src),
+ DR_STE_SIZE_REDUCED);
dst->next_htbl = src->next_htbl;
if (dst->next_htbl)
dst->next_htbl->pointing_ste = dst;
@@ -184,18 +192,17 @@ dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_ste_htbl *stats_tbl)
{
u8 tmp_data_ste[DR_STE_SIZE] = {};
- struct mlx5dr_ste tmp_ste = {};
u64 miss_addr;
- tmp_ste.hw_ste = tmp_data_ste;
+ miss_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
/* Use temp ste because dr_ste_always_miss_addr
* touches bit_mask area which doesn't exist at ste->hw_ste.
+ * Need to use a full-sized (DR_STE_SIZE) hw_ste.
*/
- memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED);
- miss_addr = nic_matcher->e_anchor->chunk->icm_addr;
- dr_ste_always_miss_addr(ste_ctx, &tmp_ste, miss_addr);
- memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED);
+ memcpy(tmp_data_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED);
+ dr_ste_always_miss_addr(ste_ctx, tmp_data_ste, miss_addr);
+ memcpy(mlx5dr_ste_get_hw_ste(ste), tmp_data_ste, DR_STE_SIZE_REDUCED);
list_del_init(&ste->miss_list_node);
@@ -237,7 +244,7 @@ dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher,
mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false);
/* Copy all 64 hw_ste bytes */
- memcpy(hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED);
+ memcpy(hw_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED);
sb_idx = ste->ste_chain_location - 1;
mlx5dr_ste_set_bit_mask(hw_ste,
nic_matcher->ste_builder[sb_idx].bit_mask);
@@ -273,12 +280,13 @@ static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx,
if (WARN_ON(!prev_ste))
return;
- miss_addr = ste_ctx->get_miss_addr(ste->hw_ste);
- ste_ctx->set_miss_addr(prev_ste->hw_ste, miss_addr);
+ miss_addr = ste_ctx->get_miss_addr(mlx5dr_ste_get_hw_ste(ste));
+ ste_ctx->set_miss_addr(mlx5dr_ste_get_hw_ste(prev_ste), miss_addr);
mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0,
- prev_ste->hw_ste, ste_info,
- send_ste_list, true /* Copy data*/);
+ mlx5dr_ste_get_hw_ste(prev_ste),
+ ste_info, send_ste_list,
+ true /* Copy data */);
list_del_init(&ste->miss_list_node);
@@ -364,9 +372,11 @@ void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx,
u8 *hw_ste,
struct mlx5dr_ste_htbl *next_htbl)
{
- struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
+ u64 icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(next_htbl->chunk);
+ u32 num_entries =
+ mlx5dr_icm_pool_get_chunk_num_of_entries(next_htbl->chunk);
- ste_ctx->set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+ ste_ctx->set_hit_addr(hw_ste, icm_addr, num_entries);
}
void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx,
@@ -385,15 +395,22 @@ void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_htbl_connect_info *connect_info)
{
bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
- struct mlx5dr_ste ste = {};
+ u8 tmp_hw_ste[DR_STE_SIZE] = {0};
ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi);
- ste.hw_ste = formatted_ste;
+ /* Use a temp ste because dr_ste_always_miss_addr/hit_htbl
+ * touch the bit_mask area, which doesn't exist in ste->hw_ste.
+ * A full-sized (DR_STE_SIZE) hw_ste is needed here.
+ */
+ memcpy(tmp_hw_ste, formatted_ste, DR_STE_SIZE_REDUCED);
if (connect_info->type == CONNECT_HIT)
- dr_ste_always_hit_htbl(ste_ctx, &ste, connect_info->hit_next_htbl);
+ dr_ste_always_hit_htbl(ste_ctx, tmp_hw_ste,
+ connect_info->hit_next_htbl);
else
- dr_ste_always_miss_addr(ste_ctx, &ste, connect_info->miss_icm_addr);
+ dr_ste_always_miss_addr(ste_ctx, tmp_hw_ste,
+ connect_info->miss_icm_addr);
+ memcpy(formatted_ste, tmp_hw_ste, DR_STE_SIZE_REDUCED);
}
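
The expand/shrink round-trip above is the same trick used in dr_ste_remove_head_ste: the cached copy keeps only the first 48B, while helpers that also write the bit_mask region need the full 64B, so the code stages through a stack buffer. A minimal sketch (the mask-touching helper is a hypothetical stand-in):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define DR_STE_SIZE	    64
#define DR_STE_SIZE_REDUCED 48

static void touch_full_ste(uint8_t *hw_ste) /* stand-in helper */
{
	hw_ste[DR_STE_SIZE - 1] = 0xab; /* writes beyond the reduced area */
}

int main(void)
{
	uint8_t cached[DR_STE_SIZE_REDUCED];
	uint8_t tmp[DR_STE_SIZE] = {};

	memset(cached, 0x11, sizeof(cached));

	memcpy(tmp, cached, DR_STE_SIZE_REDUCED); /* expand to full size */
	touch_full_ste(tmp);                      /* safe: 64B buffer */
	memcpy(cached, tmp, DR_STE_SIZE_REDUCED); /* shrink back */

	printf("cached[0]=0x%x, last tmp byte=0x%x\n",
	       cached[0], tmp[DR_STE_SIZE - 1]); /* 0x11, 0xab */
	return 0;
}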
int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
@@ -444,7 +461,8 @@ int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
/* Write new table to HW */
info.type = CONNECT_MISS;
- info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
+ info.miss_icm_addr =
+ mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl,
&info, false)) {
mlx5dr_info(dmn, "Failed writing table to HW\n");
@@ -470,6 +488,7 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
{
struct mlx5dr_icm_chunk *chunk;
struct mlx5dr_ste_htbl *htbl;
+ u32 num_entries;
int i;
htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
@@ -483,22 +502,18 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
htbl->chunk = chunk;
htbl->lu_type = lu_type;
htbl->byte_mask = byte_mask;
- htbl->ste_arr = chunk->ste_arr;
- htbl->hw_ste_arr = chunk->hw_ste_arr;
- htbl->miss_list = chunk->miss_list;
htbl->refcount = 0;
+ num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
- for (i = 0; i < chunk->num_of_entries; i++) {
- struct mlx5dr_ste *ste = &htbl->ste_arr[i];
+ for (i = 0; i < num_entries; i++) {
+ struct mlx5dr_ste *ste = &chunk->ste_arr[i];
- ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
ste->htbl = htbl;
ste->refcount = 0;
INIT_LIST_HEAD(&ste->miss_list_node);
- INIT_LIST_HEAD(&htbl->miss_list[i]);
+ INIT_LIST_HEAD(&chunk->miss_list[i]);
}
- htbl->chunk_size = chunk_size;
return htbl;
out_free_htbl:
@@ -523,8 +538,8 @@ void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_ste_actions_attr *attr,
u32 *added_stes)
{
- ste_ctx->set_actions_tx(dmn, action_type_set, hw_ste_arr,
- attr, added_stes);
+ ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps,
+ hw_ste_arr, attr, added_stes);
}
void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
@@ -534,8 +549,8 @@ void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_ste_actions_attr *attr,
u32 *added_stes)
{
- ste_ctx->set_actions_rx(dmn, action_type_set, hw_ste_arr,
- attr, added_stes);
+ ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps,
+ hw_ste_arr, attr, added_stes);
}
const struct mlx5dr_ste_action_modify_field *
@@ -602,12 +617,34 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
used_hw_action_num);
}
+static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn,
+ struct mlx5dr_match_spec *spec)
+{
+ if (spec->ip_version) {
+ if (spec->ip_version != 0xf) {
+ mlx5dr_err(dmn,
+ "Partial ip_version mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+ } else if (spec->ethertype != 0xffff &&
+ (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) {
+ mlx5dr_err(dmn,
+ "Partial/no ethertype mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
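
A truth-table sketch of the check above, with hypothetical masks: matching on src/dst IP demands either a full ip_version mask (0xf) or, failing that, a full ethertype mask (0xffff). The src/dst-IP macros are collapsed into one flag here:

#include <stdio.h>

struct spec { unsigned ip_version, ethertype; int ip_set; };

static int pre_check_spec(const struct spec *s)
{
	if (s->ip_version) {
		if (s->ip_version != 0xf)
			return -1; /* partial ip_version mask */
	} else if (s->ethertype != 0xffff && s->ip_set) {
		return -1; /* partial/no ethertype with IP match */
	}
	return 0; /* -1 stands in for -EINVAL */
}

int main(void)
{
	struct spec cases[] = {
		{ 0xf, 0, 1 },    /* full ip_version: ok */
		{ 0x3, 0, 1 },    /* partial ip_version: rejected */
		{ 0, 0xffff, 1 }, /* full ethertype: ok */
		{ 0, 0x00ff, 1 }, /* partial ethertype + IP: rejected */
	};
	unsigned i;

	for (i = 0; i < 4; i++)
		printf("case %u -> %d\n", i, pre_check_spec(&cases[i]));
	return 0;
}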
int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
u8 match_criteria,
struct mlx5dr_match_param *mask,
struct mlx5dr_match_param *value)
{
- if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) {
+ if (value)
+ return 0;
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
mlx5dr_err(dmn,
"Partial mask source_port is not supported\n");
@@ -621,6 +658,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
}
}
+ if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->outer))
+ return -EINVAL;
+
+ if ((match_criteria & DR_MATCHER_CRITERIA_INNER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->inner))
+ return -EINVAL;
+
return 0;
}
@@ -719,6 +764,8 @@ static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bo
spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr);
spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr);
+ spec->geneve_tlv_option_0_exist =
+ IFC_GET_CLR(fte_match_set_misc, mask, geneve_tlv_option_0_exist, clr);
spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr);
spec->outer_ipv6_flow_label =
@@ -761,6 +808,7 @@ static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bo
spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr);
spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr);
+ spec->ipv4_ihl = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ipv4_ihl, clr);
spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr);
spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr);
@@ -880,6 +928,26 @@ static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec,
IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr);
}
+static void dr_ste_copy_mask_misc5(char *mask, struct mlx5dr_match_misc5 *spec, bool clr)
+{
+ spec->macsec_tag_0 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_0, clr);
+ spec->macsec_tag_1 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_1, clr);
+ spec->macsec_tag_2 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_2, clr);
+ spec->macsec_tag_3 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_3, clr);
+ spec->tunnel_header_0 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_0, clr);
+ spec->tunnel_header_1 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_1, clr);
+ spec->tunnel_header_2 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_2, clr);
+ spec->tunnel_header_3 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_3, clr);
+}
+
void mlx5dr_ste_copy_param(u8 match_criteria,
struct mlx5dr_match_param *set_param,
struct mlx5dr_match_parameters *mask,
@@ -966,6 +1034,20 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
}
dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr);
}
+
+ param_location += sizeof(struct mlx5dr_match_misc4);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC5) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc5)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc5(buff, &set_param->misc5, clr);
+ }
}
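
The misc5 branch repeats the tail-copy guard used for every criteria block in this function: when the caller's mask buffer is shorter than the full match-param layout, the bytes that do exist are staged into a zeroed scratch buffer so the per-field getters never read past the user buffer. A standalone sketch with illustrative sizes:

#include <string.h>
#include <stdio.h>

#define FIELD_SZ 32 /* stand-in for sizeof(struct mlx5dr_match_misc5) */

static const unsigned char *
select_buff(const unsigned char *data, size_t match_sz,
	    size_t param_location, unsigned char *tail_param)
{
	if (match_sz < param_location + FIELD_SZ) {
		/* partial field: stage what exists, rest stays zero */
		memcpy(tail_param, data + param_location,
		       match_sz - param_location);
		return tail_param;
	}
	return data + param_location; /* full field available in place */
}

int main(void)
{
	unsigned char data[40], tail[FIELD_SZ] = {};
	const unsigned char *buff;

	memset(data, 0xff, sizeof(data));
	buff = select_buff(data, sizeof(data), 16, tail); /* 40 < 16 + 32 */
	printf("staged=%d first=0x%x last=0x%x\n",
	       buff == tail, buff[0], buff[FIELD_SZ - 1]); /* 1 0xff 0x0 */
	return 0;
}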
void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx,
@@ -1180,6 +1262,21 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask);
}
+void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ if (!ste_ctx->build_tnl_geneve_tlv_opt_exist_init)
+ return;
+
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_geneve_tlv_opt_exist_init(sb, mask);
+}
+
void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
@@ -1269,15 +1366,24 @@ void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
ste_ctx->build_flex_parser_1_init(sb, mask);
}
-static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = {
- [MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0,
- [MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1,
-};
+void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_header_0_1_init(sb, mask);
+}
struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version)
{
- if (version > MLX5_STEERING_FORMAT_CONNECTX_6DX)
- return NULL;
+ if (version == MLX5_STEERING_FORMAT_CONNECTX_5)
+ return mlx5dr_ste_get_ctx_v0();
+ else if (version == MLX5_STEERING_FORMAT_CONNECTX_6DX)
+ return mlx5dr_ste_get_ctx_v1();
+ else if (version == MLX5_STEERING_FORMAT_CONNECTX_7)
+ return mlx5dr_ste_get_ctx_v2();
- return mlx5dr_ste_ctx_arr[version];
+ return NULL;
}
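
Replacing the extern array with per-version getters lets each STE context stay file-local to its own translation unit while unknown formats still yield NULL. A standalone sketch of the resulting dispatch shape (enum values and struct contents are stand-ins):

#include <stdio.h>
#include <stddef.h>

enum { FMT_CX5, FMT_CX6DX, FMT_CX7 };

struct ste_ctx { const char *name; };

static struct ste_ctx ctx_v0 = { "v0" }, ctx_v1 = { "v1" }, ctx_v2 = { "v2" };

static struct ste_ctx *get_ctx_v0(void) { return &ctx_v0; }
static struct ste_ctx *get_ctx_v1(void) { return &ctx_v1; }
static struct ste_ctx *get_ctx_v2(void) { return &ctx_v2; }

static struct ste_ctx *get_ctx(int version)
{
	if (version == FMT_CX5)
		return get_ctx_v0();
	else if (version == FMT_CX6DX)
		return get_ctx_v1();
	else if (version == FMT_CX7)
		return get_ctx_v2();

	return NULL; /* unknown/unsupported steering format */
}

int main(void)
{
	struct ste_ctx *ctx = get_ctx(FMT_CX7);

	printf("%s\n", ctx ? ctx->name : "unsupported"); /* v2 */
	printf("%s\n", get_ctx(42) ? "ok" : "unsupported");
	return 0;
}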
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 2d52d065dc8b..17513baff9b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -135,12 +135,14 @@ struct mlx5dr_ste_ctx {
void DR_STE_CTX_BUILDER(tnl_vxlan_gpe);
void DR_STE_CTX_BUILDER(tnl_geneve);
void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt);
+ void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt_exist);
void DR_STE_CTX_BUILDER(register_0);
void DR_STE_CTX_BUILDER(register_1);
void DR_STE_CTX_BUILDER(src_gvmi_qpn);
void DR_STE_CTX_BUILDER(flex_parser_0);
void DR_STE_CTX_BUILDER(flex_parser_1);
void DR_STE_CTX_BUILDER(tnl_gtpu);
+ void DR_STE_CTX_BUILDER(tnl_header_0_1);
void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0);
void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1);
@@ -159,11 +161,13 @@ struct mlx5dr_ste_ctx {
u32 actions_caps;
void (*set_actions_rx)(struct mlx5dr_domain *dmn,
u8 *action_type_set,
+ u32 actions_caps,
u8 *hw_ste_arr,
struct mlx5dr_ste_actions_attr *attr,
u32 *added_stes);
void (*set_actions_tx)(struct mlx5dr_domain *dmn,
u8 *action_type_set,
+ u32 actions_caps,
u8 *hw_ste_arr,
struct mlx5dr_ste_actions_attr *attr,
u32 *added_stes);
@@ -195,7 +199,8 @@ struct mlx5dr_ste_ctx {
void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size);
};
-extern struct mlx5dr_ste_ctx ste_ctx_v0;
-extern struct mlx5dr_ste_ctx ste_ctx_v1;
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void);
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void);
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void);
#endif /* _DR_STE_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index b0649c2877dd..2010d4ac6519 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -80,6 +80,7 @@ enum {
DR_STE_V0_LU_TYPE_GENERAL_PURPOSE = 0x18,
DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0 = 0x2f,
DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1 = 0x30,
+ DR_STE_V0_LU_TYPE_TUNNEL_HEADER = 0x34,
DR_STE_V0_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE,
};
@@ -407,6 +408,7 @@ static void dr_ste_v0_arr_init_next(u8 **last_ste,
static void
dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
u8 *action_type_set,
+ u32 actions_caps,
u8 *last_ste,
struct mlx5dr_ste_actions_attr *attr,
u32 *added_stes)
@@ -418,7 +420,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
* encapsulation. The reason for that is that we support
* modify headers for outer headers only
*/
- if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
dr_ste_v0_set_rewrite_actions(last_ste,
attr->modify_actions,
@@ -476,6 +478,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
static void
dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
u8 *action_type_set,
+ u32 actions_caps,
u8 *last_ste,
struct mlx5dr_ste_actions_attr *attr,
u32 *added_stes)
@@ -510,7 +513,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
}
}
- if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
dr_ste_v0_arr_init_next(&last_ste,
added_stes,
@@ -1151,6 +1154,7 @@ dr_ste_v0_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit);
+ DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, ihl, spec, ipv4_ihl);
return 0;
}
@@ -1704,7 +1708,7 @@ static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id,
u32 id = *misc4_field_id;
u8 *parser_ptr;
- if (parser_is_used[id])
+ if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id])
return;
parser_is_used[id] = true;
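
A small sketch of the bounds-then-dedup guard added above: validate the caller-supplied index before it touches the fixed-size bitmap, and bail out if the slot was already claimed. The array size is illustrative:

#include <stdbool.h>
#include <stdio.h>

#define NUM_OF_FLEX_PARSERS 8 /* stand-in for DR_NUM_OF_FLEX_PARSERS */

static bool claim_parser(bool *parser_is_used, unsigned int id)
{
	if (id >= NUM_OF_FLEX_PARSERS || parser_is_used[id])
		return false; /* out of range or already taken */

	parser_is_used[id] = true;
	return true;
}

int main(void)
{
	bool used[NUM_OF_FLEX_PARSERS] = {};

	printf("%d %d %d\n",
	       claim_parser(used, 3),   /* 1: first claim succeeds */
	       claim_parser(used, 3),   /* 0: duplicate */
	       claim_parser(used, 99)); /* 0: out of range */
	return 0;
}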
@@ -1875,7 +1879,28 @@ dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag;
}
-struct mlx5dr_ste_ctx ste_ctx_v0 = {
+static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ uint8_t *tag)
+{
+ struct mlx5dr_match_misc5 *misc5 = &value->misc5;
+
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0);
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1);
+
+ return 0;
+}
+
+static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V0_LU_TYPE_TUNNEL_HEADER;
+ dr_ste_v0_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag;
+}
+
+static struct mlx5dr_ste_ctx ste_ctx_v0 = {
/* Builders */
.build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init,
.build_eth_l3_ipv6_src_init = &dr_ste_v0_build_eth_l3_ipv6_src_init,
@@ -1903,6 +1928,7 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = {
.build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init,
.build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init,
.build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v0_build_tnl_header_0_1_init,
.build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init,
.build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init,
@@ -1927,3 +1953,8 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = {
.set_action_copy = &dr_ste_v0_set_action_copy,
.set_action_decap_l3_list = &dr_ste_v0_set_action_decap_l3_list,
};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void)
+{
+ return &ste_ctx_v0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index cb9cf67b0a02..ee677a5c76be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -3,7 +3,7 @@
#include <linux/types.h>
#include "mlx5_ifc_dr_ste_v1.h"
-#include "dr_ste.h"
+#include "dr_ste_v1.h"
#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \
((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \
@@ -47,6 +47,7 @@ enum {
DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f,
DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f,
DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011,
DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111,
DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112,
DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113,
@@ -88,6 +89,7 @@ enum dr_ste_v1_action_id {
DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d,
DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e,
DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f,
+ DR_STE_V1_ACTION_ID_ASO = 0x12,
DR_STE_V1_ACTION_ID_TRAILER = 0x13,
DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14,
DR_STE_V1_ACTION_ID_MAX = 0x21,
@@ -120,12 +122,16 @@ enum {
DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70,
DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b,
DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c,
- DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2 = 0x8c,
- DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3 = 0x8d,
- DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4 = 0x8e,
- DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5 = 0x8f,
- DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6 = 0x90,
- DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7 = 0x91,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91,
+};
+
+enum dr_ste_v1_aso_ctx_type {
+ DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2,
};
static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = {
@@ -222,22 +228,22 @@ static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field
.hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31,
},
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
- .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6, .start = 0, .end = 31,
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31,
},
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
- .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7, .start = 0, .end = 31,
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31,
},
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
- .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4, .start = 0, .end = 31,
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31,
},
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
- .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5, .start = 0, .end = 31,
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31,
},
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
- .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2, .start = 0, .end = 31,
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31,
},
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
- .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3, .start = 0, .end = 31,
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31,
},
[MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
.hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31,
@@ -261,7 +267,7 @@ static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type)
MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type);
}
-static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
{
u64 index = miss_addr >> 6;
@@ -269,7 +275,7 @@ static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index);
}
-static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p)
+u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p)
{
u64 index =
((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) |
@@ -278,12 +284,12 @@ static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p)
return index << 6;
}
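
A worked round-trip of the miss-address packing in the two functions above: the 64B-aligned address is shifted down to a 34-bit index that is split across a 26-bit low field (address bits 31:6) and an 8-bit high field (bits 39:32), then reassembled by the getter. Field widths mirror the layout; storage here is plain variables rather than MLX5_SET/GET:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t miss_addr = 0x12345678C0ULL; /* must be 64B aligned */
	uint64_t index = miss_addr >> 6;

	uint32_t lo_31_6 = index & ((1u << 26) - 1); /* 26-bit field */
	uint32_t hi_39_32 = (index >> 26) & 0xff;    /* 8-bit field  */

	uint64_t back = (((uint64_t)hi_39_32 << 26) | lo_31_6) << 6;

	printf("0x%llx -> 0x%llx (%s)\n",
	       (unsigned long long)miss_addr, (unsigned long long)back,
	       miss_addr == back ? "round-trip ok" : "mismatch");
	return 0;
}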
-static void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask)
+void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask)
{
MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask);
}
-static u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p)
+u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p)
{
return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask);
}
@@ -294,13 +300,13 @@ static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type)
MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF);
}
-static void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type)
+void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type)
{
MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8);
MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF);
}
-static u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p)
+u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p)
{
u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format);
u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx);
@@ -313,7 +319,7 @@ static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi);
}
-static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
+void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
{
u64 index = (icm_addr >> 5) | ht_size;
@@ -321,8 +327,7 @@ static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index);
}
-static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type,
- bool is_rx, u16 gvmi)
+void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi)
{
dr_ste_v1_set_lu_type(hw_ste_p, lu_type);
dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
@@ -332,8 +337,7 @@ static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type,
MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi);
}
-static void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p,
- u32 ste_size)
+void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size)
{
u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL;
u8 *mask = tag + DR_STE_SIZE_TAG;
@@ -495,6 +499,27 @@ static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
dr_ste_v1_set_reparse(hw_ste_p);
}
+static void dr_ste_v1_set_aso_flow_meter(u8 *d_action,
+ u32 object_id,
+ u32 offset,
+ u8 dest_reg_id,
+ u8 init_color)
+{
+ MLX5_SET(ste_double_action_aso_v1, d_action, action_id,
+ DR_STE_V1_ACTION_ID_ASO);
+ MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_number,
+ object_id + (offset / MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ));
+ /* Convert reg_c index to HW 64-bit index */
+ MLX5_SET(ste_double_action_aso_v1, d_action, dest_reg_id,
+ (dest_reg_id - 1) / 2);
+ MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_type,
+ DR_STE_V1_ASO_CTX_TYPE_POLICERS);
+ MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.line_id,
+ offset % MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ);
+ MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.initial_color,
+ init_color);
+}
+
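
A worked example of the ASO flow-meter indexing above: a flat meter `offset` is split into an object number and a line inside that object, and a 32-bit reg_c index is folded into the HW's 64-bit register numbering. The per-object count and sample values are illustrative:

#include <stdio.h>

#define ASO_FLOW_METER_NUM_PER_OBJ 2 /* stand-in for the driver constant */

int main(void)
{
	unsigned int obj_id = 100, offset = 5, dest_reg_id = 3;

	unsigned int ctx_number = obj_id + offset / ASO_FLOW_METER_NUM_PER_OBJ;
	unsigned int line_id = offset % ASO_FLOW_METER_NUM_PER_OBJ;
	/* reg_c registers are 32-bit; HW addresses 64-bit pairs */
	unsigned int hw_reg = (dest_reg_id - 1) / 2;

	printf("ctx=%u line=%u hw_reg=%u\n", ctx_number, line_id, hw_reg);
	/* -> ctx=102 line=1 hw_reg=1 */
	return 0;
}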
static void dr_ste_v1_arr_init_next_match(u8 **last_ste,
u32 *added_stes,
u16 gvmi)
@@ -510,11 +535,12 @@ static void dr_ste_v1_arr_init_next_match(u8 **last_ste,
memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action));
}
-static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
- u8 *action_type_set,
- u8 *last_ste,
- struct mlx5dr_ste_actions_attr *attr,
- u32 *added_stes)
+void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
{
u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
@@ -532,7 +558,10 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
action_sz -= DR_STE_ACTION_SINGLE_SZ;
action += DR_STE_ACTION_SINGLE_SZ;
- allow_modify_hdr = false;
+
+ /* Check if vlan_pop and modify_hdr on the same STE is supported */
+ if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY))
+ allow_modify_hdr = false;
}
if (action_type_set[DR_ACTION_TYP_CTR])
@@ -626,15 +655,31 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
action += DR_STE_ACTION_SINGLE_SZ;
}
+ if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_aso_flow_meter(action,
+ attr->aso_flow_meter.obj_id,
+ attr->aso_flow_meter.offset,
+ attr->aso_flow_meter.dest_reg_id,
+ attr->aso_flow_meter.init_color);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
}
-static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
- u8 *action_type_set,
- u8 *last_ste,
- struct mlx5dr_ste_actions_attr *attr,
- u32 *added_stes)
+void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
{
u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
@@ -676,13 +721,16 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
action_sz = DR_STE_ACTION_TRIPLE_SZ;
- allow_modify_hdr = false;
- allow_ctr = false;
}
dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
action_sz -= DR_STE_ACTION_SINGLE_SZ;
action += DR_STE_ACTION_SINGLE_SZ;
+ allow_ctr = false;
+
+ /* Check if vlan_pop and modify_hdr on the same STE is supported */
+ if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY))
+ allow_modify_hdr = false;
}
if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
@@ -730,9 +778,9 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
action_sz = DR_STE_ACTION_TRIPLE_SZ;
allow_modify_hdr = true;
- allow_ctr = false;
}
dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
+ allow_ctr = false;
}
if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) {
@@ -795,15 +843,30 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
action += DR_STE_ACTION_SINGLE_SZ;
}
+ if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_aso_flow_meter(action,
+ attr->aso_flow_meter.obj_id,
+ attr->aso_flow_meter.offset,
+ attr->aso_flow_meter.dest_reg_id,
+ attr->aso_flow_meter.init_color);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
}
-static void dr_ste_v1_set_action_set(u8 *d_action,
- u8 hw_field,
- u8 shifter,
- u8 length,
- u32 data)
+void dr_ste_v1_set_action_set(u8 *d_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
{
shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET);
@@ -813,11 +876,11 @@ static void dr_ste_v1_set_action_set(u8 *d_action,
MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data);
}
-static void dr_ste_v1_set_action_add(u8 *d_action,
- u8 hw_field,
- u8 shifter,
- u8 length,
- u32 data)
+void dr_ste_v1_set_action_add(u8 *d_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
{
shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD);
@@ -827,12 +890,12 @@ static void dr_ste_v1_set_action_add(u8 *d_action,
MLX5_SET(ste_double_action_add_v1, d_action, add_value, data);
}
-static void dr_ste_v1_set_action_copy(u8 *d_action,
- u8 dst_hw_field,
- u8 dst_shifter,
- u8 dst_len,
- u8 src_hw_field,
- u8 src_shifter)
+void dr_ste_v1_set_action_copy(u8 *d_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter)
{
dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
@@ -847,11 +910,11 @@ static void dr_ste_v1_set_action_copy(u8 *d_action,
#define DR_STE_DECAP_L3_ACTION_NUM 8
#define DR_STE_L2_HDR_MAX_SZ 20
-static int dr_ste_v1_set_action_decap_l3_list(void *data,
- u32 data_sz,
- u8 *hw_action,
- u32 hw_action_sz,
- u16 *used_hw_action_num)
+int dr_ste_v1_set_action_decap_l3_list(void *data,
+ u32 data_sz,
+ u8 *hw_action,
+ u32 hw_action_sz,
+ u16 *used_hw_action_num)
{
u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {};
void *data_ptr = padded_data;
@@ -976,8 +1039,8 @@ static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask);
@@ -1000,8 +1063,8 @@ static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask);
@@ -1024,8 +1087,8 @@ static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask);
@@ -1059,8 +1122,8 @@ static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *va
return 0;
}
-static void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask);
@@ -1078,8 +1141,8 @@ static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param
DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid);
DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi);
DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio);
- DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); // ?
- DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); // ?
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype);
DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version);
if (mask->svlan_tag || mask->cvlan_tag) {
@@ -1200,8 +1263,8 @@ static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
}
-static void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask);
@@ -1233,8 +1296,8 @@ static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
}
-static void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask);
@@ -1313,8 +1376,8 @@ static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask);
@@ -1330,12 +1393,13 @@ static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit);
+ DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, ihl, spec, ipv4_ihl);
return 0;
}
-static void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask);
@@ -1374,8 +1438,8 @@ static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask);
@@ -1398,8 +1462,8 @@ static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask);
@@ -1425,8 +1489,8 @@ static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask);
@@ -1470,8 +1534,8 @@ static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask);
@@ -1505,8 +1569,8 @@ static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *valu
return 0;
}
-static void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask);
@@ -1546,8 +1610,8 @@ static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *valu
return 0;
}
-static void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask);
@@ -1593,8 +1657,8 @@ static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask);
@@ -1615,8 +1679,8 @@ static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask);
@@ -1642,8 +1706,8 @@ static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask);
@@ -1672,9 +1736,8 @@ dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void
-dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask);
@@ -1702,9 +1765,8 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void
-dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask);
@@ -1713,6 +1775,27 @@ dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag;
}
+static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ uint8_t *tag)
+{
+ struct mlx5dr_match_misc5 *misc5 = &value->misc5;
+
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0);
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_header_0_1_tag;
+}
+
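Each STE builder added or exported in this file follows the two-step pattern visible above: a *_tag() callback that copies the masked fields from the match parameters into the hardware STE tag, and an *_init() that runs the tag builder once over the mask to populate sb->bit_mask, derives the byte-granularity mask from it, and records the lookup type and tag callback. A minimal standalone sketch of the pattern, with illustrative stand-in types rather than the kernel's:

    #include <stdint.h>
    #include <string.h>

    struct build_ctx {
    	uint8_t bit_mask[16];
    	uint16_t byte_mask;
    	int (*build_tag)(const uint32_t *match, struct build_ctx *b, uint8_t *tag);
    };

    /* tag builder: write the matched field into the tag buffer
     * (stand-in for the DR_STE_SET_TAG() calls above)
     */
    static int build_example_tag(const uint32_t *match, struct build_ctx *b, uint8_t *tag)
    {
    	memcpy(tag, match, sizeof(*match));
    	return 0;
    }

    /* each nonzero byte of the bit mask contributes one bit to the byte
     * mask; a simplified stand-in for mlx5dr_ste_conv_bit_to_byte_mask()
     */
    static uint16_t bit_to_byte_mask(const uint8_t *bit_mask, unsigned int len)
    {
    	uint16_t byte_mask = 0;

    	for (unsigned int i = 0; i < len; i++)
    		if (bit_mask[i])
    			byte_mask |= 1u << i;
    	return byte_mask;
    }

    /* init: run the tag builder once over the mask itself */
    static void build_example_init(struct build_ctx *b, const uint32_t *mask)
    {
    	build_example_tag(mask, b, b->bit_mask);
    	b->byte_mask = bit_to_byte_mask(b->bit_mask, sizeof(b->bit_mask));
    	b->build_tag = build_example_tag;
    }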
static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
u8 *tag)
@@ -1727,8 +1810,8 @@ static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask);
@@ -1751,8 +1834,8 @@ static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask);
@@ -1815,8 +1898,8 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
@@ -1833,7 +1916,7 @@ static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id,
u32 id = *misc4_field_id;
u8 *parser_ptr;
- if (parser_is_used[id])
+ if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id])
return;
parser_is_used[id] = true;
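The reworked check above is a bounds-hardening fix: the parser id comes from the misc4 match parameters supplied by the caller, so it is validated against DR_NUM_OF_FLEX_PARSERS before being used to index parser_is_used[], rather than indexing first and checking only for reuse. The shape of the guard as a standalone sketch:

    #include <stdbool.h>
    #include <stddef.h>

    #define NUM_OF_FLEX_PARSERS 8	/* stand-in for DR_NUM_OF_FLEX_PARSERS */

    /* claim a parser slot; reject ids that are out of range or already taken */
    static bool claim_parser(bool parser_is_used[NUM_OF_FLEX_PARSERS], size_t id)
    {
    	/* the range check must precede the array read */
    	if (id >= NUM_OF_FLEX_PARSERS || parser_is_used[id])
    		return false;
    	parser_is_used[id] = true;
    	return true;
    }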
@@ -1870,8 +1953,8 @@ static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask);
@@ -1879,8 +1962,8 @@ static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag;
}
-static void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1;
dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask);
@@ -1904,7 +1987,7 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *va
return 0;
}
-static void
+void
dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask)
{
@@ -1921,6 +2004,32 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag;
}
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ uint8_t *tag)
+{
+ u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ if (misc->geneve_tlv_option_0_exist) {
+ MLX5_SET(ste_flex_parser_ok, tag, flex_parsers_ok, 1 << parser_id);
+ misc->geneve_tlv_option_0_exist = 0;
+ }
+
+ return 0;
+}
+
+void
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_OK;
+ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag;
+}
+
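The "exist" builder above matches on the presence of the GENEVE TLV option rather than on its value: the tag is the flex_parsers_ok bit field, and the entry sets the single bit belonging to the option's flex parser, so a packet matches only if that parser parsed it successfully. A hedged sketch of the bit computation (the parser count is an assumption here):

    #include <assert.h>
    #include <stdint.h>

    #define NUM_OF_FLEX_PARSERS 8	/* assumed, stand-in for the kernel constant */

    /* build the per-parser "parsed OK" bit, as 1 << parser_id above */
    static uint32_t flex_parser_ok_bit(uint8_t parser_id)
    {
    	assert(parser_id < NUM_OF_FLEX_PARSERS);
    	return UINT32_C(1) << parser_id;
    }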
static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
u8 *tag)
@@ -1934,8 +2043,8 @@ static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *v
return 0;
}
-static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask)
+void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
{
dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask);
@@ -1960,7 +2069,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void
+void
dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask)
{
@@ -1987,7 +2096,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
return 0;
}
-static void
+void
dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask)
{
@@ -1998,7 +2107,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag;
}
-struct mlx5dr_ste_ctx ste_ctx_v1 = {
+static struct mlx5dr_ste_ctx ste_ctx_v1 = {
/* Builders */
.build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
.build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init,
@@ -2020,12 +2129,14 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
.build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init,
.build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init,
.build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_tnl_geneve_tlv_opt_exist_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init,
.build_register_0_init = &dr_ste_v1_build_register_0_init,
.build_register_1_init = &dr_ste_v1_build_register_1_init,
.build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init,
.build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init,
.build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init,
.build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init,
.build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init,
.build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init,
@@ -2041,7 +2152,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
/* Actions */
.actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP |
DR_STE_CTX_ACTION_CAP_RX_PUSH |
- DR_STE_CTX_ACTION_CAP_RX_ENCAP,
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP |
+ DR_STE_CTX_ACTION_CAP_POP_MDFY,
.set_actions_rx = &dr_ste_v1_set_actions_rx,
.set_actions_tx = &dr_ste_v1_set_actions_tx,
.modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr),
@@ -2053,3 +2165,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void)
+{
+ return &ste_ctx_v1;
+}
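With this change ste_ctx_v1 is no longer a globally linked symbol: the ops table becomes static and callers obtain it through mlx5dr_ste_get_ctx_v1(), so the STE version is selected through a getter while the table stays private to this file. The accessor pattern in a minimal standalone form (names and fields are illustrative):

    struct ste_ops {
    	void (*set_miss_addr)(unsigned char *hw_ste, unsigned long long addr);
    	/* ... further builder/getter/setter callbacks ... */
    };

    static void v1_set_miss_addr(unsigned char *hw_ste, unsigned long long addr)
    {
    	/* version-specific encoding would go here */
    }

    /* file-private ops table, reachable only through the getter */
    static struct ste_ops ste_ops_v1 = {
    	.set_miss_addr = v1_set_miss_addr,
    };

    struct ste_ops *ste_get_ops_v1(void)
    {
    	return &ste_ops_v1;
    }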
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
new file mode 100644
index 000000000000..8a1d49790c6e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef _DR_STE_V1_
+#define _DR_STE_V1_
+
+#include "dr_types.h"
+#include "dr_ste.h"
+
+void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr);
+u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p);
+void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask);
+u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p);
+void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type);
+u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p);
+void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size);
+void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi);
+void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size);
+void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set,
+ u32 actions_caps, u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr, u32 *added_stes);
+void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set,
+ u32 actions_caps, u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr, u32 *added_stes);
+void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter,
+ u8 length, u32 data);
+void dr_ste_v1_set_action_add(u8 *d_action, u8 hw_field, u8 shifter,
+ u8 length, u32 data);
+void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter,
+ u8 dst_len, u8 src_hw_field, u8 src_shifter);
+int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action,
+ u32 hw_action_sz, u16 *used_hw_action_num);
+void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+
+#endif /* _DR_STE_V1_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
new file mode 100644
index 000000000000..c60fddd125d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "dr_ste_v1.h"
+
+enum {
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00,
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01,
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02,
+ DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08,
+ DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09,
+ DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e,
+ DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18,
+ DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f,
+ DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e,
+ DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f,
+ DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f,
+ DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70,
+ DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b,
+ DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95,
+};
+
+static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15,
+ },
+};
+
+static struct mlx5dr_ste_ctx ste_ctx_v2 = {
+ /* Builders */
+ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
+ .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init,
+ .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init,
+ .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init,
+ .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init,
+ .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init,
+ .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init,
+ .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init,
+ .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init,
+ .build_mpls_init = &dr_ste_v1_build_mpls_init,
+ .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init,
+ .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init,
+ .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init,
+ .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init,
+ .build_icmp_init = &dr_ste_v1_build_icmp_init,
+ .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init,
+ .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init,
+ .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init,
+ .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init,
+ .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_tnl_geneve_tlv_opt_exist_init =
+ &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init,
+ .build_register_0_init = &dr_ste_v1_build_register_0_init,
+ .build_register_1_init = &dr_ste_v1_build_register_1_init,
+ .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init,
+ .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init,
+ .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init,
+ .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init,
+ .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init,
+ .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init,
+
+ /* Getters and Setters */
+ .ste_init = &dr_ste_v1_init,
+ .set_next_lu_type = &dr_ste_v1_set_next_lu_type,
+ .get_next_lu_type = &dr_ste_v1_get_next_lu_type,
+ .set_miss_addr = &dr_ste_v1_set_miss_addr,
+ .get_miss_addr = &dr_ste_v1_get_miss_addr,
+ .set_hit_addr = &dr_ste_v1_set_hit_addr,
+ .set_byte_mask = &dr_ste_v1_set_byte_mask,
+ .get_byte_mask = &dr_ste_v1_get_byte_mask,
+
+ /* Actions */
+ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP |
+ DR_STE_CTX_ACTION_CAP_RX_PUSH |
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP,
+ .set_actions_rx = &dr_ste_v1_set_actions_rx,
+ .set_actions_tx = &dr_ste_v1_set_actions_tx,
+ .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr),
+ .modify_field_arr = dr_ste_v2_action_modify_field_arr,
+ .set_action_set = &dr_ste_v1_set_action_set,
+ .set_action_add = &dr_ste_v1_set_action_add,
+ .set_action_copy = &dr_ste_v1_set_action_copy,
+ .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+
+ /* Send */
+ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void)
+{
+ return &ste_ctx_v2;
+}
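The v2 context reuses every v1 callback; the substantive difference is dr_ste_v2_action_modify_field_arr, which remaps the modify-header fields to different hardware field ids (and, unlike the v1 context earlier in this patch, does not advertise DR_STE_CTX_ACTION_CAP_POP_MDFY). The array is indexed directly by the MLX5_ACTION_IN_FIELD_* id via designated initializers, so a lookup is a bounds check plus an array access. A hedged sketch of that lookup scheme:

    #include <stddef.h>
    #include <stdint.h>

    struct modify_field {
    	uint8_t hw_field;
    	uint8_t start, end;	/* bit range inside the hardware field */
    };

    enum { FLD_SMAC_47_16 = 1, FLD_ETHERTYPE = 3 };	/* illustrative ids */

    /* sparse table indexed by field id, as in the array above */
    static const struct modify_field field_arr[] = {
    	[FLD_SMAC_47_16] = { .hw_field = 0x08, .start = 0, .end = 31 },
    	[FLD_ETHERTYPE]  = { .hw_field = 0x01, .start = 0, .end = 15 },
    };

    static const struct modify_field *get_field(unsigned int id)
    {
    	if (id >= sizeof(field_arr) / sizeof(field_arr[0]))
    		return NULL;
    	/* all-zero entries were never initialized: treat as unsupported */
    	if (!field_arr[id].start && !field_arr[id].end)
    		return NULL;
    	return &field_arr[id];
    }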
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index 30ae3cda6d2e..31d443dd8386 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -3,12 +3,49 @@
#include "dr_types.h"
-int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
- struct mlx5dr_action *action)
+static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl,
+ struct mlx5dr_action *action)
{
- struct mlx5dr_matcher *last_matcher = NULL;
+	struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL;
struct mlx5dr_htbl_connect_info info;
struct mlx5dr_ste_htbl *last_htbl;
+ struct mlx5dr_icm_chunk *chunk;
+ int ret;
+
+ if (!list_empty(&nic_tbl->nic_matcher_list))
+ last_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list,
+ struct mlx5dr_matcher_rx_tx,
+ list_node);
+
+ if (last_nic_matcher)
+ last_htbl = last_nic_matcher->e_anchor;
+ else
+ last_htbl = nic_tbl->s_anchor;
+
+ if (action) {
+ chunk = nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ?
+ action->dest_tbl->tbl->rx.s_anchor->chunk :
+ action->dest_tbl->tbl->tx.s_anchor->chunk;
+ nic_tbl->default_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk);
+ } else {
+ nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr;
+ }
+
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_tbl->default_icm_addr;
+
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_tbl->nic_dmn,
+ last_htbl, &info, true);
+ if (ret)
+ mlx5dr_dbg(dmn, "Failed to set NIC RX/TX miss action, ret %d\n", ret);
+
+ return ret;
+}
+
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_action *action)
+{
int ret;
if (action && action->action_type != DR_ACTION_TYP_FT)
@@ -16,56 +53,18 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
mlx5dr_domain_lock(tbl->dmn);
- if (!list_empty(&tbl->matcher_list))
- last_matcher = list_last_entry(&tbl->matcher_list,
- struct mlx5dr_matcher,
- matcher_list);
-
if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX ||
tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
- if (last_matcher)
- last_htbl = last_matcher->rx.e_anchor;
- else
- last_htbl = tbl->rx.s_anchor;
-
- tbl->rx.default_icm_addr = action ?
- action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr :
- tbl->rx.nic_dmn->default_icm_addr;
-
- info.type = CONNECT_MISS;
- info.miss_icm_addr = tbl->rx.default_icm_addr;
-
- ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
- tbl->rx.nic_dmn,
- last_htbl,
- &info, true);
- if (ret) {
- mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret);
+ ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->rx, action);
+ if (ret)
goto out;
- }
}
if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX ||
tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
- if (last_matcher)
- last_htbl = last_matcher->tx.e_anchor;
- else
- last_htbl = tbl->tx.s_anchor;
-
- tbl->tx.default_icm_addr = action ?
- action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr :
- tbl->tx.nic_dmn->default_icm_addr;
-
- info.type = CONNECT_MISS;
- info.miss_icm_addr = tbl->tx.default_icm_addr;
-
- ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
- tbl->tx.nic_dmn,
- last_htbl, &info, true);
- if (ret) {
- mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret);
+ ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->tx, action);
+ if (ret)
goto out;
- }
}
/* Release old action */
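The two hunks above collapse the near-identical RX and TX branches into dr_table_set_miss_action_nic(), which operates on a single mlx5dr_table_rx_tx and picks the anchor by the NIC domain's direction; FDB domains simply invoke the helper once per direction. The dedup shape, reduced to a standalone sketch:

    enum nic_dir { DIR_RX, DIR_TX };

    struct nic_tbl {
    	enum nic_dir dir;
    	unsigned long long default_icm_addr;
    };

    /* one helper replaces two copies of the per-direction logic */
    static int set_miss_nic(struct nic_tbl *nic, unsigned long long miss_addr)
    {
    	nic->default_icm_addr = miss_addr;
    	/* connecting the last hash table to the miss address goes here */
    	return 0;
    }

    /* FDB-style caller: apply to both directions, stop on first error */
    static int set_miss(struct nic_tbl *rx, struct nic_tbl *tx,
    		    unsigned long long miss_addr)
    {
    	int ret = set_miss_nic(rx, miss_addr);

    	return ret ? ret : set_miss_nic(tx, miss_addr);
    }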
@@ -122,6 +121,8 @@ static int dr_table_init_nic(struct mlx5dr_domain *dmn,
struct mlx5dr_htbl_connect_info info;
int ret;
+ INIT_LIST_HEAD(&nic_tbl->nic_matcher_list);
+
nic_tbl->default_icm_addr = nic_dmn->default_icm_addr;
nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
@@ -213,7 +214,7 @@ static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl)
tbl->table_type);
}
-static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
+static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl, u16 uid)
{
bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
@@ -223,10 +224,10 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
int ret;
if (tbl->rx.s_anchor)
- icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr;
+ icm_addr_rx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->rx.s_anchor->chunk);
if (tbl->tx.s_anchor)
- icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr;
+ icm_addr_tx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->tx.s_anchor->chunk);
ft_attr.table_type = tbl->table_type;
ft_attr.icm_addr_rx = icm_addr_rx;
@@ -235,6 +236,7 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
ft_attr.sw_owner = true;
ft_attr.decap_en = en_decap;
ft_attr.reformat_en = en_encap;
+ ft_attr.uid = uid;
ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr,
NULL, &tbl->table_id);
@@ -242,7 +244,8 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
return ret;
}
-struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags)
+struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level,
+ u32 flags, u16 uid)
{
struct mlx5dr_table *tbl;
int ret;
@@ -262,10 +265,12 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u
if (ret)
goto free_tbl;
- ret = dr_table_create_sw_owned_tbl(tbl);
+ ret = dr_table_create_sw_owned_tbl(tbl, uid);
if (ret)
goto uninit_tbl;
+ INIT_LIST_HEAD(&tbl->dbg_node);
+ mlx5dr_dbg_tbl_add(tbl);
return tbl;
uninit_tbl:
@@ -281,9 +286,10 @@ int mlx5dr_table_destroy(struct mlx5dr_table *tbl)
{
int ret;
- if (refcount_read(&tbl->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&tbl->refcount) > 1))
return -EBUSY;
+ mlx5dr_dbg_tbl_del(tbl);
ret = dr_table_destroy_sw_owned_tbl(tbl);
if (ret)
return ret;
@@ -303,3 +309,8 @@ u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl)
{
return tbl->table_id;
}
+
+struct mlx5dr_table *mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft)
+{
+ return ft->fs_dr_table.dr_table;
+}
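The two s_anchor reads above switch from the cached chunk->icm_addr to mlx5dr_icm_pool_get_chunk_icm_addr(). As the dr_types.h hunks below show, the rkey/num_of_entries/byte_size/icm_addr/mr_addr fields are dropped from struct mlx5dr_icm_chunk and recomputed on demand from the buddy allocator and the chunk's seg/size, shrinking a structure that exists once per allocated chunk. A sketch of such a derived-value accessor, under an assumed linear layout:

    #include <stdint.h>

    struct buddy_mem {
    	uint64_t icm_base_addr;	/* assumed: base of the buddy's ICM range */
    	uint32_t entry_size;
    };

    struct icm_chunk {
    	const struct buddy_mem *buddy;
    	unsigned int seg;	/* first entry index inside the buddy memory */
    	unsigned char size;	/* log2 of the number of entries */
    };

    /* recompute instead of caching eight bytes per chunk */
    static uint64_t chunk_icm_addr(const struct icm_chunk *c)
    {
    	return c->buddy->icm_base_addr + (uint64_t)c->seg * c->buddy->entry_size;
    }

    static uint32_t chunk_num_of_entries(const struct icm_chunk *c)
    {
    	return UINT32_C(1) << c->size;	/* chunk sizes are powers of two */
    }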
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 2333c2439c28..1777a1e508e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -11,6 +11,7 @@
#include "lib/mlx5.h"
#include "mlx5_ifc_dr.h"
#include "mlx5dr.h"
+#include "dr_dbg.h"
#define DR_RULE_MAX_STES 18
#define DR_ACTION_MAX_STES 5
@@ -90,6 +91,7 @@ enum mlx5dr_ste_ctx_action_cap {
DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0,
DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1,
DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2,
+ DR_STE_CTX_ACTION_CAP_POP_MDFY = 1 << 3,
};
enum {
@@ -104,7 +106,8 @@ enum mlx5dr_matcher_criteria {
DR_MATCHER_CRITERIA_MISC2 = 1 << 3,
DR_MATCHER_CRITERIA_MISC3 = 1 << 4,
DR_MATCHER_CRITERIA_MISC4 = 1 << 5,
- DR_MATCHER_CRITERIA_MAX = 1 << 6,
+ DR_MATCHER_CRITERIA_MISC5 = 1 << 6,
+ DR_MATCHER_CRITERIA_MAX = 1 << 7,
};
enum mlx5dr_action_type {
@@ -124,6 +127,7 @@ enum mlx5dr_action_type {
DR_ACTION_TYP_INSERT_HDR,
DR_ACTION_TYP_REMOVE_HDR,
DR_ACTION_TYP_SAMPLER,
+ DR_ACTION_TYP_ASO_FLOW_METER,
DR_ACTION_TYP_MAX,
};
@@ -144,10 +148,12 @@ struct mlx5dr_matcher_rx_tx;
struct mlx5dr_ste_ctx;
struct mlx5dr_ste {
- u8 *hw_ste;
/* refcount: indicates the num of rules that using this ste */
u32 refcount;
+ /* this ste is part of a rule, located in ste's chain */
+ u8 ste_chain_location;
+
/* attached to the miss_list head at each htbl entry */
struct list_head miss_list_node;
@@ -158,9 +164,6 @@ struct mlx5dr_ste {
/* The rule this STE belongs to */
struct mlx5dr_rule_rx_tx *rule_rx_tx;
-
- /* this ste is part of a rule, located in ste's chain */
- u8 ste_chain_location;
};
struct mlx5dr_ste_htbl_ctrl {
@@ -178,14 +181,7 @@ struct mlx5dr_ste_htbl {
u16 byte_mask;
u32 refcount;
struct mlx5dr_icm_chunk *chunk;
- struct mlx5dr_ste *ste_arr;
- u8 *hw_ste_arr;
-
- struct list_head *miss_list;
-
- enum mlx5dr_icm_chunk_size chunk_size;
struct mlx5dr_ste *pointing_ste;
-
struct mlx5dr_ste_htbl_ctrl ctrl;
};
@@ -276,6 +272,13 @@ struct mlx5dr_ste_actions_attr {
int count;
u32 headers[MLX5DR_MAX_VLANS];
} vlans;
+
+ struct {
+ u32 obj_id;
+ u32 offset;
+ u8 dest_reg_id;
+ u8 init_color;
+ } aso_flow_meter;
};
void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
@@ -440,6 +443,11 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_match_param *mask,
struct mlx5dr_cmd_caps *caps,
bool inner, bool rx);
+void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
@@ -454,6 +462,10 @@ void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_match_param *mask,
struct mlx5dr_cmd_caps *caps,
bool inner, bool rx);
+void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
@@ -494,57 +506,66 @@ struct mlx5dr_match_spec {
/* Incoming packet Ethertype - this is the Ethertype
* following the last VLAN tag of the packet
*/
- u32 ethertype:16;
u32 smac_15_0:16; /* Source MAC address of incoming packet */
+ u32 ethertype:16;
+
u32 dmac_47_16; /* Destination MAC address of incoming packet */
- /* VLAN ID of first VLAN tag in the incoming packet.
+
+ u32 dmac_15_0:16; /* Destination MAC address of incoming packet */
+ /* Priority of first VLAN tag in the incoming packet.
* Valid only when cvlan_tag==1 or svlan_tag==1
*/
- u32 first_vid:12;
+ u32 first_prio:3;
/* CFI bit of first VLAN tag in the incoming packet.
* Valid only when cvlan_tag==1 or svlan_tag==1
*/
u32 first_cfi:1;
- /* Priority of first VLAN tag in the incoming packet.
+ /* VLAN ID of first VLAN tag in the incoming packet.
* Valid only when cvlan_tag==1 or svlan_tag==1
*/
- u32 first_prio:3;
- u32 dmac_15_0:16; /* Destination MAC address of incoming packet */
- /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK;
- * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS
+ u32 first_vid:12;
+
+ u32 ip_protocol:8; /* IP protocol */
+ /* Differentiated Services Code Point derived from
+ * Traffic Class/TOS field of IPv6/v4
*/
- u32 tcp_flags:9;
- u32 ip_version:4; /* IP version */
- u32 frag:1; /* Packet is an IP fragment */
- /* The first vlan in the packet is s-vlan (0x8a88).
- * cvlan_tag and svlan_tag cannot be set together
+ u32 ip_dscp:6;
+ /* Explicit Congestion Notification derived from
+ * Traffic Class/TOS field of IPv6/v4
*/
- u32 svlan_tag:1;
+ u32 ip_ecn:2;
/* The first vlan in the packet is c-vlan (0x8100).
* cvlan_tag and svlan_tag cannot be set together
*/
u32 cvlan_tag:1;
- /* Explicit Congestion Notification derived from
- * Traffic Class/TOS field of IPv6/v4
+ /* The first vlan in the packet is s-vlan (0x8a88).
+ * cvlan_tag and svlan_tag cannot be set together
*/
- u32 ip_ecn:2;
- /* Differentiated Services Code Point derived from
- * Traffic Class/TOS field of IPv6/v4
+ u32 svlan_tag:1;
+ u32 frag:1; /* Packet is an IP fragment */
+ u32 ip_version:4; /* IP version */
+ /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK;
+ * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS
*/
- u32 ip_dscp:6;
- u32 ip_protocol:8; /* IP protocol */
+ u32 tcp_flags:9;
+
+ /* TCP source port.;tcp and udp sport/dport are mutually exclusive */
+ u32 tcp_sport:16;
/* TCP destination port.
* tcp and udp sport/dport are mutually exclusive
*/
u32 tcp_dport:16;
- /* TCP source port.;tcp and udp sport/dport are mutually exclusive */
- u32 tcp_sport:16;
+
+ u32 reserved_auto1:16;
+ u32 ipv4_ihl:4;
+ u32 reserved_auto2:4;
u32 ttl_hoplimit:8;
- u32 reserved:24;
- /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */
- u32 udp_dport:16;
+
/* UDP source port.;tcp and udp sport/dport are mutually exclusive */
u32 udp_sport:16;
+ /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */
+ u32 udp_dport:16;
+
/* IPv6 source address of incoming packets
* For IPv4 address use bits 31:0 (rest of the bits are reserved)
* This field should be qualified by an appropriate ethertype
@@ -588,96 +609,114 @@ struct mlx5dr_match_spec {
};
struct mlx5dr_match_misc {
- u32 source_sqn:24; /* Source SQN */
- u32 source_vhca_port:4;
- /* used with GRE, sequence number exist when gre_s_present == 1 */
- u32 gre_s_present:1;
- /* used with GRE, key exist when gre_k_present == 1 */
- u32 gre_k_present:1;
- u32 reserved_auto1:1;
/* used with GRE, checksum exist when gre_c_present == 1 */
u32 gre_c_present:1;
+ u32 reserved_auto1:1;
+ /* used with GRE, key exist when gre_k_present == 1 */
+ u32 gre_k_present:1;
+ /* used with GRE, sequence number exist when gre_s_present == 1 */
+ u32 gre_s_present:1;
+ u32 source_vhca_port:4;
+ u32 source_sqn:24; /* Source SQN */
+
+ u32 source_eswitch_owner_vhca_id:16;
/* Source port.;0xffff determines wire port */
u32 source_port:16;
- u32 source_eswitch_owner_vhca_id:16;
- /* VLAN ID of first VLAN tag the inner header of the incoming packet.
- * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
- */
- u32 inner_second_vid:12;
- /* CFI bit of first VLAN tag in the inner header of the incoming packet.
- * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
- */
- u32 inner_second_cfi:1;
- /* Priority of second VLAN tag in the inner header of the incoming packet.
- * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
- */
- u32 inner_second_prio:3;
- /* VLAN ID of first VLAN tag the outer header of the incoming packet.
+
+ /* Priority of second VLAN tag in the outer header of the incoming packet.
* Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
*/
- u32 outer_second_vid:12;
+ u32 outer_second_prio:3;
/* CFI bit of first VLAN tag in the outer header of the incoming packet.
* Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
*/
u32 outer_second_cfi:1;
- /* Priority of second VLAN tag in the outer header of the incoming packet.
+ /* VLAN ID of first VLAN tag the outer header of the incoming packet.
* Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
*/
- u32 outer_second_prio:3;
- u32 gre_protocol:16; /* GRE Protocol (outer) */
- u32 reserved_auto3:12;
- /* The second vlan in the inner header of the packet is s-vlan (0x8a88).
- * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ u32 outer_second_vid:12;
+ /* Priority of second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
*/
- u32 inner_second_svlan_tag:1;
- /* The second vlan in the outer header of the packet is s-vlan (0x8a88).
+ u32 inner_second_prio:3;
+ /* CFI bit of first VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
+ */
+ u32 inner_second_cfi:1;
+ /* VLAN ID of first VLAN tag the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
+ */
+ u32 inner_second_vid:12;
+
+ u32 outer_second_cvlan_tag:1;
+ u32 inner_second_cvlan_tag:1;
+ /* The second vlan in the outer header of the packet is c-vlan (0x8100).
* outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
*/
u32 outer_second_svlan_tag:1;
/* The second vlan in the inner header of the packet is c-vlan (0x8100).
* inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
*/
- u32 inner_second_cvlan_tag:1;
- /* The second vlan in the outer header of the packet is c-vlan (0x8100).
+ u32 inner_second_svlan_tag:1;
+ /* The second vlan in the outer header of the packet is s-vlan (0x8a88).
* outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
*/
- u32 outer_second_cvlan_tag:1;
- u32 gre_key_l:8; /* GRE Key [7:0] (outer) */
+ u32 reserved_auto2:12;
+ /* The second vlan in the inner header of the packet is s-vlan (0x8a88).
+ * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ */
+ u32 gre_protocol:16; /* GRE Protocol (outer) */
+
u32 gre_key_h:24; /* GRE Key[31:8] (outer) */
- u32 reserved_auto4:8;
+ u32 gre_key_l:8; /* GRE Key [7:0] (outer) */
+
u32 vxlan_vni:24; /* VXLAN VNI (outer) */
- u32 geneve_oam:1; /* GENEVE OAM field (outer) */
- u32 reserved_auto5:7;
+ u32 reserved_auto3:8;
+
u32 geneve_vni:24; /* GENEVE VNI field (outer) */
+ u32 reserved_auto4:6;
+ u32 geneve_tlv_option_0_exist:1;
+ u32 geneve_oam:1; /* GENEVE OAM field (outer) */
+
+ u32 reserved_auto5:12;
u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */
+
u32 reserved_auto6:12;
u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */
- u32 reserved_auto7:12;
- u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */
+
+ u32 reserved_auto7:10;
u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */
- u32 reserved_auto8:10;
+ u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */
+
+ u32 reserved_auto8:8;
u32 bth_dst_qp:24; /* Destination QP in BTH header */
- u32 reserved_auto9:8;
- u8 reserved_auto10[20];
+
+ u32 reserved_auto9;
+ u32 outer_esp_spi;
+ u32 reserved_auto10[3];
};
struct mlx5dr_match_misc2 {
- u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */
- u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */
- u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */
u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */
- u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */
- u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */
- u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */
+ u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */
+ u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */
+ u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */
+
u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */
- u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */
- u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */
- u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */
+ u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */
+ u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */
+ u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */
+
u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */
- u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */
- u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */
- u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */
+ u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */
+
u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */
+ u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */
+ u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */
+
u32 metadata_reg_c_7; /* metadata_reg_c_7 */
u32 metadata_reg_c_6; /* metadata_reg_c_6 */
u32 metadata_reg_c_5; /* metadata_reg_c_5 */
@@ -687,7 +726,7 @@ struct mlx5dr_match_misc2 {
u32 metadata_reg_c_1; /* metadata_reg_c_1 */
u32 metadata_reg_c_0; /* metadata_reg_c_0 */
u32 metadata_reg_a; /* metadata_reg_a */
- u8 reserved_auto2[12];
+ u32 reserved_auto1[3];
};
struct mlx5dr_match_misc3 {
@@ -695,24 +734,34 @@ struct mlx5dr_match_misc3 {
u32 outer_tcp_seq_num;
u32 inner_tcp_ack_num;
u32 outer_tcp_ack_num;
- u32 outer_vxlan_gpe_vni:24;
+
u32 reserved_auto1:8;
- u32 reserved_auto2:16;
- u32 outer_vxlan_gpe_flags:8;
+ u32 outer_vxlan_gpe_vni:24;
+
u32 outer_vxlan_gpe_next_protocol:8;
+ u32 outer_vxlan_gpe_flags:8;
+ u32 reserved_auto2:16;
+
u32 icmpv4_header_data;
u32 icmpv6_header_data;
- u8 icmpv6_code;
- u8 icmpv6_type;
- u8 icmpv4_code;
+
u8 icmpv4_type;
+ u8 icmpv4_code;
+ u8 icmpv6_type;
+ u8 icmpv6_code;
+
u32 geneve_tlv_option_0_data;
- u8 gtpu_msg_flags;
- u8 gtpu_msg_type;
+
u32 gtpu_teid;
+
+ u8 gtpu_msg_type;
+ u8 gtpu_msg_flags;
+ u32 reserved_auto3:16;
+
u32 gtpu_dw_2;
u32 gtpu_first_ext_dw_0;
u32 gtpu_dw_0;
+ u32 reserved_auto4;
};
struct mlx5dr_match_misc4 {
@@ -724,6 +773,18 @@ struct mlx5dr_match_misc4 {
u32 prog_sample_field_id_2;
u32 prog_sample_field_value_3;
u32 prog_sample_field_id_3;
+ u32 reserved_auto1[8];
+};
+
+struct mlx5dr_match_misc5 {
+ u32 macsec_tag_0;
+ u32 macsec_tag_1;
+ u32 macsec_tag_2;
+ u32 macsec_tag_3;
+ u32 tunnel_header_0;
+ u32 tunnel_header_1;
+ u32 tunnel_header_2;
+ u32 tunnel_header_3;
};
struct mlx5dr_match_param {
@@ -733,12 +794,23 @@ struct mlx5dr_match_param {
struct mlx5dr_match_misc2 misc2;
struct mlx5dr_match_misc3 misc3;
struct mlx5dr_match_misc4 misc4;
+ struct mlx5dr_match_misc5 misc5;
};
#define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \
(_misc3)->icmpv4_code || \
(_misc3)->icmpv4_header_data)
+#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \
+ (_spec)->src_ip_95_64 || \
+ (_spec)->src_ip_63_32 || \
+ (_spec)->src_ip_31_0)
+
+#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \
+ (_spec)->dst_ip_95_64 || \
+ (_spec)->dst_ip_63_32 || \
+ (_spec)->dst_ip_31_0)
+
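The two new mask macros answer "is any part of this 128-bit address masked?" by OR-ing the four 32-bit words it is stored in; since IPv4 masks only ever populate the low word (src_ip_31_0/dst_ip_31_0), one test covers both address families. A standalone usage example:

    #include <stdint.h>
    #include <stdio.h>

    struct spec {
    	uint32_t src_ip_127_96, src_ip_95_64, src_ip_63_32, src_ip_31_0;
    };

    #define MASK_IS_SRC_IP_SET(_s) ((_s)->src_ip_127_96 || (_s)->src_ip_95_64 || \
    				(_s)->src_ip_63_32 || (_s)->src_ip_31_0)

    int main(void)
    {
    	struct spec v4 = { .src_ip_31_0 = 0xffffffffu };	/* IPv4 /32 mask */
    	struct spec none = { 0 };

    	printf("%d %d\n", MASK_IS_SRC_IP_SET(&v4), MASK_IS_SRC_IP_SET(&none));
    	return 0;	/* prints: 1 0 */
    }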
struct mlx5dr_esw_caps {
u64 drop_icm_address_rx;
u64 drop_icm_address_tx;
@@ -789,6 +861,7 @@ struct mlx5dr_cmd_caps {
u8 flex_parser_id_gtpu_teid;
u8 flex_parser_id_gtpu_dw_2;
u8 flex_parser_id_gtpu_first_ext_dw_0;
+ u8 flex_parser_ok_bits_supp;
u8 max_ft_level;
u16 roce_min_src_udp;
u8 sw_format_ver;
@@ -843,12 +916,15 @@ struct mlx5dr_domain {
struct mlx5dr_domain_info info;
struct xarray csum_fts_xa;
struct mlx5dr_ste_ctx *ste_ctx;
+ struct list_head dbg_tbl_list;
+ struct mlx5dr_dbg_dump_info dump_info;
};
struct mlx5dr_table_rx_tx {
struct mlx5dr_ste_htbl *s_anchor;
struct mlx5dr_domain_rx_tx *nic_dmn;
u64 default_icm_addr;
+ struct list_head nic_matcher_list;
};
struct mlx5dr_table {
@@ -862,6 +938,7 @@ struct mlx5dr_table {
struct list_head matcher_list;
struct mlx5dr_action *miss_action;
refcount_t refcount;
+ struct list_head dbg_node;
};
struct mlx5dr_matcher_rx_tx {
@@ -875,18 +952,21 @@ struct mlx5dr_matcher_rx_tx {
u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX];
u64 default_icm_addr;
struct mlx5dr_table_rx_tx *nic_tbl;
+ u32 prio;
+ struct list_head list_node;
+ u32 rules;
};
struct mlx5dr_matcher {
struct mlx5dr_table *tbl;
struct mlx5dr_matcher_rx_tx rx;
struct mlx5dr_matcher_rx_tx tx;
- struct list_head matcher_list;
+ struct list_head list_node; /* Used for both matchers and dbg managing */
u32 prio;
struct mlx5dr_match_param mask;
u8 match_criteria;
refcount_t refcount;
- struct mlx5dv_flow_matcher *dv_matcher;
+ struct list_head dbg_rule_list;
};
struct mlx5dr_ste_action_modify_field {
@@ -958,6 +1038,19 @@ struct mlx5dr_action_flow_tag {
u32 flow_tag;
};
+struct mlx5dr_rule_action_member {
+ struct mlx5dr_action *action;
+ struct list_head list;
+};
+
+struct mlx5dr_action_aso_flow_meter {
+ struct mlx5dr_domain *dmn;
+ u32 obj_id;
+ u32 offset;
+ u8 dest_reg_id;
+ u8 init_color;
+};
+
struct mlx5dr_action {
enum mlx5dr_action_type action_type;
refcount_t refcount;
@@ -972,6 +1065,7 @@ struct mlx5dr_action {
struct mlx5dr_action_vport *vport;
struct mlx5dr_action_push_vlan *push_vlan;
struct mlx5dr_action_flow_tag *flow_tag;
+ struct mlx5dr_action_aso_flow_meter *aso;
};
};
@@ -998,6 +1092,7 @@ struct mlx5dr_rule {
struct mlx5dr_rule_rx_tx rx;
struct mlx5dr_rule_rx_tx tx;
struct list_head rule_actions_list;
+ struct list_head dbg_node;
u32 flow_source;
};
@@ -1011,16 +1106,12 @@ int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
struct mlx5dr_icm_chunk {
struct mlx5dr_icm_buddy_mem *buddy_mem;
struct list_head chunk_list;
- u32 rkey;
- u32 num_of_entries;
- u32 byte_size;
- u64 icm_addr;
- u64 mr_addr;
/* indicates the index of this chunk in the whole memory,
* used for deleting the chunk from the buddy
*/
unsigned int seg;
+ enum mlx5dr_icm_chunk_size size;
/* Memory optimisation */
struct mlx5dr_ste *ste_arr;
@@ -1050,11 +1141,23 @@ static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn)
mlx5dr_domain_nic_unlock(&dmn->info.rx);
}
+int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+
int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
enum mlx5dr_ipv outer_ipv,
enum mlx5dr_ipv inner_ipv);
+u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk);
+u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk);
+u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste);
+
static inline int
mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type)
{
@@ -1087,7 +1190,7 @@ static inline int
mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl)
{
int num_of_entries =
- mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
+ mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk->size);
/* Threshold is 50%, one is added to table of size 1 */
return (num_of_entries + 1) / 2;
@@ -1096,7 +1199,7 @@ mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl)
static inline bool
mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl)
{
- if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
+ if (htbl->chunk->size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
return false;
return true;
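Both inline helpers above now read the chunk size through htbl->chunk->size instead of a duplicated htbl field. The threshold arithmetic deserves a worked example: (num_of_entries + 1) / 2 is a 50% fill threshold, and the +1 keeps a one-entry table growable after its first insertion instead of pinning its threshold at zero:

    #include <stdio.h>

    /* 50% threshold; +1 so a table of size 1 gets threshold 1, not 0 */
    static int increase_threshold(int num_of_entries)
    {
    	return (num_of_entries + 1) / 2;
    }

    int main(void)
    {
    	for (int n = 1; n <= 8; n <<= 1)
    		printf("size %d -> threshold %d\n", n, increase_threshold(n));
    	return 0;	/* prints 1->1, 2->1, 4->2, 8->4 */
    }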
@@ -1114,6 +1217,7 @@ struct mlx5dr_cmd_query_flow_table_details {
struct mlx5dr_cmd_create_flow_table_attr {
u32 table_type;
+ u16 uid;
u64 icm_addr_rx;
u64 icm_addr_tx;
u8 level;
@@ -1190,20 +1294,6 @@ struct mlx5dr_cmd_gid_attr {
u32 roce_ver;
};
-struct mlx5dr_cmd_qp_create_attr {
- u32 page_id;
- u32 pdn;
- u32 cqn;
- u32 pm_state;
- u32 service_type;
- u32 buff_umem_id;
- u32 db_umem_id;
- u32 sq_wqe_cnt;
- u32 rq_wqe_cnt;
- u32 rq_wqe_shift;
- u8 isolate_vl_tc:1;
-};
-
int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
u16 index, struct mlx5dr_cmd_gid_attr *attr);
@@ -1375,7 +1465,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
bool reformat_req,
u32 *tbl_id,
u32 *group_id,
- bool ignore_flow_level);
+ bool ignore_flow_level,
+ u32 flow_source);
void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
u32 group_id);
#endif /* _DR_TYPES_H_ */
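One detail of the dr_types.h changes worth spelling out: the matcher criteria enum is a bitmask, so adding DR_MATCHER_CRITERIA_MISC5 as 1 << 6 requires bumping DR_MATCHER_CRITERIA_MAX to 1 << 7, preserving the invariant that MAX is the first unused bit and can serve as a validity bound. A small sketch of that usage (lower bits follow the kernel enum, names abbreviated):

    #include <stdio.h>

    enum criteria {
    	CRIT_OUTER = 1 << 0,
    	CRIT_MISC  = 1 << 1,
    	CRIT_INNER = 1 << 2,
    	CRIT_MISC2 = 1 << 3,
    	CRIT_MISC3 = 1 << 4,
    	CRIT_MISC4 = 1 << 5,
    	CRIT_MISC5 = 1 << 6,
    	CRIT_MAX   = 1 << 7,	/* always the next free bit */
    };

    int main(void)
    {
    	unsigned int criteria = CRIT_OUTER | CRIT_MISC5;

    	if (criteria >= CRIT_MAX)	/* reject unknown criteria bits */
    		return 1;
    	printf("uses misc5: %s\n", (criteria & CRIT_MISC5) ? "yes" : "no");
    	return 0;
    }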
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 2632d5ae9bc0..13b6d4721e17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies */
+#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"
@@ -43,11 +44,10 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns,
err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action);
if (err && action) {
err = mlx5dr_action_destroy(action);
- if (err) {
- action = NULL;
- mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
- err);
- }
+ if (err)
+ mlx5_core_err(ns->dev,
+ "Failed to destroy action (%d)\n", err);
+ action = NULL;
}
ft->fs_dr_table.miss_action = action;
if (old_miss_action) {
@@ -62,7 +62,7 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
struct mlx5dr_table *tbl;
@@ -71,7 +71,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
if (mlx5_dr_is_fw_table(ft->flags))
return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
- size,
+ ft_attr,
next_ft);
flags = ft->flags;
/* turn off encap/decap if not supported for sw-str by fw */
@@ -79,7 +79,8 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
- tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags);
+ tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags,
+ ft_attr->uid);
if (!tbl) {
mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
return -EINVAL;
@@ -194,6 +195,15 @@ static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain,
dest_attr->vport.vhca_id);
}
+static struct mlx5dr_action *create_uplink_action(struct mlx5dr_domain *domain,
+ struct mlx5_flow_rule *dst)
+{
+ struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+
+ return mlx5dr_action_create_dest_vport(domain, MLX5_VPORT_UPLINK, 1,
+ dest_attr->vport.vhca_id);
+}
+
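/* Editor's note: MLX5_VPORT_UPLINK (0xffff) comes from <linux/mlx5/vport.h>,
 * newly included at the top of this file; the literal 1 is the vhca_id_valid
 * argument of mlx5dr_action_create_dest_vport().
 */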
static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain,
struct mlx5_flow_rule *dst)
{
@@ -218,11 +228,16 @@ static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domai
static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst)
{
- return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ return (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
}
-#define MLX5_FLOW_CONTEXT_ACTION_MAX 32
+/* We want to support a rule with 32 destinations, which means we need to
+ * account for 32 destinations, plus typically a counter, plus one extra
+ * action for a multi-destination flow table.
+ */
+#define MLX5_FLOW_CONTEXT_ACTION_MAX 34
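/* Editor's note: the arithmetic behind the new value, spelled out; the
 * DR_EXAMPLE_* names are hypothetical, for illustration only.
 */
/* #define DR_EXAMPLE_MAX_DESTS	32	destinations in a single rule	*/
/* #define DR_EXAMPLE_EXTRAS	 2	a counter + a multi-dest FT action */
/* 32 + 2 == 34 == MLX5_FLOW_CONTEXT_ACTION_MAX				*/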
static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_group *group,
@@ -392,9 +407,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
enum mlx5_flow_destination_type type = dst->dest_attr.type;
u32 id;
- if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
- num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) {
- err = -ENOSPC;
+ if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
goto free_actions;
}
@@ -411,8 +426,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
fs_dr_actions[fs_dr_num_actions++] = tmp_action;
term_actions[num_term_actions++].dest = tmp_action;
break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
- tmp_action = create_vport_action(domain, dst);
+ tmp_action = type == MLX5_FLOW_DESTINATION_TYPE_VPORT ?
+ create_vport_action(domain, dst) :
+ create_uplink_action(domain, dst);
if (!tmp_action) {
err = -ENOMEM;
goto free_actions;
@@ -464,8 +482,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
MLX5_FLOW_DESTINATION_TYPE_COUNTER)
continue;
- if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
- err = -ENOSPC;
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
goto free_actions;
}
@@ -482,21 +501,58 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
}
}
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ tmp_action =
+ mlx5dr_action_create_aso(domain,
+ fte->action.exe_aso.object_id,
+ fte->action.exe_aso.return_reg_id,
+ fte->action.exe_aso.type,
+ fte->action.exe_aso.flow_meter.init_color,
+ fte->action.exe_aso.flow_meter.meter_idx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
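/* Editor's note: a hedged sketch of the new ASO hook in isolation, using the
 * mlx5dr_action_create_aso() signature declared in mlx5dr.h further down this
 * patch; the object id, register index and meter index are hypothetical.
 */
	tmp_action = mlx5dr_action_create_aso(domain,
					      0x24 /* ASO object id */,
					      2 /* return reg_c index */,
					      MLX5_EXE_ASO_FLOW_METER,
					      fte->action.exe_aso.flow_meter.init_color,
					      0 /* meter index within object */);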
params.match_sz = match_sz;
params.match_buf = (u64 *)fte->val;
if (num_term_actions == 1) {
- if (term_actions->reformat)
+ if (term_actions->reformat) {
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
actions[num_actions++] = term_actions->reformat;
+ }
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
actions[num_actions++] = term_actions->dest;
} else if (num_term_actions > 1) {
bool ignore_flow_level =
!!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+ u32 flow_source = fte->flow_context.flow_source;
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
term_actions,
num_term_actions,
- ignore_flow_level);
+ ignore_flow_level,
+ flow_source);
if (!tmp_action) {
err = -EOPNOTSUPP;
goto free_actions;
@@ -721,6 +777,16 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain);
}
+static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ if (ft_type != FS_FT_FDB ||
+ MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5)
+ return 0;
+
+ return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
+}
+
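/* Editor's note: a hedged sketch of consuming these bits from fs_core; the
 * mlx5_fs_get_capabilities() accessor and its namespace argument are
 * assumptions based on how the capability hook is wired up.
 */
	if (mlx5_fs_get_capabilities(dev, MLX5_FLOW_NAMESPACE_FDB) &
	    MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)
		; /* a rule may push VLAN on the FDB RX path */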
bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
{
return mlx5dr_is_supported(dev);
@@ -745,6 +811,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
.set_peer = mlx5_cmd_dr_set_peer,
.create_ns = mlx5_cmd_dr_create_ns,
.destroy_ns = mlx5_cmd_dr_destroy_ns,
+ .get_capabilities = mlx5_cmd_dr_get_capabilities,
};
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
index 1fb185d6ac7f..d168622063d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
@@ -14,10 +14,6 @@ struct mlx5_fs_dr_action {
struct mlx5dr_action *dr_action;
};
-struct mlx5_fs_dr_ns {
- struct mlx5_dr_ns *dr_ns;
-};
-
struct mlx5_fs_dr_rule {
struct mlx5dr_rule *dr_rule;
/* Only actions created by fs_dr */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
index d2a937f69784..fb078fa0f0cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -447,6 +447,14 @@ struct mlx5_ifc_ste_flex_parser_1_bits {
u8 flex_parser_4[0x20];
};
+struct mlx5_ifc_ste_flex_parser_ok_bits {
+ u8 flex_parser_3[0x20];
+ u8 flex_parser_2[0x20];
+ u8 flex_parsers_ok[0x8];
+ u8 reserved_at_48[0x18];
+ u8 flex_parser_0[0x20];
+};
+
struct mlx5_ifc_ste_flex_parser_tnl_bits {
u8 flex_parser_tunneling_header_63_32[0x20];
@@ -490,6 +498,14 @@ struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_ste_tunnel_header_bits {
+ u8 tunnel_header_0[0x20];
+
+ u8 tunnel_header_1[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
struct mlx5_ifc_ste_general_purpose_bits {
u8 general_purpose_lookup_field[0x20];
@@ -558,4 +574,30 @@ struct mlx5_ifc_dr_action_hw_copy_bits {
u8 reserved_at_38[0x8];
};
+enum {
+ MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ = 2,
+};
+
+struct mlx5_ifc_ste_aso_flow_meter_action_bits {
+ u8 reserved_at_0[0xc];
+ u8 action[0x1];
+ u8 initial_color[0x2];
+ u8 line_id[0x1];
+};
+
+struct mlx5_ifc_ste_double_action_aso_v1_bits {
+ u8 action_id[0x8];
+ u8 aso_context_number[0x18];
+
+ u8 dest_reg_id[0x2];
+ u8 change_ordering_tag[0x1];
+ u8 aso_check_ordering[0x1];
+ u8 aso_context_type[0x4];
+ u8 reserved_at_28[0x8];
+ union {
+ u8 aso_fields[0x10];
+ struct mlx5_ifc_ste_aso_flow_meter_action_bits flow_meter;
+ };
+};
+
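/* Editor's note: in mlx5_ifc structs, u8 name[0xN] declares an N-bit field;
 * MLX5_SET()/MLX5_GET() resolve the bit offsets, including through the union
 * above. A hedged sketch (d_action, obj_id and init_color are hypothetical):
 */
	MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_number, obj_id);
	MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.initial_color,
		 init_color);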
#endif /* MLX5_IFC_DR_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index c7c93131b762..226a0d7bb06d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -51,7 +51,11 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn);
struct mlx5dr_table *
-mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags);
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,
+ u16 uid);
+
+struct mlx5dr_table *
+mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft);
int mlx5dr_table_destroy(struct mlx5dr_table *table);
@@ -96,7 +100,8 @@ struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
u32 num_of_dests,
- bool ignore_flow_level);
+ bool ignore_flow_level,
+ u32 flow_source);
struct mlx5dr_action *mlx5dr_action_create_drop(void);
@@ -127,6 +132,14 @@ struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void);
struct mlx5dr_action *
mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr);
+struct mlx5dr_action *
+mlx5dr_action_create_aso(struct mlx5dr_domain *dmn,
+ u32 obj_id,
+ u8 return_reg_id,
+ u8 aso_type,
+ u8 init_color,
+ u8 meter_id);
+
int mlx5dr_action_destroy(struct mlx5dr_action *action);
static inline bool
@@ -136,7 +149,7 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev)
(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
(MLX5_CAP_GEN(dev, steering_format_version) <=
- MLX5_STEERING_FORMAT_CONNECTX_6DX)));
+ MLX5_STEERING_FORMAT_CONNECTX_7)));
}
/* buddy functions & structure */
@@ -160,6 +173,11 @@ struct mlx5dr_icm_buddy_mem {
* sync_ste command sets them free.
*/
struct list_head hot_list;
+
+ /* Memory optimisation: per-buddy arrays shared by all chunks carved
+ * out of this buddy, instead of per-chunk allocations.
+ */
+ struct mlx5dr_ste *ste_arr;
+ struct list_head *miss_list;
+ u8 *hw_ste_arr;
};
int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 01e9c412977c..8455e79bc44a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/io-mapping.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
@@ -100,19 +99,21 @@ static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
int err = -ENOMEM;
phys_addr_t pfn;
int bfregs;
+ int node;
int i;
bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
- up = kzalloc(sizeof(*up), GFP_KERNEL);
+ node = mdev->priv.numa_node;
+ up = kzalloc_node(sizeof(*up), GFP_KERNEL, node);
if (!up)
return ERR_PTR(err);
up->mdev = mdev;
- up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
+ up->reg_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node);
if (!up->reg_bitmap)
goto error1;
- up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
+ up->fp_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node);
if (!up->fp_bitmap)
goto error1;
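/* Editor's note: kzalloc_node()/bitmap_zalloc_node() place the UAR metadata
 * on the device's NUMA node; with NUMA_NO_NODE they behave like the plain
 * variants, so this is a locality optimisation, not a functional change.
 */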
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 8846d30a380a..d5c317325030 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -280,7 +280,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) +
req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
- out = kzalloc(out_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -307,7 +307,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
ether_addr_copy(addr_list[i], mac_addr);
}
out:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
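/* Editor's note: the kzalloc -> kvzalloc conversions throughout this file all
 * follow one pattern, sketched below; the buffer sizes scale with
 * caller-supplied list lengths, so the vmalloc fallback avoids high-order
 * allocation failures. example_sized_query() is hypothetical.
 */
static int example_sized_query(size_t out_sz)
{
	void *out = kvzalloc(out_sz, GFP_KERNEL); /* kmalloc, vmalloc fallback */

	if (!out)
		return -ENOMEM;
	/* ... fill and parse the command buffer ... */
	kvfree(out); /* correct for either backing allocation */
	return 0;
}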
@@ -335,7 +335,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
- in = kzalloc(in_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -360,7 +360,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
}
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
- kfree(in);
+ kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
@@ -386,7 +386,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
list_size * MLX5_ST_SZ_BYTES(vlan_layout);
memset(out, 0, sizeof(out));
- in = kzalloc(in_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -411,7 +411,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
}
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
- kfree(in);
+ kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
@@ -542,8 +542,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
out_sz += nout * sizeof(*gid);
- in = kzalloc(in_sz, GFP_KERNEL);
- out = kzalloc(out_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!in || !out) {
err = -ENOMEM;
goto out;
@@ -573,8 +573,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
gid->global.interface_id = tmp->global.interface_id;
out:
- kfree(in);
- kfree(out);
+ kvfree(in);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid);
@@ -607,8 +607,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
out_sz += nout * MLX5_ST_SZ_BYTES(pkey);
- in = kzalloc(in_sz, GFP_KERNEL);
- out = kzalloc(out_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!in || !out) {
err = -ENOMEM;
goto out;
@@ -638,8 +638,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
*pkey = MLX5_GET_PR(pkey, pkarr, pkey);
out:
- kfree(in);
- kfree(out);
+ kvfree(in);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey);
@@ -658,7 +658,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
- out = kzalloc(out_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -717,7 +717,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
system_image_guid);
ex:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context);
@@ -728,7 +728,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *rep;
int err;
- rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep)
return -ENOMEM;
@@ -736,7 +736,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
if (!err)
*sys_image_guid = rep->sys_image_guid;
- kfree(rep);
+ kvfree(rep);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid);
@@ -747,7 +747,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *rep;
int err;
- rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep)
return -ENOMEM;
@@ -755,7 +755,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
if (!err)
*node_guid = rep->node_guid;
- kfree(rep);
+ kvfree(rep);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
@@ -770,7 +770,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
int err;
- out = kzalloc(outlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -786,7 +786,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
nic_vport_context.promisc_all);
out:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
@@ -874,7 +874,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
int value;
int err;
- out = kzalloc(outlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -891,7 +891,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
*status = !value;
out:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
@@ -1033,7 +1033,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
mlx5_core_dbg(dev, "vf %d\n", vf);
is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
- in = kzalloc(in_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1065,7 +1065,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
req->cap_mask1_perm);
err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in);
ex:
- kfree(in);
+ kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
@@ -1086,9 +1086,17 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
goto free;
MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
- MLX5_SET(modify_nic_vport_context_in, in,
- nic_vport_context.affiliated_vhca_id,
- MLX5_CAP_GEN(master_mdev, vhca_id));
+ if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) {
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN_2(master_mdev, sw_vhca_id));
+ } else {
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN(master_mdev, vhca_id));
+ }
MLX5_SET(modify_nic_vport_context_in, in,
nic_vport_context.affiliation_criteria,
MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
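/* Editor's note: when the firmware reports sw_vhca_id_valid, affiliation is
 * keyed by the software-assigned VHCA id (MLX5_CAP_GEN_2(..., sw_vhca_id))
 * and vhca_id_type selects the SW id namespace; otherwise the legacy
 * hardware vhca_id is used, as before.
 */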
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index e5c4dcd1425e..4d629e5ddbc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -123,7 +123,7 @@ static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq)
wq->cur_sz++;
}
-static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n)
+static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n)
{
wq->wqe_ctr += n;
wq->cur_sz += n;
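/* Editor's note: widening n from u8 to u16 matches wq->wqe_ctr, which is
 * already u16; a caller may push a bulk of more than 255 WQEs at once, which
 * a u8 parameter would silently truncate.
 */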