aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig153
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile87
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c118
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h93
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h139
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c85
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c725
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c683
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c734
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c311
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.c60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h781
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h312
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c613
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.c177
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.c722
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c263
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c215
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c1219
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h164
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c274
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c55
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c854
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h102
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c518
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c351
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c573
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c398
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c900
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c653
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c467
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c606
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.h50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c640
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h69
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c266
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c154
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c337
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c119
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c228
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c380
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c457
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c585
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c184
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c209
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c656
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c2277
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h220
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h218
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c675
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c1757
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c128
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.c203
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c331
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h379
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c378
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h108
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c230
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c308
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c232
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h130
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c403
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c269
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h139
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c606
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c205
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c364
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h128
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c198
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h189
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c782
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c)49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c788
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c138
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h91
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c1861
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c1384
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c242
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h113
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c387
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c238
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c67
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c122
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c981
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c1542
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c240
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c4617
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c1849
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h129
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c1576
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c100
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c1096
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h107
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c4886
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h298
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c898
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c119
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c394
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c173
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c259
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c163
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c282
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c409
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c1853
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c182
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c190
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h120
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c523
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c529
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c955
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c140
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c2386
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h534
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c3218
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c758
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c134
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c168
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c1539
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h67
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c622
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c404
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c982
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c100
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c85
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c546
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c278
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c97
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c346
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c226
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c772
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.h70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c182
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c1577
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h126
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c)109
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h (renamed from drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h)12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c100
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c637
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c432
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h90
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c692
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c810
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c608
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c159
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c150
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1088
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mcg.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h165
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c355
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c748
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pd.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c238
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qos.c85
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qos.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c737
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rdma.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rl.c266
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c292
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c103
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c571
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h173
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h40
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c364
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h82
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c191
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c117
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c1506
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c221
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c657
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c261
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c647
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c774
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c416
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c260
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c2254
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h206
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c1960
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c2172
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c133
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h938
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c253
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h149
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h434
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h165
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c143
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/uar.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c255
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h17
332 files changed, 78126 insertions, 26177 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index a1f20b205299..26685fd0fdaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -6,24 +6,20 @@
config MLX5_CORE
tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
depends on PCI
+ select AUXILIARY_BUS
select NET_DEVLINK
- imply PTP_1588_CLOCK
- imply VXLAN
- imply MLXFW
- imply PCI_HYPERV_INTERFACE
- default n
- ---help---
+ depends on VXLAN || !VXLAN
+ depends on MLXFW || !MLXFW
+ depends on PTP_1588_CLOCK_OPTIONAL
+ depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE
+ help
Core driver for low level functionality of the ConnectX-4 and
Connect-IB cards by Mellanox Technologies.
-config MLX5_ACCEL
- bool
-
config MLX5_FPGA
bool "Mellanox Technologies Innova support"
depends on MLX5_CORE
- select MLX5_ACCEL
- ---help---
+ help
Build support for the Innova family of network cards by Mellanox
Technologies. Innova network cards are comprised of a ConnectX chip
and an FPGA chip on one board. If you select this option, the
@@ -35,15 +31,14 @@ config MLX5_CORE_EN
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
select PAGE_POOL
select DIMLIB
- default n
- ---help---
+ help
Ethernet support in Mellanox Technologies ConnectX-4 NIC.
config MLX5_EN_ARFS
bool "Mellanox MLX5 ethernet accelerated receive flow steering (ARFS) support"
depends on MLX5_CORE_EN && RFS_ACCEL
default y
- ---help---
+ help
Mellanox MLX5 ethernet hardware-accelerated receive flow steering support,
Enables ethernet netdevice arfs support and ntuple filtering.
@@ -51,7 +46,7 @@ config MLX5_EN_RXNFC
bool "Mellanox MLX5 ethernet rx nfc flow steering support"
depends on MLX5_CORE_EN
default y
- ---help---
+ help
Mellanox MLX5 ethernet rx nfc flow steering support
Enables ethtool receive network flow classification, which allows user defined
flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc
@@ -61,7 +56,7 @@ config MLX5_MPFS
bool "Mellanox Technologies MLX5 MPFS support"
depends on MLX5_CORE_EN
default y
- ---help---
+ help
Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
support in ConnectX NIC. MPFs is required for when multi-PF configuration
is enabled to allow passing user configured unicast MAC addresses to the
@@ -71,18 +66,66 @@ config MLX5_ESWITCH
bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
depends on MLX5_CORE_EN && NET_SWITCHDEV
default y
- ---help---
+ help
Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
E-Switch provides internal SRIOV packet steering and switching for the
enabled VFs and PF in two available modes:
Legacy SRIOV mode (L2 mac vlan steering based).
Switchdev mode (eswitch offloads).
+config MLX5_BRIDGE
+ bool
+ depends on MLX5_ESWITCH && BRIDGE
+ default y
+ help
+ mlx5 ConnectX offloads support for Ethernet Bridging (BRIDGE).
+ Enable adding representors of mlx5 uplink and VF ports to Bridge and
+ offloading rules for traffic between such ports. Supports VLANs (trunk and
+ access modes).
+
+config MLX5_CLS_ACT
+ bool "MLX5 TC classifier action support"
+ depends on MLX5_ESWITCH && NET_CLS_ACT
+ default y
+ help
+ mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT),
+ works in both native NIC mode and Switchdev SRIOV mode.
+ Actions get attached to a Hardware offloaded classifiers and are
+ invoked after a successful classification. Actions are used to
+ overwrite the classification result, instantly drop or redirect and/or
+ reformat packets in wire speeds without involving the host cpu.
+
+ If set to N, TC offloads in both NIC and switchdev modes will be disabled.
+ If unsure, set to Y
+
+config MLX5_TC_CT
+ bool "MLX5 TC connection tracking offload support"
+ depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
+ default y
+ help
+ Say Y here if you want to support offloading connection tracking rules
+ via tc ct action.
+
+ If unsure, set to Y
+
+config MLX5_TC_SAMPLE
+ bool "MLX5 TC sample offload support"
+ depends on MLX5_CLS_ACT
+ depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m
+ default y
+ help
+ Say Y here if you want to support offloading sample rules via tc
+ sample action.
+ If set to N, will not be able to configure tc rules with sample
+ action.
+
+ If unsure, set to Y
+
config MLX5_CORE_EN_DCB
bool "Data Center Bridging (DCB) Support"
default y
depends on MLX5_CORE_EN && DCB
- ---help---
+ help
Say Y here if you want to use Data Center Bridging (DCB) in the
driver.
If set to N, will not be able to configure QoS and ratelimit attributes.
@@ -93,71 +136,55 @@ config MLX5_CORE_EN_DCB
config MLX5_CORE_IPOIB
bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support"
depends on MLX5_CORE_EN
- default n
- ---help---
+ help
MLX5 IPoIB offloads & acceleration support.
-config MLX5_FPGA_IPSEC
- bool "Mellanox Technologies IPsec Innova support"
- depends on MLX5_CORE
- depends on MLX5_FPGA
+config MLX5_EN_MACSEC
+ bool "Connect-X support for MACSec offload"
+ depends on MLX5_CORE_EN
+ depends on MACSEC
default n
help
- Build IPsec support for the Innova family of network cards by Mellanox
- Technologies. Innova network cards are comprised of a ConnectX chip
- and an FPGA chip on one board. If you select this option, the
- mlx5_core driver will include the Innova FPGA core and allow building
- sandbox-specific client drivers.
+ Build support for MACsec cryptography-offload acceleration in the NIC.
config MLX5_EN_IPSEC
- bool "IPSec XFRM cryptography-offload accelaration"
+ bool "Mellanox Technologies IPsec Connect-X support"
depends on MLX5_CORE_EN
depends on XFRM_OFFLOAD
depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
- depends on MLX5_FPGA_IPSEC
- default n
- help
- Build support for IPsec cryptography-offload accelaration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
-
-config MLX5_FPGA_TLS
- bool "Mellanox Technologies TLS Innova support"
- depends on TLS_DEVICE
- depends on TLS=y || MLX5_CORE=m
- depends on MLX5_FPGA
- default n
help
- Build TLS support for the Innova family of network cards by Mellanox
- Technologies. Innova network cards are comprised of a ConnectX chip
- and an FPGA chip on one board. If you select this option, the
- mlx5_core driver will include the Innova FPGA core and allow building
- sandbox-specific client drivers.
+ Build support for IPsec cryptography-offload acceleration in the NIC.
-config MLX5_TLS
+config MLX5_EN_TLS
bool "Mellanox Technologies TLS Connect-X support"
- depends on MLX5_CORE_EN
depends on TLS_DEVICE
depends on TLS=y || MLX5_CORE=m
- select MLX5_ACCEL
- default n
- help
- Build TLS support for the Connect-X family of network cards by Mellanox
- Technologies.
-
-config MLX5_EN_TLS
- bool "TLS cryptography-offload accelaration"
depends on MLX5_CORE_EN
- depends on MLX5_FPGA_TLS || MLX5_TLS
- default y
help
- Build support for TLS cryptography-offload accelaration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
+ Build support for TLS cryptography-offload acceleration in the NIC.
config MLX5_SW_STEERING
bool "Mellanox Technologies software-managed steering"
depends on MLX5_CORE_EN && MLX5_ESWITCH
+ select CRC32
default y
help
Build support for software-managed steering in the NIC.
+
+config MLX5_SF
+ bool "Mellanox Technologies subfunction device support using auxiliary device"
+ depends on MLX5_CORE && MLX5_CORE_EN
+ help
+ Build support for subfuction device in the NIC. A Mellanox subfunction
+ device can support RDMA, netdevice and vdpa device.
+ It is similar to a SRIOV VF but it doesn't require SRIOV support.
+
+config MLX5_SF_MANAGER
+ bool
+ depends on MLX5_SF && MLX5_ESWITCH
+ default y
+ help
+ Build support for subfuction port in the NIC. A Mellanox subfunction
+ port is managed through devlink. A subfunction supports RDMA, netdevice
+ and vdpa device. It is similar to a SRIOV VF but it doesn't require
+ SRIOV support.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d3e06cec8317..a22c32aabf11 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -12,20 +12,24 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
# mlx5 core basic
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
- health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
+ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
- fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
- lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
- diag/fw_tracer.o diag/crdump.o devlink.o
+ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
+ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
+ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
+ fw_reset.o qos.o lib/tout.o lib/aso.o
#
# Netdev basic
#
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
+ en/channels.o en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
- en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
- en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
+ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
+ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
+ en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \
+ lib/crypto.o
#
# Netdev extra
@@ -33,16 +37,46 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
- lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
- en/tc_tun_geneve.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
+ en_rep.o en/rep/bond.o en/mod_hdr.o \
+ en/mapping.o lag/mpesw.o
+mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
+ lib/fs_chains.o en/tc_tun.o \
+ esw/indir_table.o en/tc_tun_encap.o \
+ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
+ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
+ en/tc/post_act.o en/tc/int_port.o en/tc/meter.o \
+ en/tc/post_meter.o
+
+mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \
+ en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \
+ en/tc/act/tun.o en/tc/act/csum.o en/tc/act/pedit.o \
+ en/tc/act/vlan.o en/tc/act/vlan_mangle.o en/tc/act/mpls.o \
+ en/tc/act/mirred.o en/tc/act/mirred_nic.o \
+ en/tc/act/ct.o en/tc/act/sample.o en/tc/act/ptype.o \
+ en/tc/act/redirect_ingress.o en/tc/act/police.o
+
+ifneq ($(CONFIG_MLX5_TC_CT),)
+ mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o
+ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o
+endif
+
+mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
#
# Core extra
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
- ecpf.o rdma.o eswitch_offloads_chains.o
+ ecpf.o rdma.o esw/legacy.o \
+ esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
+
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
+ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
+ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+
+mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
+
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
@@ -56,21 +90,34 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
#
# Accelerations & FPGA
#
-mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o
-mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o
-mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o
-
mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
+mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \
+ en_accel/macsec_stats.o
+
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
- en_accel/ipsec_stats.o
+ en_accel/ipsec_stats.o en_accel/ipsec_fs.o \
+ en_accel/ipsec_offload.o
-mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
- en_accel/ktls.o en_accel/ktls_tx.o
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \
+ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \
+ en_accel/ktls_tx.o en_accel/ktls_rx.o
mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
steering/dr_matcher.o steering/dr_rule.o \
- steering/dr_icm_pool.o \
+ steering/dr_icm_pool.o steering/dr_buddy.o \
steering/dr_ste.o steering/dr_send.o \
+ steering/dr_ste_v0.o steering/dr_ste_v1.o \
+ steering/dr_ste_v2.o \
steering/dr_cmd.o steering/dr_fw.o \
- steering/dr_action.o steering/fs_dr.o
+ steering/dr_action.o steering/fs_dr.o \
+ steering/dr_dbg.o lib/smfs.o
+#
+# SF device
+#
+mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o
+
+#
+# SF manager
+#
+mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
deleted file mode 100644
index c13260467750..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef __MLX5E_ACCEL_H__
-#define __MLX5E_ACCEL_H__
-
-#ifdef CONFIG_MLX5_ACCEL
-
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include "en.h"
-
-static inline bool is_metadata_hdr_valid(struct sk_buff *skb)
-{
- __be16 *ethtype;
-
- if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN))
- return false;
- ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
- if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
- return false;
- return true;
-}
-
-static inline void remove_metadata_hdr(struct sk_buff *skb)
-{
- struct ethhdr *old_eth;
- struct ethhdr *new_eth;
-
- /* Remove the metadata from the buffer */
- old_eth = (struct ethhdr *)skb->data;
- new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
- memmove(new_eth, old_eth, 2 * ETH_ALEN);
- /* Ethertype is already in its new place */
- skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
-}
-
-#endif /* CONFIG_MLX5_ACCEL */
-
-#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
deleted file mode 100644
index eddc34e4a762..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifdef CONFIG_MLX5_FPGA_IPSEC
-
-#include <linux/mlx5/device.h>
-
-#include "accel/ipsec.h"
-#include "mlx5_core.h"
-#include "fpga/ipsec.h"
-
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_ipsec_device_caps(mdev);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
-
-unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_ipsec_counters_count(mdev);
-}
-
-int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int count)
-{
- return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
-}
-
-void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6)
-{
- return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
- spi, is_ipv6);
-}
-
-void mlx5_accel_esp_free_hw_context(void *context)
-{
- mlx5_fpga_ipsec_delete_sa_ctx(context);
-}
-
-int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_ipsec_init(mdev);
-}
-
-void mlx5_accel_ipsec_build_fs_cmds(void)
-{
- mlx5_fpga_ipsec_build_fs_cmds();
-}
-
-void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
- mlx5_fpga_ipsec_cleanup(mdev);
-}
-
-struct mlx5_accel_esp_xfrm *
-mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- struct mlx5_accel_esp_xfrm *xfrm;
-
- xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags);
- if (IS_ERR(xfrm))
- return xfrm;
-
- xfrm->mdev = mdev;
- return xfrm;
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
-
-void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
- mlx5_fpga_esp_destroy_xfrm(xfrm);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
-
-int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
-
-#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
deleted file mode 100644
index 530e428d46ab..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_ACCEL_IPSEC_H__
-#define __MLX5_ACCEL_IPSEC_H__
-
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/accel.h>
-
-#ifdef CONFIG_MLX5_FPGA_IPSEC
-
-#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
-
-unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
-int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int count);
-
-void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6);
-void mlx5_accel_esp_free_hw_context(void *context);
-
-int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
-void mlx5_accel_ipsec_build_fs_cmds(void);
-void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
-
-#else
-
-#define MLX5_IPSEC_DEV(mdev) false
-
-static inline void *
-mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6)
-{
- return NULL;
-}
-
-static inline void mlx5_accel_esp_free_hw_context(void *context)
-{
-}
-
-static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static inline void mlx5_accel_ipsec_build_fs_cmds(void)
-{
-}
-
-static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
-}
-
-#endif
-
-#endif /* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
deleted file mode 100644
index cab708af3422..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-
-#include "accel/tls.h"
-#include "mlx5_core.h"
-#include "lib/mlx5.h"
-
-#ifdef CONFIG_MLX5_FPGA_TLS
-#include "fpga/tls.h"
-
-int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx)
-{
- return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
- start_offload_tcp_sn, p_swid,
- direction_sx);
-}
-
-void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx)
-{
- mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
-}
-
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
- u64 rcd_sn)
-{
- return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
-}
-
-bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_is_tls_device(mdev) ||
- mlx5_accel_is_ktls_device(mdev);
-}
-
-u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_tls_device_caps(mdev);
-}
-
-int mlx5_accel_tls_init(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_tls_init(mdev);
-}
-
-void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
-{
- mlx5_fpga_tls_cleanup(mdev);
-}
-#endif
-
-#ifdef CONFIG_MLX5_TLS
-int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id)
-{
- u32 sz_bytes;
- void *key;
-
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128: {
- struct tls12_crypto_info_aes_gcm_128 *info =
- (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
-
- key = info->key;
- sz_bytes = sizeof(info->key);
- break;
- }
- case TLS_CIPHER_AES_GCM_256: {
- struct tls12_crypto_info_aes_gcm_256 *info =
- (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
-
- key = info->key;
- sz_bytes = sizeof(info->key);
- break;
- }
- default:
- return -EINVAL;
- }
-
- return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id);
-}
-
-void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
-{
- mlx5_destroy_encryption_key(mdev, key_id);
-}
-#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
deleted file mode 100644
index e09bc3858d57..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_ACCEL_TLS_H__
-#define __MLX5_ACCEL_TLS_H__
-
-#include <linux/mlx5/driver.h>
-#include <linux/tls.h>
-
-#ifdef CONFIG_MLX5_TLS
-int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id);
-void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
-
-static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev)
-{
- if (!MLX5_CAP_GEN(mdev, tls_tx))
- return false;
-
- if (!MLX5_CAP_GEN(mdev, log_max_dek))
- return false;
-
- return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
-}
-
-static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info)
-{
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- if (crypto_info->version == TLS_1_2_VERSION)
- return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
- break;
- }
-
- return false;
-}
-#else
-static inline int
-mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id) { return -ENOTSUPP; }
-static inline void
-mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {}
-
-static inline bool
-mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
-static inline bool
-mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info) { return false; }
-#endif
-
-enum {
- MLX5_ACCEL_TLS_TX = BIT(0),
- MLX5_ACCEL_TLS_RX = BIT(1),
- MLX5_ACCEL_TLS_V12 = BIT(2),
- MLX5_ACCEL_TLS_V13 = BIT(3),
- MLX5_ACCEL_TLS_LRO = BIT(4),
- MLX5_ACCEL_TLS_IPV6 = BIT(5),
- MLX5_ACCEL_TLS_AES_GCM128 = BIT(30),
- MLX5_ACCEL_TLS_AES_GCM256 = BIT(31),
-};
-
-struct mlx5_ifc_tls_flow_bits {
- u8 src_port[0x10];
- u8 dst_port[0x10];
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
- u8 ipv6[0x1];
- u8 direction_sx[0x1];
- u8 reserved_at_2[0x1e];
-};
-
-#ifdef CONFIG_MLX5_FPGA_TLS
-int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx);
-void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx);
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
- u64 rcd_sn);
-bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
-u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
-int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
-void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
-
-#else
-
-static inline int
-mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx) { return -ENOTSUPP; }
-static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx) { }
-static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
- u32 seq, u64 rcd_sn) { return 0; }
-static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return mlx5_accel_is_ktls_device(mdev);
-}
-static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
-static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
-static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
-#endif
-
-#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 42198e64a7f4..6aca004e88cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -56,8 +56,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
size_t size, dma_addr_t *dma_handle,
int node)
{
+ struct device *device = mlx5_core_dma_dev(dev);
struct mlx5_priv *priv = &dev->priv;
- struct device *device = dev->device;
int original_node;
void *cpu_handle;
@@ -71,53 +71,6 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
return cpu_handle;
}
-static int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_frag_buf *buf, int node)
-{
- dma_addr_t t;
-
- buf->size = size;
- buf->npages = 1;
- buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
-
- buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
- if (!buf->frags)
- return -ENOMEM;
-
- buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
- &t, node);
- if (!buf->frags->buf)
- goto err_out;
-
- buf->frags->map = t;
-
- while (t & ((1 << buf->page_shift) - 1)) {
- --buf->page_shift;
- buf->npages *= 2;
- }
-
- return 0;
-err_out:
- kfree(buf->frags);
- return -ENOMEM;
-}
-
-int mlx5_buf_alloc(struct mlx5_core_dev *dev,
- int size, struct mlx5_frag_buf *buf)
-{
- return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
-}
-EXPORT_SYMBOL(mlx5_buf_alloc);
-
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
-{
- dma_free_coherent(dev->device, buf->size, buf->frags->buf,
- buf->frags->map);
-
- kfree(buf->frags);
-}
-EXPORT_SYMBOL_GPL(mlx5_buf_free);
-
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node)
{
@@ -140,7 +93,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
if (!frag->buf)
goto err_free_buf;
if (frag->map & ((1 << buf->page_shift) - 1)) {
- dma_free_coherent(dev->device, frag_sz,
+ dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz,
buf->frags[i].buf, buf->frags[i].map);
mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
&frag->map, buf->page_shift);
@@ -153,7 +106,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
err_free_buf:
while (i--)
- dma_free_coherent(dev->device, PAGE_SIZE, buf->frags[i].buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf,
buf->frags[i].map);
kfree(buf->frags);
err_out:
@@ -169,7 +122,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
for (i = 0; i < buf->npages; i++) {
int frag_sz = min_t(int, size, PAGE_SIZE);
- dma_free_coherent(dev->device, frag_sz, buf->frags[i].buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf,
buf->frags[i].map);
size -= frag_sz;
}
@@ -183,11 +136,11 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
u32 db_per_page = PAGE_SIZE / cache_line_size();
struct mlx5_db_pgdir *pgdir;
- pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ pgdir = kzalloc_node(sizeof(*pgdir), GFP_KERNEL, node);
if (!pgdir)
return NULL;
- pgdir->bitmap = bitmap_zalloc(db_per_page, GFP_KERNEL);
+ pgdir->bitmap = bitmap_zalloc_node(db_per_page, GFP_KERNEL, node);
if (!pgdir->bitmap) {
kfree(pgdir);
return NULL;
@@ -260,12 +213,6 @@ out:
}
EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
-int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
-{
- return mlx5_db_alloc_node(dev, db, dev->priv.numa_node);
-}
-EXPORT_SYMBOL_GPL(mlx5_db_alloc);
-
void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
{
u32 db_per_page = PAGE_SIZE / cache_line_size();
@@ -275,7 +222,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
__set_bit(db->index, db->u.pgdir->bitmap);
if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) {
- dma_free_coherent(dev->device, PAGE_SIZE,
+ dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
bitmap_free(db->u.pgdir->bitmap);
@@ -286,24 +233,18 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
}
EXPORT_SYMBOL_GPL(mlx5_db_free);
-void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
+void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm)
{
- u64 addr;
int i;
- for (i = 0; i < buf->npages; i++) {
- addr = buf->frags->map + (i << buf->page_shift);
-
- pas[i] = cpu_to_be64(addr);
- }
+ WARN_ON(perm & 0xfc);
+ for (i = 0; i < buf->npages; i++)
+ pas[i] = cpu_to_be64(buf->frags[i].map | perm);
}
-EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
{
- int i;
-
- for (i = 0; i < buf->npages; i++)
- pas[i] = cpu_to_be64(buf->frags[i].map);
+ mlx5_fill_page_frag_array_perm(buf, pas, 0);
}
EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 34cba97f7bf4..2e0d59ca62b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -31,7 +31,6 @@
*/
#include <linux/highmem.h>
-#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
@@ -45,6 +44,7 @@
#include "mlx5_core.h"
#include "lib/eq.h"
+#include "lib/tout.h"
enum {
CMD_IF_REV = 5,
@@ -69,12 +69,10 @@ enum {
MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
};
-static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
- struct mlx5_cmd_msg *in,
- struct mlx5_cmd_msg *out,
- void *uout, int uout_size,
- mlx5_cmd_cbk_t cbk,
- void *context, int page_queue)
+static struct mlx5_cmd_work_ent *
+cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t cbk, void *context, int page_queue)
{
gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
struct mlx5_cmd_work_ent *ent;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
if (!ent)
return ERR_PTR(-ENOMEM);
+ ent->idx = -EINVAL;
ent->in = in;
ent->out = out;
ent->uout = uout;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
ent->context = context;
ent->cmd = cmd;
ent->page_queue = page_queue;
+ refcount_set(&ent->refcnt, 1);
return ent;
}
+static void cmd_free_ent(struct mlx5_cmd_work_ent *ent)
+{
+ kfree(ent);
+}
+
static u8 alloc_token(struct mlx5_cmd *cmd)
{
u8 token;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
return token;
}
-static int alloc_ent(struct mlx5_cmd *cmd)
+static int cmd_alloc_index(struct mlx5_cmd *cmd)
{
unsigned long flags;
int ret;
@@ -123,12 +128,33 @@ static int alloc_ent(struct mlx5_cmd *cmd)
return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
}
-static void free_ent(struct mlx5_cmd *cmd, int idx)
+static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
+ lockdep_assert_held(&cmd->alloc_lock);
+ set_bit(idx, &cmd->bitmask);
+}
+
+static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
+{
+ refcount_inc(&ent->refcnt);
+}
+
+static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
+{
+ struct mlx5_cmd *cmd = ent->cmd;
unsigned long flags;
spin_lock_irqsave(&cmd->alloc_lock, flags);
- set_bit(idx, &cmd->bitmask);
+ if (!refcount_dec_and_test(&ent->refcnt))
+ goto out;
+
+ if (ent->idx >= 0) {
+ cmd_free_index(cmd, ent->idx);
+ up(ent->page_queue ? &cmd->pages_sem : &cmd->sem);
+ }
+
+ cmd_free_ent(ent);
+out:
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}
@@ -164,10 +190,10 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block)
int xor_len = sizeof(*block) - sizeof(block->data) - 1;
if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
- return -EINVAL;
+ return -EHWPOISON;
if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
- return -EINVAL;
+ return -EHWPOISON;
return 0;
}
@@ -204,9 +230,13 @@ static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
static void poll_timeout(struct mlx5_cmd_work_ent *ent)
{
- unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000);
+ struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd);
+ u64 cmd_to_ms = mlx5_tout_ms(dev, CMD);
+ unsigned long poll_end;
u8 own;
+ poll_end = jiffies + msecs_to_jiffies(cmd_to_ms + 1000);
+
do {
own = READ_ONCE(ent->lay->status_own);
if (!(own & CMD_OWNER_HW)) {
@@ -219,11 +249,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
ent->ret = -ETIMEDOUT;
}
-static void free_cmd(struct mlx5_cmd_work_ent *ent)
-{
- kfree(ent);
-}
-
static int verify_signature(struct mlx5_cmd_work_ent *ent)
{
struct mlx5_cmd_mailbox *next = ent->out->next;
@@ -234,12 +259,12 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
if (sig != 0xff)
- return -EINVAL;
+ return -EHWPOISON;
for (i = 0; i < n && next; i++) {
err = verify_block_sig(next->buf);
if (err)
- return err;
+ return -EHWPOISON;
next = next->next;
}
@@ -247,15 +272,15 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
return 0;
}
-static void dump_buf(void *buf, int size, int data_only, int offset)
+static void dump_buf(void *buf, int size, int data_only, int offset, int idx)
{
__be32 *p = buf;
int i;
for (i = 0; i < size; i += 16) {
- pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]),
- be32_to_cpu(p[1]), be32_to_cpu(p[2]),
- be32_to_cpu(p[3]));
+ pr_debug("cmd[%d]: %03x: %08x %08x %08x %08x\n", idx, offset,
+ be32_to_cpu(p[0]), be32_to_cpu(p[1]),
+ be32_to_cpu(p[2]), be32_to_cpu(p[3]));
p += 4;
offset += 16;
}
@@ -317,6 +342,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_DEALLOC_MEMIC:
case MLX5_CMD_OP_PAGE_FAULT_RESUME:
case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+ case MLX5_CMD_OP_DEALLOC_SF:
+ case MLX5_CMD_OP_DESTROY_UCTX:
+ case MLX5_CMD_OP_DESTROY_UMEM:
+ case MLX5_CMD_OP_MODIFY_RQT:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -422,7 +451,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_MODIFY_TIS:
case MLX5_CMD_OP_QUERY_TIS:
case MLX5_CMD_OP_CREATE_RQT:
- case MLX5_CMD_OP_MODIFY_RQT:
case MLX5_CMD_OP_QUERY_RQT:
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
@@ -442,15 +470,21 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
case MLX5_CMD_OP_CREATE_UCTX:
- case MLX5_CMD_OP_DESTROY_UCTX:
case MLX5_CMD_OP_CREATE_UMEM:
- case MLX5_CMD_OP_DESTROY_UMEM:
case MLX5_CMD_OP_ALLOC_MEMIC:
case MLX5_CMD_OP_MODIFY_XRQ:
case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_QUERY_VHCA_STATE:
+ case MLX5_CMD_OP_MODIFY_VHCA_STATE:
+ case MLX5_CMD_OP_ALLOC_SF:
+ case MLX5_CMD_OP_SUSPEND_VHCA:
+ case MLX5_CMD_OP_RESUME_VHCA:
+ case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+ case MLX5_CMD_OP_SAVE_VHCA_STATE:
+ case MLX5_CMD_OP_LOAD_VHCA_STATE:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
- return -EIO;
+ return -ENOLINK;
default:
mlx5_core_err(dev, "Unknown FW command (%d)\n", op);
return -EINVAL;
@@ -641,6 +675,15 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(DESTROY_UMEM);
MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR);
MLX5_COMMAND_STR_CASE(MODIFY_XRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(ALLOC_SF);
+ MLX5_COMMAND_STR_CASE(DEALLOC_SF);
+ MLX5_COMMAND_STR_CASE(SUSPEND_VHCA);
+ MLX5_COMMAND_STR_CASE(RESUME_VHCA);
+ MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE);
+ MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE);
default: return "unknown command opcode";
}
}
@@ -727,44 +770,72 @@ struct mlx5_ifc_mbox_in_bits {
u8 reserved_at_40[0x40];
};
-void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome)
+void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out)
{
- *status = MLX5_GET(mbox_out, out, status);
- *syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
+
+ mlx5_core_err_rl(dev,
+ "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
+ mlx5_command_str(opcode), opcode, op_mod,
+ cmd_status_str(status), status, syndrome, cmd_status_to_err(status));
}
+EXPORT_SYMBOL(mlx5_cmd_out_err);
-static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
+static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out)
{
+ u16 opcode, op_mod;
u32 syndrome;
u8 status;
- u16 opcode;
- u16 op_mod;
u16 uid;
+ int err;
- mlx5_cmd_mbox_status(out, &status, &syndrome);
- if (!status)
- return 0;
+ syndrome = MLX5_GET(mbox_out, out, syndrome);
+ status = MLX5_GET(mbox_out, out, status);
opcode = MLX5_GET(mbox_in, in, opcode);
op_mod = MLX5_GET(mbox_in, in, op_mod);
uid = MLX5_GET(mbox_in, in, uid);
+ err = cmd_status_to_err(status);
+
if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
- mlx5_core_err_rl(dev,
- "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
- mlx5_command_str(opcode), opcode, op_mod,
- cmd_status_str(status), status, syndrome);
+ mlx5_cmd_out_err(dev, opcode, op_mod, out);
else
mlx5_core_dbg(dev,
- "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
- mlx5_command_str(opcode),
- opcode, op_mod,
- cmd_status_str(status),
- status,
- syndrome);
+ "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
+ mlx5_command_str(opcode), opcode, op_mod, uid,
+ cmd_status_str(status), status, syndrome, err);
+}
+
+int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out)
+{
+ /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */
+ if (err == -ENXIO) {
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+ u32 syndrome;
+ u8 status;
- return cmd_status_to_err(status);
+ /* PCI Error, emulate command return status, for smooth reset */
+ err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status);
+ MLX5_SET(mbox_out, out, status, status);
+ MLX5_SET(mbox_out, out, syndrome, syndrome);
+ if (!err)
+ return 0;
+ }
+
+ /* driver or FW delivery error */
+ if (err != -EREMOTEIO && err)
+ return err;
+
+ /* check outbox status */
+ err = cmd_status_to_err(MLX5_GET(mbox_out, out, status));
+ if (err)
+ cmd_status_print(dev, in, out);
+
+ return err;
}
+EXPORT_SYMBOL(mlx5_cmd_check);
static void dump_command(struct mlx5_core_dev *dev,
struct mlx5_cmd_work_ent *ent, int input)
@@ -778,39 +849,41 @@ static void dump_command(struct mlx5_core_dev *dev,
int dump_len;
int i;
+ mlx5_core_dbg(dev, "cmd[%d]: start dump\n", ent->idx);
data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
if (data_only)
mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
- "dump command data %s(0x%x) %s\n",
- mlx5_command_str(op), op,
+ "cmd[%d]: dump command data %s(0x%x) %s\n",
+ ent->idx, mlx5_command_str(op), op,
input ? "INPUT" : "OUTPUT");
else
- mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
- mlx5_command_str(op), op,
+ mlx5_core_dbg(dev, "cmd[%d]: dump command %s(0x%x) %s\n",
+ ent->idx, mlx5_command_str(op), op,
input ? "INPUT" : "OUTPUT");
if (data_only) {
if (input) {
- dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
+ dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset, ent->idx);
offset += sizeof(ent->lay->in);
} else {
- dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
+ dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset, ent->idx);
offset += sizeof(ent->lay->out);
}
} else {
- dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
+ dump_buf(ent->lay, sizeof(*ent->lay), 0, offset, ent->idx);
offset += sizeof(*ent->lay);
}
for (i = 0; i < n && next; i++) {
if (data_only) {
dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
- dump_buf(next->buf, dump_len, 1, offset);
+ dump_buf(next->buf, dump_len, 1, offset, ent->idx);
offset += MLX5_CMD_DATA_BLOCK_SIZE;
} else {
- mlx5_core_dbg(dev, "command block:\n");
- dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
+ mlx5_core_dbg(dev, "cmd[%d]: command block:\n", ent->idx);
+ dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset,
+ ent->idx);
offset += sizeof(struct mlx5_cmd_prot_block);
}
next = next->next;
@@ -818,6 +891,8 @@ static void dump_command(struct mlx5_core_dev *dev,
if (data_only)
pr_debug("\n");
+
+ mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx);
}
static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
@@ -837,41 +912,71 @@ static void cb_timeout_handler(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
cmd);
+ mlx5_cmd_eq_recover(dev);
+
+ /* Maybe got handled by eq recover ? */
+ if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx,
+ mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+ goto out; /* phew, already handled */
+ }
+
ent->ret = -ETIMEDOUT;
- mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
- mlx5_command_str(msg_to_opcode(ent->in)),
- msg_to_opcode(ent->in));
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
+ ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
+
+out:
+ cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
struct mlx5_cmd_msg *msg);
+static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
+{
+ if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
+ return true;
+
+ return cmd->allowed_opcode == opcode;
+}
+
+bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
+{
+ return pci_channel_offline(dev->pdev) ||
+ dev->cmd.state != MLX5_CMDIF_STATE_UP ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
+}
+
static void cmd_work_handler(struct work_struct *work)
{
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
struct mlx5_cmd *cmd = ent->cmd;
- struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
- unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+ bool poll_cmd = ent->polling;
struct mlx5_cmd_layout *lay;
+ struct mlx5_core_dev *dev;
+ unsigned long cb_timeout;
struct semaphore *sem;
unsigned long flags;
- bool poll_cmd = ent->polling;
int alloc_ret;
int cmd_mode;
+ dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
+
+ complete(&ent->handling);
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
- alloc_ret = alloc_ent(cmd);
+ alloc_ret = cmd_alloc_index(cmd);
if (alloc_ret < 0) {
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
if (ent->callback) {
ent->callback(-EAGAIN, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
- free_cmd(ent);
+ cmd_ent_put(ent);
} else {
ent->ret = -EAGAIN;
complete(&ent->done);
@@ -888,7 +993,6 @@ static void cmd_work_handler(struct work_struct *work)
}
cmd->ent_arr[ent->idx] = ent;
- set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
lay = get_inst(cmd, ent->idx);
ent->lay = lay;
memset(lay, 0, sizeof(*lay));
@@ -908,23 +1012,18 @@ static void cmd_work_handler(struct work_struct *work)
ent->ts1 = ktime_get_ns();
cmd_mode = cmd->mode;
- if (ent->callback)
- schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+ if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
+ cmd_ent_get(ent);
+ set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
/* Skip sending command to fw if internal error */
- if (pci_channel_offline(dev->pdev) ||
- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- u8 status = 0;
- u32 drv_synd;
-
- ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
- MLX5_SET(mbox_out, ent->out, status, status);
- MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
-
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
+ ent->ret = -ENXIO;
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
return;
}
+ cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
@@ -934,7 +1033,32 @@ static void cmd_work_handler(struct work_struct *work)
poll_timeout(ent);
/* make sure we read the descriptor after ownership is SW */
rmb();
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT));
+ }
+}
+
+static int deliv_status_to_err(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_DELIVERY_STAT_OK:
+ case MLX5_DRIVER_STATUS_ABORTED:
+ return 0;
+ case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+ case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+ return -EBADR;
+ case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+ case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+ case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+ return -EFAULT; /* Bad address */
+ case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+ case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+ case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+ case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+ return -ENOMSG;
+ case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+ return -EIO;
+ default:
+ return -EINVAL;
}
}
@@ -968,25 +1092,62 @@ static const char *deliv_status_to_str(u8 status)
}
}
+enum {
+ MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000,
+};
+
+static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);
+
+ mlx5_cmd_eq_recover(dev);
+
+ /* Re-wait on the ent->done after executing the recovery flow. If the
+ * recovery flow (or any other recovery flow running simultaneously)
+ * has recovered an EQE, it should cause the entry to be completed by
+ * the command interface.
+ */
+ if (wait_for_completion_timeout(&ent->done, timeout)) {
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx,
+ mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+ return;
+ }
+
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
+ mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+
+ ent->ret = -ETIMEDOUT;
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
+}
+
static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
{
- unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+ unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
struct mlx5_cmd *cmd = &dev->cmd;
int err;
- if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
- wait_for_completion(&ent->done);
- } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
- ent->ret = -ETIMEDOUT;
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ if (!wait_for_completion_timeout(&ent->handling, timeout) &&
+ cancel_work_sync(&ent->work)) {
+ ent->ret = -ECANCELED;
+ goto out_err;
}
+ if (cmd->mode == CMD_MODE_POLLING || ent->polling)
+ wait_for_completion(&ent->done);
+ else if (!wait_for_completion_timeout(&ent->done, timeout))
+ wait_func_handle_exec_timeout(dev, ent);
+out_err:
err = ent->ret;
if (err == -ETIMEDOUT) {
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
msg_to_opcode(ent->in));
+ } else if (err == -ECANCELED) {
+ mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
}
mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
err, deliv_status_to_str(ent->status), ent->status);
@@ -997,16 +1158,27 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
/* Notes:
* 1. Callback functions may not sleep
* 2. page queue commands do not support asynchrous completion
+ *
+ * return value in case (!callback):
+ * ret < 0 : Command execution couldn't be submitted by driver
+ * ret > 0 : Command execution couldn't be performed by firmware
+ * ret == 0: Command was executed by FW, Caller must check FW outbox status.
+ *
+ * return value in case (callback):
+ * ret < 0 : Command execution couldn't be submitted by driver
+ * ret == 0: Command will be submitted to FW for execution
+ * and the callback will be called for further status updates
*/
static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
struct mlx5_cmd_msg *out, void *uout, int uout_size,
mlx5_cmd_cbk_t callback,
- void *context, int page_queue, u8 *status,
+ void *context, int page_queue,
u8 token, bool force_polling)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
struct mlx5_cmd_stats *stats;
+ u8 status = 0;
int err = 0;
s64 ds;
u16 op;
@@ -1014,14 +1186,20 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (callback && page_queue)
return -EINVAL;
- ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
- page_queue);
+ ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
+ callback, context, page_queue);
if (IS_ERR(ent))
return PTR_ERR(ent);
+ /* put for this ent is when consumed, depending on the use case
+ * 1) (!callback) blocking flow: by caller after wait_func completes
+ * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled
+ */
+
ent->token = token;
ent->polling = force_polling;
+ init_completion(&ent->handling);
if (!callback)
init_completion(&ent->done);
@@ -1031,20 +1209,20 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
cmd_work_handler(&ent->work);
} else if (!queue_work(cmd->wq, &ent->work)) {
mlx5_core_warn(dev, "failed to queue work\n");
- err = -ENOMEM;
+ err = -EALREADY;
goto out_free;
}
if (callback)
- goto out;
+ return 0; /* mlx5_cmd_comp_handler() will put(ent) */
err = wait_func(dev, ent);
- if (err == -ETIMEDOUT)
- goto out;
+ if (err == -ETIMEDOUT || err == -ECANCELED)
+ goto out_free;
ds = ent->ts2 - ent->ts1;
op = MLX5_GET(mbox_in, in->first.data, opcode);
- if (op < ARRAY_SIZE(cmd->stats)) {
+ if (op < MLX5_CMD_OP_MAX) {
stats = &cmd->stats[op];
spin_lock_irq(&stats->lock);
stats->sum += ds;
@@ -1054,12 +1232,11 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
"fw exec time for %s is %lld nsec\n",
mlx5_command_str(op), ds);
- *status = ent->status;
out_free:
- free_cmd(ent);
-out:
- return err;
+ status = ent->status;
+ cmd_ent_put(ent);
+ return err ? : status;
}
static ssize_t dbg_write(struct file *filp, const char __user *buf,
@@ -1376,7 +1553,7 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
{
struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
- dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
+ dbg->dbg_root = debugfs_create_dir("cmd", mlx5_debugfs_get_dev_root(dev));
debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops);
debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops);
@@ -1387,6 +1564,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
mlx5_cmdif_debugfs_init(dev);
}
+void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ down(&cmd->sem);
+ down(&cmd->pages_sem);
+
+ cmd->allowed_opcode = opcode;
+
+ up(&cmd->pages_sem);
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ up(&cmd->sem);
+}
+
static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
{
struct mlx5_cmd *cmd = &dev->cmd;
@@ -1461,8 +1654,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
vector = vec & 0xffffffff;
for (i = 0; i < (1 << cmd->log_sz); i++) {
if (test_bit(i, &vector)) {
- struct semaphore *sem;
-
ent = cmd->ent_arr[i];
/* if we already completed the command, ignore it */
@@ -1472,42 +1663,39 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
if (!forced) {
mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
ent->idx);
- free_ent(cmd, ent->idx);
- free_cmd(ent);
+ cmd_ent_put(ent);
}
continue;
}
- if (ent->callback)
- cancel_delayed_work(&ent->cb_timeout_work);
- if (ent->page_queue)
- sem = &cmd->pages_sem;
- else
- sem = &cmd->sem;
+ if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work))
+ cmd_ent_put(ent); /* timeout work was canceled */
+
+ if (!forced || /* Real FW completion */
+ pci_channel_offline(dev->pdev) || /* FW is inaccessible */
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ cmd_ent_put(ent);
+
ent->ts2 = ktime_get_ns();
memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
dump_command(dev, ent, 0);
- if (!ent->ret) {
+
+ if (vec & MLX5_TRIGGERED_CMD_COMP)
+ ent->ret = -ENXIO;
+
+ if (!ent->ret) { /* Command completed by FW */
if (!cmd->checksum_disabled)
ent->ret = verify_signature(ent);
- else
- ent->ret = 0;
- if (vec & MLX5_TRIGGERED_CMD_COMP)
- ent->status = MLX5_DRIVER_STATUS_ABORTED;
- else
- ent->status = ent->lay->status_own >> 1;
+
+ ent->status = ent->lay->status_own >> 1;
mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
ent->ret, deliv_status_to_str(ent->status), ent->status);
}
- /* only real completion will free the entry slot */
- if (!forced)
- free_ent(cmd, ent->idx);
-
if (ent->callback) {
ds = ent->ts2 - ent->ts1;
- if (ent->op < ARRAY_SIZE(cmd->stats)) {
+ if (ent->op < MLX5_CMD_OP_MAX) {
stats = &cmd->stats[ent->op];
spin_lock_irqsave(&stats->lock, flags);
stats->sum += ds;
@@ -1517,36 +1705,38 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
callback = ent->callback;
context = ent->context;
- err = ent->ret;
- if (!err) {
+ err = ent->ret ? : ent->status;
+ if (err > 0) /* Failed in FW, command didn't execute */
+ err = deliv_status_to_err(err);
+
+ if (!err)
err = mlx5_copy_from_msg(ent->uout,
ent->out,
ent->uout_size);
- err = err ? err : mlx5_cmd_check(dev,
- ent->in->first.data,
- ent->uout);
- }
-
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
- err = err ? err : ent->status;
- if (!forced)
- free_cmd(ent);
+ /* final consumer is done, release ent */
+ cmd_ent_put(ent);
callback(err, context);
} else {
+ /* release wait_func() so mlx5_cmd_invoke()
+ * can make the final ent_put()
+ */
complete(&ent->done);
}
- up(sem);
}
}
}
-void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ unsigned long bitmask;
unsigned long flags;
u64 vector;
+ int i;
/* wait for pending handlers to complete */
mlx5_eq_synchronize_cmd_irq(dev);
@@ -1555,11 +1745,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
if (!vector)
goto no_trig;
+ bitmask = vector;
+ /* we must increment the allocated entries refcount before triggering the completions
+ * to guarantee pending commands will not get freed in the meanwhile.
+ * For that reason, it also has to be done inside the alloc_lock.
+ */
+ for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+ cmd_ent_get(cmd->ent_arr[i]);
vector |= MLX5_TRIGGERED_CMD_COMP;
spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
mlx5_cmd_comp_handler(dev, vector, true);
+ for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+ cmd_ent_put(cmd->ent_arr[i]);
return;
no_trig:
@@ -1571,12 +1770,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev)
struct mlx5_cmd *cmd = &dev->cmd;
int i;
- for (i = 0; i < cmd->max_reg_cmds; i++)
- while (down_trylock(&cmd->sem))
+ for (i = 0; i < cmd->max_reg_cmds; i++) {
+ while (down_trylock(&cmd->sem)) {
mlx5_cmd_trigger_completions(dev);
+ cond_resched();
+ }
+ }
- while (down_trylock(&cmd->pages_sem))
+ while (down_trylock(&cmd->pages_sem)) {
mlx5_cmd_trigger_completions(dev);
+ cond_resched();
+ }
/* Unlock cmdif */
up(&cmd->pages_sem);
@@ -1584,31 +1788,6 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev)
up(&cmd->sem);
}
-static int status_to_err(u8 status)
-{
- switch (status) {
- case MLX5_CMD_DELIVERY_STAT_OK:
- case MLX5_DRIVER_STATUS_ABORTED:
- return 0;
- case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
- case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
- return -EBADR;
- case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
- case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
- case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
- return -EFAULT; /* Bad address */
- case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
- case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
- case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
- case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
- return -ENOMSG;
- case MLX5_CMD_DELIVERY_STAT_FW_ERR:
- return -EIO;
- default:
- return -EINVAL;
- }
-}
-
static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
gfp_t gfp)
{
@@ -1652,28 +1831,23 @@ static int is_manage_pages(void *in)
return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES;
}
+/* Notes:
+ * 1. Callback functions may not sleep
+ * 2. Page queue commands do not support asynchrous completion
+ */
static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int out_size, mlx5_cmd_cbk_t callback, void *context,
bool force_polling)
{
- struct mlx5_cmd_msg *inb;
- struct mlx5_cmd_msg *outb;
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+ struct mlx5_cmd_msg *inb, *outb;
int pages_queue;
gfp_t gfp;
- int err;
- u8 status = 0;
- u32 drv_synd;
u8 token;
+ int err;
- if (pci_channel_offline(dev->pdev) ||
- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- u16 opcode = MLX5_GET(mbox_in, in, opcode);
-
- err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
- MLX5_SET(mbox_out, out, status, status);
- MLX5_SET(mbox_out, out, syndrome, drv_synd);
- return err;
- }
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode))
+ return -ENXIO;
pages_queue = is_manage_pages(in);
gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
@@ -1699,46 +1873,143 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
}
err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
- pages_queue, &status, token, force_polling);
+ pages_queue, token, force_polling);
+ if (callback)
+ return err;
+
+ if (err > 0) /* Failed in FW, command didn't execute */
+ err = deliv_status_to_err(err);
+
if (err)
goto out_out;
- mlx5_core_dbg(dev, "err %d, status %d\n", err, status);
- if (status) {
- err = status_to_err(status);
- goto out_out;
+ /* command completed by FW */
+ err = mlx5_copy_from_msg(out, outb, out_size);
+out_out:
+ mlx5_free_cmd_msg(dev, outb);
+out_in:
+ free_msg(dev, inb);
+ return err;
+}
+
+static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
+ u32 syndrome, int err)
+{
+ struct mlx5_cmd_stats *stats;
+
+ if (!err)
+ return;
+
+ stats = &dev->cmd.stats[opcode];
+ spin_lock_irq(&stats->lock);
+ stats->failed++;
+ if (err < 0)
+ stats->last_failed_errno = -err;
+ if (err == -EREMOTEIO) {
+ stats->failed_mbox_status++;
+ stats->last_failed_mbox_status = status;
+ stats->last_failed_syndrome = syndrome;
}
+ spin_unlock_irq(&stats->lock);
+}
- if (!callback)
- err = mlx5_copy_from_msg(out, outb, out_size);
+/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
+static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
+{
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
-out_out:
- if (!callback)
- mlx5_free_cmd_msg(dev, outb);
+ if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
+ err = -EIO;
-out_in:
- if (!callback)
- free_msg(dev, inb);
+ if (!err && status != MLX5_CMD_STAT_OK)
+ err = -EREMOTEIO;
+
+ cmd_status_log(dev, opcode, status, syndrome, err);
+ return err;
+}
+
+/**
+ * mlx5_cmd_do - Executes a fw command, wait for completion.
+ * Unlike mlx5_cmd_exec, this function will not translate or intercept
+ * outbox.status and will return -EREMOTEIO when
+ * outbox.status != MLX5_CMD_STAT_OK
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return:
+ * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK.
+ * Caller must check FW outbox status.
+ * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK.
+ * < 0 : Command execution couldn't be performed by firmware or driver
+ */
+int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size)
+{
+ int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+ err = cmd_status_err(dev, err, opcode, out);
return err;
}
+EXPORT_SYMBOL(mlx5_cmd_do);
+/**
+ * mlx5_cmd_exec - Executes a fw command, wait for completion
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return: 0 if no error, FW command execution was successful
+ * and outbox status is ok.
+ */
int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int out_size)
{
- int err;
+ int err = mlx5_cmd_do(dev, in, in_size, out, out_size);
- err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
- return err ? : mlx5_cmd_check(dev, in, out);
+ return mlx5_cmd_check(dev, err, in, out);
}
EXPORT_SYMBOL(mlx5_cmd_exec);
+/**
+ * mlx5_cmd_exec_polling - Executes a fw command, poll for completion
+ * Needed for driver force teardown, when command completion EQ
+ * will not be available to complete the command
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return: 0 if no error, FW command execution was successful
+ * and outbox status is ok.
+ */
+int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size)
+{
+ int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+ err = cmd_status_err(dev, err, opcode, out);
+ return mlx5_cmd_check(dev, err, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_polling);
+
void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
struct mlx5_async_ctx *ctx)
{
ctx->dev = dev;
/* Starts at 1 to avoid doing wake_up if we are not cleaning up */
atomic_set(&ctx->num_inflight, 1);
- init_waitqueue_head(&ctx->wait);
+ init_completion(&ctx->inflight_done);
}
EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
@@ -1752,19 +2023,21 @@ EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
*/
void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
{
- atomic_dec(&ctx->num_inflight);
- wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
+ if (!atomic_dec_and_test(&ctx->num_inflight))
+ wait_for_completion(&ctx->inflight_done);
}
EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
static void mlx5_cmd_exec_cb_handler(int status, void *_work)
{
struct mlx5_async_work *work = _work;
- struct mlx5_async_ctx *ctx = work->ctx;
+ struct mlx5_async_ctx *ctx;
+ ctx = work->ctx;
+ status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
work->user_callback(status, work);
if (atomic_dec_and_test(&ctx->num_inflight))
- wake_up(&ctx->wait);
+ complete(&ctx->inflight_done);
}
int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
@@ -1775,28 +2048,19 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
work->ctx = ctx;
work->user_callback = callback;
+ work->opcode = MLX5_GET(mbox_in, in, opcode);
+ work->out = out;
if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
return -EIO;
ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
mlx5_cmd_exec_cb_handler, work, false);
if (ret && atomic_dec_and_test(&ctx->num_inflight))
- wake_up(&ctx->wait);
+ complete(&ctx->inflight_done);
return ret;
}
EXPORT_SYMBOL(mlx5_cmd_exec_cb);
-int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
- void *out, int out_size)
-{
- int err;
-
- err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
-
- return err ? : mlx5_cmd_check(dev, in, out);
-}
-EXPORT_SYMBOL(mlx5_cmd_exec_polling);
-
static void destroy_msg_cache(struct mlx5_core_dev *dev)
{
struct cmd_msg_cache *ch;
@@ -1853,9 +2117,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev)
static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
{
- struct device *ddev = dev->device;
-
- cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
+ cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE,
&cmd->alloc_dma, GFP_KERNEL);
if (!cmd->cmd_alloc_buf)
return -ENOMEM;
@@ -1868,9 +2130,9 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
return 0;
}
- dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
cmd->alloc_dma);
- cmd->cmd_alloc_buf = dma_alloc_coherent(ddev,
+ cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev),
2 * MLX5_ADAPTER_PAGE_SIZE - 1,
&cmd->alloc_dma, GFP_KERNEL);
if (!cmd->cmd_alloc_buf)
@@ -1884,12 +2146,15 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
{
- struct device *ddev = dev->device;
-
- dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), cmd->alloc_size, cmd->cmd_alloc_buf,
cmd->alloc_dma);
}
+static u16 cmdif_rev(struct mlx5_core_dev *dev)
+{
+ return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+}
+
int mlx5_cmd_init(struct mlx5_core_dev *dev)
{
int size = sizeof(struct mlx5_cmd_prot_block);
@@ -1909,10 +2174,16 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
return -EINVAL;
}
- cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0);
- if (!cmd->pool)
+ cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL);
+ if (!cmd->stats)
return -ENOMEM;
+ cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
+ if (!cmd->pool) {
+ err = -ENOMEM;
+ goto dma_pool_err;
+ }
+
err = alloc_cmd_page(dev, cmd);
if (err)
goto err_free_pool;
@@ -1933,6 +2204,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
goto err_free_page;
}
+ cmd->state = MLX5_CMDIF_STATE_DOWN;
cmd->checksum_disabled = 1;
cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
@@ -1947,7 +2219,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
spin_lock_init(&cmd->alloc_lock);
spin_lock_init(&cmd->token_lock);
- for (i = 0; i < ARRAY_SIZE(cmd->stats); i++)
+ for (i = 0; i < MLX5_CMD_OP_MAX; i++)
spin_lock_init(&cmd->stats[i].lock);
sema_init(&cmd->sem, cmd->max_reg_cmds);
@@ -1970,6 +2242,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
cmd->mode = CMD_MODE_POLLING;
+ cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
create_msg_cache(dev);
@@ -1993,10 +2266,10 @@ err_free_page:
err_free_pool:
dma_pool_destroy(cmd->pool);
-
+dma_pool_err:
+ kvfree(cmd->stats);
return err;
}
-EXPORT_SYMBOL(mlx5_cmd_init);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
{
@@ -2007,5 +2280,11 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
destroy_msg_cache(dev);
free_cmd_page(dev, cmd);
dma_pool_destroy(cmd->pool);
+ kvfree(cmd->stats);
+}
+
+void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
+ enum mlx5_cmdif_state cmdif_state)
+{
+ dev->cmd.state = cmdif_state;
}
-EXPORT_SYMBOL(mlx5_cmd_cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 818edc63e428..4caa1b6f40ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -31,10 +31,8 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
#include <rdma/ib_verbs.h>
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"
@@ -43,11 +41,11 @@
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
-void mlx5_cq_tasklet_cb(unsigned long data)
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t)
{
unsigned long flags;
unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
- struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
+ struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task);
struct mlx5_core_cq *mcq;
struct mlx5_core_cq *temp;
@@ -87,12 +85,13 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
}
-int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
- u32 *in, int inlen, u32 *out, int outlen)
+/* Callers must verify outbox status in case of err */
+int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen, u32 *out, int outlen)
{
- int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
- u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
- u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ c_eqn_or_apu_element);
+ u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
struct mlx5_eq_comp *eq;
int err;
@@ -102,7 +101,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
memset(out, 0, outlen);
MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ err = mlx5_cmd_do(dev, in, inlen, out, outlen);
if (err)
return err;
@@ -136,41 +135,49 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cqn);
cq->uar = dev->priv.uar;
+ cq->irqn = eq->core.irqn;
return 0;
err_cq_add:
mlx5_eq_del_cq(&eq->core, cq);
err_cmd:
- memset(din, 0, sizeof(din));
- memset(dout, 0, sizeof(dout));
MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
MLX5_SET(destroy_cq_in, din, uid, cq->uid);
- mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
+ mlx5_cmd_exec_in(dev, destroy_cq, din);
return err;
}
+EXPORT_SYMBOL(mlx5_create_cq);
+
+/* oubox is checked and err val is normalized */
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen, u32 *out, int outlen)
+{
+ int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen);
+
+ return mlx5_cmd_check(dev, err, in, out);
+}
EXPORT_SYMBOL(mlx5_core_create_cq);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
{
- u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
int err;
+ mlx5_debug_cq_remove(dev, cq);
+
mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
mlx5_eq_del_cq(&cq->eq->core, cq);
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
MLX5_SET(destroy_cq_in, in, uid, cq->uid);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_in(dev, destroy_cq, in);
if (err)
return err;
synchronize_irq(cq->irqn);
-
- mlx5_debug_cq_remove(dev, cq);
mlx5_cq_put(cq);
wait_for_completion(&cq->free);
@@ -179,20 +186,20 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
EXPORT_SYMBOL(mlx5_core_destroy_cq);
int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
- u32 *out, int outlen)
+ u32 *out)
{
- u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {};
MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ);
MLX5_SET(query_cq_in, in, cqn, cq->cqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ return mlx5_cmd_exec_inout(dev, query_cq, in, out);
}
EXPORT_SYMBOL(mlx5_core_query_cq);
int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen)
{
- u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {};
MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
MLX5_SET(modify_cq_in, in, uid, cq->uid);
@@ -205,7 +212,7 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
u16 cq_period,
u16 cq_max_count)
{
- u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {};
void *cqc;
MLX5_SET(modify_cq_in, in, cqn, cq->cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 04854e5fbcd7..3e232a65a0c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/cq.h>
@@ -99,26 +98,32 @@ void mlx5_unregister_debugfs(void)
debugfs_remove(mlx5_debugfs_root);
}
-void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev)
{
- atomic_set(&dev->num_qps, 0);
+ return dev->priv.dbg.dbg_root;
+}
+EXPORT_SYMBOL(mlx5_debugfs_get_dev_root);
- dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root);
+void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+{
+ dev->priv.dbg.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg.dbg_root);
}
+EXPORT_SYMBOL(mlx5_qp_debugfs_init);
void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.qp_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.qp_debugfs);
}
+EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup);
void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
{
- dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root);
+ dev->priv.dbg.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg.dbg_root);
}
void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.eq_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.eq_debugfs);
}
static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
@@ -161,6 +166,28 @@ static const struct file_operations stats_fops = {
.write = average_write,
};
+static ssize_t slots_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cmd *cmd;
+ char tbuf[6];
+ int weight;
+ int field;
+ int ret;
+
+ cmd = filp->private_data;
+ weight = bitmap_weight(&cmd->bitmask, cmd->max_reg_cmds);
+ field = cmd->max_reg_cmds - weight;
+ ret = snprintf(tbuf, sizeof(tbuf), "%d\n", field);
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static const struct file_operations slots_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = slots_read,
+};
+
void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
{
struct mlx5_cmd_stats *stats;
@@ -168,10 +195,12 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
const char *namep;
int i;
- cmd = &dev->priv.cmdif_debugfs;
- *cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
+ cmd = &dev->priv.dbg.cmdif_debugfs;
+ *cmd = debugfs_create_dir("commands", dev->priv.dbg.dbg_root);
- for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
+ debugfs_create_file("slots_inuse", 0400, *cmd, &dev->cmd, &slots_fops);
+
+ for (i = 0; i < MLX5_CMD_OP_MAX; i++) {
stats = &dev->cmd.stats[i];
namep = mlx5_command_str(i);
if (strcmp(namep, "unknown command opcode")) {
@@ -180,64 +209,94 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
debugfs_create_file("average", 0400, stats->root, stats,
&stats_fops);
debugfs_create_u64("n", 0400, stats->root, &stats->n);
+ debugfs_create_u64("failed", 0400, stats->root, &stats->failed);
+ debugfs_create_u64("failed_mbox_status", 0400, stats->root,
+ &stats->failed_mbox_status);
+ debugfs_create_u32("last_failed_errno", 0400, stats->root,
+ &stats->last_failed_errno);
+ debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
+ &stats->last_failed_mbox_status);
+ debugfs_create_x32("last_failed_syndrome", 0400, stats->root,
+ &stats->last_failed_syndrome);
}
}
}
void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs);
}
void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
{
- dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root);
+ dev->priv.dbg.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg.dbg_root);
}
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
{
- debugfs_remove_recursive(dev->priv.cq_debugfs);
+ debugfs_remove_recursive(dev->priv.dbg.cq_debugfs);
+}
+
+void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev)
+{
+ struct dentry *pages;
+
+ dev->priv.dbg.pages_debugfs = debugfs_create_dir("pages", dev->priv.dbg.dbg_root);
+ pages = dev->priv.dbg.pages_debugfs;
+
+ debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
+ debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages);
+ debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages);
+ debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed);
+ debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped);
+ debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages,
+ &dev->priv.reclaim_pages_discard);
+}
+
+void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ debugfs_remove_recursive(dev->priv.dbg.pages_debugfs);
}
static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
int index, int *is_str)
{
int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
- struct mlx5_qp_context *ctx;
+ u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {};
u64 param = 0;
u32 *out;
+ int state;
+ u32 *qpc;
int err;
- int no_sq;
out = kzalloc(outlen, GFP_KERNEL);
if (!out)
- return param;
+ return 0;
- err = mlx5_core_qp_query(dev, qp, out, outlen);
- if (err) {
- mlx5_core_warn(dev, "failed to query qp err=%d\n", err);
+ MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+ MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+ err = mlx5_cmd_exec_inout(dev, query_qp, in, out);
+ if (err)
goto out;
- }
*is_str = 0;
- /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
- ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc);
-
+ qpc = MLX5_ADDR_OF(query_qp_out, out, qpc);
switch (index) {
case QP_PID:
param = qp->pid;
break;
case QP_STATE:
- param = (unsigned long)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
+ state = MLX5_GET(qpc, qpc, state);
+ param = (unsigned long)mlx5_qp_state_str(state);
*is_str = 1;
break;
case QP_XPORT:
- param = (unsigned long)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
+ param = (unsigned long)mlx5_qp_type_str(MLX5_GET(qpc, qpc, st));
*is_str = 1;
break;
case QP_MTU:
- switch (ctx->mtu_msgmax >> 5) {
+ switch (MLX5_GET(qpc, qpc, mtu)) {
case IB_MTU_256:
param = 256;
break;
@@ -258,46 +317,32 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
}
break;
case QP_N_RECV:
- param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+ param = 1 << MLX5_GET(qpc, qpc, log_rq_size);
break;
case QP_RECV_SZ:
- param = 1 << ((ctx->rq_size_stride & 7) + 4);
+ param = 1 << (MLX5_GET(qpc, qpc, log_rq_stride) + 4);
break;
case QP_N_SEND:
- no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15;
- if (!no_sq)
- param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11);
- else
- param = 0;
+ if (!MLX5_GET(qpc, qpc, no_sq))
+ param = 1 << MLX5_GET(qpc, qpc, log_sq_size);
break;
case QP_LOG_PG_SZ:
- param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f;
- param += 12;
+ param = MLX5_GET(qpc, qpc, log_page_size) + 12;
break;
case QP_RQPN:
- param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff;
+ param = MLX5_GET(qpc, qpc, remote_qpn);
break;
}
-
out:
kfree(out);
return param;
}
-static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen)
-{
- u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
-
- MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
- MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
int index)
{
int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
u64 param = 0;
void *ctx;
u32 *out;
@@ -307,7 +352,9 @@ static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
if (!out)
return param;
- err = mlx5_core_eq_query(dev, eq, out, outlen);
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ err = mlx5_cmd_exec_inout(dev, query_eq, in, out);
if (err) {
mlx5_core_warn(dev, "failed to query eq\n");
goto out;
@@ -344,7 +391,7 @@ static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
if (!out)
return param;
- err = mlx5_core_query_cq(dev, cq, out, outlen);
+ err = mlx5_core_query_cq(dev, cq, out);
if (err) {
mlx5_core_warn(dev, "failed to query cq\n");
goto out;
@@ -453,7 +500,7 @@ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
if (!mlx5_debugfs_root)
return 0;
- err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs,
+ err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.dbg.qp_debugfs,
&qp->dbg, qp->qpn, qp_fields,
ARRAY_SIZE(qp_fields), qp);
if (err)
@@ -461,6 +508,7 @@ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
return err;
}
+EXPORT_SYMBOL(mlx5_debug_qp_add);
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
{
@@ -470,6 +518,7 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
if (qp->dbg)
rem_res_tree(qp->dbg);
}
+EXPORT_SYMBOL(mlx5_debug_qp_remove);
int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
@@ -478,7 +527,7 @@ int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (!mlx5_debugfs_root)
return 0;
- err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs,
+ err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.dbg.eq_debugfs,
&eq->dbg, eq->eqn, eq_fields,
ARRAY_SIZE(eq_fields), eq);
if (err)
@@ -503,7 +552,7 @@ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
if (!mlx5_debugfs_root)
return 0;
- err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs,
+ err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.dbg.cq_debugfs,
&cq->dbg, cq->cqn, cq_fields,
ARRAY_SIZE(cq_fields), cq);
if (err)
@@ -517,6 +566,8 @@ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
if (!mlx5_debugfs_root)
return;
- if (cq->dbg)
+ if (cq->dbg) {
rem_res_tree(cq->dbg);
+ cq->dbg = NULL;
+ }
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 50862275544e..0571e40c6ee5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -31,315 +31,612 @@
*/
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/mlx5_ifc_vdpa.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
-static LIST_HEAD(intf_list);
-static LIST_HEAD(mlx5_dev_list);
/* intf dev list mutex */
static DEFINE_MUTEX(mlx5_intf_mutex);
+static DEFINE_IDA(mlx5_adev_ida);
-struct mlx5_device_context {
- struct list_head list;
- struct mlx5_interface *intf;
- void *context;
- unsigned long state;
-};
+static bool is_eth_rep_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_ESWITCH))
+ return false;
-enum {
- MLX5_INTERFACE_ADDED,
- MLX5_INTERFACE_ATTACHED,
-};
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return false;
+
+ if (!is_mdev_switchdev_mode(dev))
+ return false;
+ return true;
+}
-void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+bool mlx5_eth_supported(struct mlx5_core_dev *dev)
{
- struct mlx5_device_context *dev_ctx;
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+ if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
+ return false;
- if (!mlx5_lag_intf_add(intf, priv))
- return;
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return false;
- dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
- if (!dev_ctx)
- return;
+ if (!MLX5_CAP_GEN(dev, eth_net_offloads)) {
+ mlx5_core_warn(dev, "Missing eth_net_offloads capability\n");
+ return false;
+ }
- dev_ctx->intf = intf;
+ if (!MLX5_CAP_GEN(dev, nic_flow_table)) {
+ mlx5_core_warn(dev, "Missing nic_flow_table capability\n");
+ return false;
+ }
- dev_ctx->context = intf->add(dev);
- if (dev_ctx->context) {
- set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
- if (intf->attach)
- set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+ if (!MLX5_CAP_ETH(dev, csum_cap)) {
+ mlx5_core_warn(dev, "Missing csum_cap capability\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_ETH(dev, max_lso_cap)) {
+ mlx5_core_warn(dev, "Missing max_lso_cap capability\n");
+ return false;
+ }
- spin_lock_irq(&priv->ctx_lock);
- list_add_tail(&dev_ctx->list, &priv->ctx_list);
- spin_unlock_irq(&priv->ctx_lock);
+ if (!MLX5_CAP_ETH(dev, vlan_cap)) {
+ mlx5_core_warn(dev, "Missing vlan_cap capability\n");
+ return false;
}
- if (!dev_ctx->context)
- kfree(dev_ctx);
+ if (!MLX5_CAP_ETH(dev, rss_ind_tbl_cap)) {
+ mlx5_core_warn(dev, "Missing rss_ind_tbl_cap capability\n");
+ return false;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.max_ft_level) < 3) {
+ mlx5_core_warn(dev, "max_ft_level < 3\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_ETH(dev, self_lb_en_modifiable))
+ mlx5_core_warn(dev, "Self loop back prevention is not supported\n");
+ if (!MLX5_CAP_GEN(dev, cq_moderation))
+ mlx5_core_warn(dev, "CQ moderation is not supported\n");
+
+ return true;
}
-static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
- struct mlx5_priv *priv)
+static bool is_eth_enabled(struct mlx5_core_dev *dev)
{
- struct mlx5_device_context *dev_ctx;
+ union devlink_param_value val;
+ int err;
- list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf == intf)
- return dev_ctx;
- return NULL;
+ err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ &val);
+ return err ? false : val.vbool;
}
-void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
{
- struct mlx5_device_context *dev_ctx;
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+ if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
+ return false;
- dev_ctx = mlx5_get_device(intf, priv);
- if (!dev_ctx)
- return;
+ if (mlx5_core_is_pf(dev))
+ return false;
- spin_lock_irq(&priv->ctx_lock);
- list_del(&dev_ctx->list);
- spin_unlock_irq(&priv->ctx_lock);
+ if (!(MLX5_CAP_GEN_64(dev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q))
+ return false;
- if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- intf->remove(dev, dev_ctx->context);
+ if (!(MLX5_CAP_DEV_VDPA_EMULATION(dev, event_mode) &
+ MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE))
+ return false;
- kfree(dev_ctx);
+ if (!MLX5_CAP_DEV_VDPA_EMULATION(dev, eth_frame_offload_type))
+ return false;
+
+ return true;
}
-static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+static bool is_vnet_enabled(struct mlx5_core_dev *dev)
{
- struct mlx5_device_context *dev_ctx;
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-
- dev_ctx = mlx5_get_device(intf, priv);
- if (!dev_ctx)
- return;
+ union devlink_param_value val;
+ int err;
- if (intf->attach) {
- if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
- return;
- if (intf->attach(dev, dev_ctx->context))
- return;
- set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
- } else {
- if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- return;
- dev_ctx->context = intf->add(dev);
- if (!dev_ctx->context)
- return;
- set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
- }
+ err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ &val);
+ return err ? false : val.vbool;
}
-void mlx5_attach_device(struct mlx5_core_dev *dev)
+static bool is_ib_rep_supported(struct mlx5_core_dev *dev)
{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_interface *intf;
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return false;
- mutex_lock(&mlx5_intf_mutex);
- list_for_each_entry(intf, &intf_list, list)
- mlx5_attach_interface(intf, priv);
- mutex_unlock(&mlx5_intf_mutex);
+ if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
+ return false;
+
+ if (!is_eth_rep_supported(dev))
+ return false;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return false;
+
+ if (!is_mdev_switchdev_mode(dev))
+ return false;
+
+ if (mlx5_core_mp_enabled(dev))
+ return false;
+
+ return true;
}
-static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+static bool is_mp_supported(struct mlx5_core_dev *dev)
{
- struct mlx5_device_context *dev_ctx;
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return false;
- dev_ctx = mlx5_get_device(intf, priv);
- if (!dev_ctx)
- return;
+ if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
+ return false;
- if (intf->detach) {
- if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
- return;
- intf->detach(dev, dev_ctx->context);
- clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
- } else {
- if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- return;
- intf->remove(dev, dev_ctx->context);
- clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
- }
+ if (is_ib_rep_supported(dev))
+ return false;
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return false;
+
+ if (!mlx5_core_is_mp_slave(dev))
+ return false;
+
+ return true;
}
-void mlx5_detach_device(struct mlx5_core_dev *dev)
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev)
{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_interface *intf;
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return false;
- mutex_lock(&mlx5_intf_mutex);
- list_for_each_entry(intf, &intf_list, list)
- mlx5_detach_interface(intf, priv);
- mutex_unlock(&mlx5_intf_mutex);
+ if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
+ return false;
+
+ if (is_ib_rep_supported(dev))
+ return false;
+
+ if (is_mp_supported(dev))
+ return false;
+
+ return true;
}
-bool mlx5_device_registered(struct mlx5_core_dev *dev)
+static bool is_ib_enabled(struct mlx5_core_dev *dev)
{
- struct mlx5_priv *priv;
- bool found = false;
+ union devlink_param_value val;
+ int err;
- mutex_lock(&mlx5_intf_mutex);
- list_for_each_entry(priv, &mlx5_dev_list, dev_list)
- if (priv == &dev->priv)
- found = true;
- mutex_unlock(&mlx5_intf_mutex);
+ err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ &val);
+ return err ? false : val.vbool;
+}
- return found;
+enum {
+ MLX5_INTERFACE_PROTOCOL_ETH,
+ MLX5_INTERFACE_PROTOCOL_ETH_REP,
+
+ MLX5_INTERFACE_PROTOCOL_IB,
+ MLX5_INTERFACE_PROTOCOL_IB_REP,
+ MLX5_INTERFACE_PROTOCOL_MPIB,
+
+ MLX5_INTERFACE_PROTOCOL_VNET,
+};
+
+static const struct mlx5_adev_device {
+ const char *suffix;
+ bool (*is_supported)(struct mlx5_core_dev *dev);
+ bool (*is_enabled)(struct mlx5_core_dev *dev);
+} mlx5_adev_devices[] = {
+ [MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet",
+ .is_supported = &mlx5_vnet_supported,
+ .is_enabled = &is_vnet_enabled },
+ [MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma",
+ .is_supported = &mlx5_rdma_supported,
+ .is_enabled = &is_ib_enabled },
+ [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
+ .is_supported = &mlx5_eth_supported,
+ .is_enabled = &is_eth_enabled },
+ [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
+ .is_supported = &is_eth_rep_supported },
+ [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
+ .is_supported = &is_ib_rep_supported },
+ [MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport",
+ .is_supported = &is_mp_supported },
+};
+
+int mlx5_adev_idx_alloc(void)
+{
+ return ida_alloc(&mlx5_adev_ida, GFP_KERNEL);
}
-int mlx5_register_device(struct mlx5_core_dev *dev)
+void mlx5_adev_idx_free(int idx)
+{
+ ida_free(&mlx5_adev_ida, idx);
+}
+
+int mlx5_adev_init(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
- struct mlx5_interface *intf;
- mutex_lock(&mlx5_intf_mutex);
- list_add_tail(&priv->dev_list, &mlx5_dev_list);
- list_for_each_entry(intf, &intf_list, list)
- mlx5_add_device(intf, priv);
- mutex_unlock(&mlx5_intf_mutex);
+ priv->adev = kcalloc(ARRAY_SIZE(mlx5_adev_devices),
+ sizeof(struct mlx5_adev *), GFP_KERNEL);
+ if (!priv->adev)
+ return -ENOMEM;
return 0;
}
-void mlx5_unregister_device(struct mlx5_core_dev *dev)
+void mlx5_adev_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
- struct mlx5_interface *intf;
- mutex_lock(&mlx5_intf_mutex);
- list_for_each_entry_reverse(intf, &intf_list, list)
- mlx5_remove_device(intf, priv);
- list_del(&priv->dev_list);
- mutex_unlock(&mlx5_intf_mutex);
+ kfree(priv->adev);
}
-int mlx5_register_interface(struct mlx5_interface *intf)
+static void adev_release(struct device *dev)
{
- struct mlx5_priv *priv;
+ struct mlx5_adev *mlx5_adev =
+ container_of(dev, struct mlx5_adev, adev.dev);
+ struct mlx5_priv *priv = &mlx5_adev->mdev->priv;
+ int idx = mlx5_adev->idx;
- if (!intf->add || !intf->remove)
- return -EINVAL;
+ kfree(mlx5_adev);
+ priv->adev[idx] = NULL;
+}
- mutex_lock(&mlx5_intf_mutex);
- list_add_tail(&intf->list, &intf_list);
- list_for_each_entry(priv, &mlx5_dev_list, dev_list)
- mlx5_add_device(intf, priv);
- mutex_unlock(&mlx5_intf_mutex);
+static struct mlx5_adev *add_adev(struct mlx5_core_dev *dev, int idx)
+{
+ const char *suffix = mlx5_adev_devices[idx].suffix;
+ struct auxiliary_device *adev;
+ struct mlx5_adev *madev;
+ int ret;
+
+ madev = kzalloc(sizeof(*madev), GFP_KERNEL);
+ if (!madev)
+ return ERR_PTR(-ENOMEM);
+
+ adev = &madev->adev;
+ adev->id = dev->priv.adev_idx;
+ adev->name = suffix;
+ adev->dev.parent = dev->device;
+ adev->dev.release = adev_release;
+ madev->mdev = dev;
+ madev->idx = idx;
+
+ ret = auxiliary_device_init(adev);
+ if (ret) {
+ kfree(madev);
+ return ERR_PTR(ret);
+ }
- return 0;
+ ret = auxiliary_device_add(adev);
+ if (ret) {
+ auxiliary_device_uninit(adev);
+ return ERR_PTR(ret);
+ }
+ return madev;
+}
+
+static void del_adev(struct auxiliary_device *adev)
+{
+ auxiliary_device_delete(adev);
+ auxiliary_device_uninit(adev);
}
-EXPORT_SYMBOL(mlx5_register_interface);
-void mlx5_unregister_interface(struct mlx5_interface *intf)
+int mlx5_attach_device(struct mlx5_core_dev *dev)
{
- struct mlx5_priv *priv;
+ struct mlx5_priv *priv = &dev->priv;
+ struct auxiliary_device *adev;
+ struct auxiliary_driver *adrv;
+ int ret = 0, i;
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&mlx5_intf_mutex);
- list_for_each_entry(priv, &mlx5_dev_list, dev_list)
- mlx5_remove_device(intf, priv);
- list_del(&intf->list);
+ priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
+ if (!priv->adev[i]) {
+ bool is_supported = false;
+
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ continue;
+ }
+
+ if (mlx5_adev_devices[i].is_supported)
+ is_supported = mlx5_adev_devices[i].is_supported(dev);
+
+ if (!is_supported)
+ continue;
+
+ priv->adev[i] = add_adev(dev, i);
+ if (IS_ERR(priv->adev[i])) {
+ ret = PTR_ERR(priv->adev[i]);
+ priv->adev[i] = NULL;
+ }
+ } else {
+ adev = &priv->adev[i]->adev;
+
+ /* Pay attention that this is not PCI driver that
+ * mlx5_core_dev is connected, but auxiliary driver.
+ *
+ * Here we can race of module unload with devlink
+ * reload, but we don't need to take extra lock because
+ * we are holding global mlx5_intf_mutex.
+ */
+ if (!adev->dev.driver)
+ continue;
+ adrv = to_auxiliary_drv(adev->dev.driver);
+
+ if (adrv->resume)
+ ret = adrv->resume(adev);
+ }
+ if (ret) {
+ mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n",
+ i, mlx5_adev_devices[i].suffix);
+
+ break;
+ }
+ }
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
mutex_unlock(&mlx5_intf_mutex);
+ return ret;
}
-EXPORT_SYMBOL(mlx5_unregister_interface);
-/* Must be called with intf_mutex held */
-static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol)
+void mlx5_detach_device(struct mlx5_core_dev *dev)
{
- struct mlx5_device_context *dev_ctx;
- struct mlx5_interface *intf;
- bool found = false;
+ struct mlx5_priv *priv = &dev->priv;
+ struct auxiliary_device *adev;
+ struct auxiliary_driver *adrv;
+ pm_message_t pm = {};
+ int i;
- list_for_each_entry(intf, &intf_list, list) {
- if (intf->protocol == protocol) {
- dev_ctx = mlx5_get_device(intf, &mdev->priv);
- if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- found = true;
- break;
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&mlx5_intf_mutex);
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
+ if (!priv->adev[i])
+ continue;
+
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ goto skip_suspend;
}
+
+ adev = &priv->adev[i]->adev;
+ /* Auxiliary driver was unbind manually through sysfs */
+ if (!adev->dev.driver)
+ goto skip_suspend;
+
+ adrv = to_auxiliary_drv(adev->dev.driver);
+
+ if (adrv->suspend) {
+ adrv->suspend(adev, pm);
+ continue;
+ }
+
+skip_suspend:
+ del_adev(&priv->adev[i]->adev);
+ priv->adev[i] = NULL;
}
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ priv->flags |= MLX5_PRIV_FLAGS_DETACH;
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_register_device(struct mlx5_core_dev *dev)
+{
+ int ret;
- return found;
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&mlx5_intf_mutex);
+ dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
+ ret = mlx5_rescan_drivers_locked(dev);
+ mutex_unlock(&mlx5_intf_mutex);
+ if (ret)
+ mlx5_unregister_device(dev);
+
+ return ret;
}
-void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
+ devl_assert_locked(priv_to_devlink(dev));
mutex_lock(&mlx5_intf_mutex);
- if (mlx5_has_added_dev_by_protocol(mdev, protocol)) {
- mlx5_remove_dev_by_protocol(mdev, protocol);
- mlx5_add_dev_by_protocol(mdev, protocol);
- }
+ dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
+ mlx5_rescan_drivers_locked(dev);
mutex_unlock(&mlx5_intf_mutex);
}
-/* Must be called with intf_mutex held */
-void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+static int add_drivers(struct mlx5_core_dev *dev)
{
- struct mlx5_interface *intf;
+ struct mlx5_priv *priv = &dev->priv;
+ int i, ret = 0;
- list_for_each_entry(intf, &intf_list, list)
- if (intf->protocol == protocol) {
- mlx5_add_device(intf, &dev->priv);
- break;
+ for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
+ bool is_supported = false;
+
+ if (priv->adev[i])
+ continue;
+
+ if (mlx5_adev_devices[i].is_supported)
+ is_supported = mlx5_adev_devices[i].is_supported(dev);
+
+ if (!is_supported)
+ continue;
+
+ priv->adev[i] = add_adev(dev, i);
+ if (IS_ERR(priv->adev[i])) {
+ mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n",
+ i, mlx5_adev_devices[i].suffix);
+ /* We continue to rescan drivers and leave to the caller
+ * to make decision if to release everything or continue.
+ */
+ ret = PTR_ERR(priv->adev[i]);
+ priv->adev[i] = NULL;
}
+ }
+ return ret;
}
-/* Must be called with intf_mutex held */
-void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+static void delete_drivers(struct mlx5_core_dev *dev)
{
- struct mlx5_interface *intf;
+ struct mlx5_priv *priv = &dev->priv;
+ bool delete_all;
+ int i;
- list_for_each_entry(intf, &intf_list, list)
- if (intf->protocol == protocol) {
- mlx5_remove_device(intf, &dev->priv);
- break;
+ delete_all = priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
+
+ for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
+ bool is_supported = false;
+
+ if (!priv->adev[i])
+ continue;
+
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ goto del_adev;
}
+
+ if (mlx5_adev_devices[i].is_supported && !delete_all)
+ is_supported = mlx5_adev_devices[i].is_supported(dev);
+
+ if (is_supported)
+ continue;
+
+del_adev:
+ del_adev(&priv->adev[i]->adev);
+ priv->adev[i] = NULL;
+ }
}
-static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
+/* This function is used after mlx5_core_dev is reconfigured.
+ */
+int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int err = 0;
+
+ lockdep_assert_held(&mlx5_intf_mutex);
+ if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
+ return 0;
+
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ delete_drivers(dev);
+ if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ goto out;
+
+ err = add_drivers(dev);
+
+out:
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ return err;
+}
+
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
+{
+ u64 fsystem_guid, psystem_guid;
+
+ fsystem_guid = mlx5_query_nic_system_image_guid(dev);
+ psystem_guid = mlx5_query_nic_system_image_guid(peer_dev);
+
+ return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid);
+}
+
+static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev)
{
return (u32)((pci_domain_nr(dev->pdev->bus) << 16) |
(dev->pdev->bus->number << 8) |
PCI_SLOT(dev->pdev->devfn));
}
-/* Must be called with intf_mutex held */
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+static int _next_phys_dev(struct mlx5_core_dev *mdev,
+ const struct mlx5_core_dev *curr)
{
- struct mlx5_core_dev *res = NULL;
- struct mlx5_core_dev *tmp_dev;
- struct mlx5_priv *priv;
- u32 pci_id;
+ if (!mlx5_core_is_pf(mdev))
+ return 0;
- if (!mlx5_core_is_pf(dev))
+ if (mdev == curr)
+ return 0;
+
+ if (!mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) &&
+ mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr))
+ return 0;
+
+ return 1;
+}
+
+static void *pci_get_other_drvdata(struct device *this, struct device *other)
+{
+ if (this->driver != other->driver)
return NULL;
- pci_id = mlx5_gen_pci_id(dev);
- list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
- tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
- if (!mlx5_core_is_pf(tmp_dev))
- continue;
+ return pci_get_drvdata(to_pci_dev(other));
+}
- if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
- res = tmp_dev;
- break;
- }
- }
+static int next_phys_dev_lag(struct device *dev, const void *data)
+{
+ struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
+
+ mdev = pci_get_other_drvdata(this->device, dev);
+ if (!mdev)
+ return 0;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager) ||
+ !MLX5_CAP_GEN(mdev, lag_master) ||
+ (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(mdev, num_lag_ports) <= 1))
+ return 0;
+
+ return _next_phys_dev(mdev, data);
+}
+
+static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev,
+ int (*match)(struct device *dev, const void *data))
+{
+ struct device *next;
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ next = bus_find_device(&pci_bus_type, NULL, dev, match);
+ if (!next)
+ return NULL;
- return res;
+ put_device(next);
+ return pci_get_drvdata(to_pci_dev(next));
}
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
+{
+ lockdep_assert_held(&mlx5_intf_mutex);
+ return mlx5_get_next_dev(dev, &next_phys_dev_lag);
+}
void mlx5_dev_list_lock(void)
{
mutex_lock(&mlx5_intf_mutex);
}
-
void mlx5_dev_list_unlock(void)
{
mutex_unlock(&mlx5_intf_mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index ac108f1e5bd6..66c6a7017695 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -4,26 +4,20 @@
#include <devlink.h>
#include "mlx5_core.h"
+#include "fw_reset.h"
#include "fs_core.h"
#include "eswitch.h"
+#include "esw/qos.h"
+#include "sf/dev/dev.h"
+#include "sf/sf.h"
static int mlx5_devlink_flash_update(struct devlink *devlink,
- const char *file_name,
- const char *component,
+ struct devlink_flash_update_params *params,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- const struct firmware *fw;
- int err;
-
- if (component)
- return -EOPNOTSUPP;
-
- err = request_firmware_direct(&fw, file_name, &dev->pdev->dev);
- if (err)
- return err;
- return mlx5_firmware_flash(dev, fw, extack);
+ return mlx5_firmware_flash(dev, params->fw, extack);
}
static u8 mlx5_fw_ver_major(u32 version)
@@ -52,7 +46,7 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
u32 running_fw, stored_fw;
int err;
- err = devlink_info_driver_name_put(req, DRIVER_NAME);
+ err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
if (err)
return err;
@@ -70,6 +64,11 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
err = devlink_info_version_running_put(req, "fw.version", version_str);
if (err)
return err;
+ err = devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ version_str);
+ if (err)
+ return err;
/* no pending version, return running (stored) version */
if (stored_fw == 0)
@@ -81,24 +80,225 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
err = devlink_info_version_stored_put(req, "fw.version", version_str);
if (err)
return err;
+ return devlink_info_version_stored_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ version_str);
+}
- return 0;
+static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 reset_level, reset_type, net_port_alive;
+ int err;
+
+ err = mlx5_fw_reset_query(dev, &reset_level, &reset_type);
+ if (err)
+ return err;
+ if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) {
+ NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot");
+ return -EINVAL;
+ }
+
+ net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE);
+ err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack);
+ if (err)
+ return err;
+
+ err = mlx5_fw_reset_wait_reset_done(dev);
+ if (err)
+ return err;
+
+ mlx5_unload_one_devl_locked(dev);
+ err = mlx5_health_wait_pci_up(dev);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset");
+
+ return err;
+}
+
+static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 reset_level;
+ int err;
+
+ err = mlx5_fw_reset_query(dev, &reset_level, NULL);
+ if (err)
+ return err;
+ if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL0)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "FW upgrade to the stored FW can't be done by FW live patching");
+ return -EINVAL;
+ }
+
+ return mlx5_fw_reset_set_live_patch(dev);
}
static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct pci_dev *pdev = dev->pdev;
+ bool sf_dev_allocated;
+ int ret = 0;
+
+ sf_dev_allocated = mlx5_sf_dev_allocated(dev);
+ if (sf_dev_allocated) {
+ /* Reload results in deleting SF device which further results in
+ * unregistering devlink instance while holding devlink_mutext.
+ * Hence, do not support reload.
+ */
+ NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated");
+ return -EOPNOTSUPP;
+ }
+
+ if (mlx5_lag_is_active(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
+ return -EOPNOTSUPP;
+ }
+
+ if (pci_num_vf(pdev)) {
+ NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable");
+ }
- return mlx5_unload_one(dev, false);
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ mlx5_unload_one_devl_locked(dev);
+ break;
+ case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+ if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+ ret = mlx5_devlink_trigger_fw_live_patch(devlink, extack);
+ else
+ ret = mlx5_devlink_reload_fw_activate(devlink, extack);
+ break;
+ default:
+ /* Unsupported action should not get to this function */
+ WARN_ON(1);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
}
-static int mlx5_devlink_reload_up(struct devlink *devlink,
+static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+ enum devlink_reload_limit limit, u32 *actions_performed,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int ret = 0;
+
+ *actions_performed = BIT(action);
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ ret = mlx5_load_one_devl_locked(dev, false);
+ break;
+ case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+ if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+ break;
+ /* On fw_activate action, also driver is reloaded and reinit performed */
+ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
+ ret = mlx5_load_one_devl_locked(dev, false);
+ break;
+ default:
+ /* Unsupported action should not get to this function */
+ WARN_ON(1);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id)
+{
+ struct mlx5_devlink_trap *dl_trap;
+
+ list_for_each_entry(dl_trap, &dev->priv.traps, list)
+ if (dl_trap->trap.id == trap_id)
+ return dl_trap;
+
+ return NULL;
+}
+
+static int mlx5_devlink_trap_init(struct devlink *devlink, const struct devlink_trap *trap,
+ void *trap_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_devlink_trap *dl_trap;
+
+ dl_trap = kzalloc(sizeof(*dl_trap), GFP_KERNEL);
+ if (!dl_trap)
+ return -ENOMEM;
+
+ dl_trap->trap.id = trap->id;
+ dl_trap->trap.action = DEVLINK_TRAP_ACTION_DROP;
+ dl_trap->item = trap_ctx;
+
+ if (mlx5_find_trap_by_id(dev, trap->id)) {
+ kfree(dl_trap);
+ mlx5_core_err(dev, "Devlink trap: Trap 0x%x already found", trap->id);
+ return -EEXIST;
+ }
+
+ list_add_tail(&dl_trap->list, &dev->priv.traps);
+ return 0;
+}
+
+static void mlx5_devlink_trap_fini(struct devlink *devlink, const struct devlink_trap *trap,
+ void *trap_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_devlink_trap *dl_trap;
- return mlx5_load_one(dev, false);
+ dl_trap = mlx5_find_trap_by_id(dev, trap->id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Missing trap id 0x%x", trap->id);
+ return;
+ }
+ list_del(&dl_trap->list);
+ kfree(dl_trap);
+}
+
+static int mlx5_devlink_trap_action_set(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ enum devlink_trap_action action,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ enum devlink_trap_action action_orig;
+ struct mlx5_devlink_trap *dl_trap;
+ int err = 0;
+
+ if (is_mdev_switchdev_mode(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode");
+ return -EOPNOTSUPP;
+ }
+
+ dl_trap = mlx5_find_trap_by_id(dev, trap->id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (action == dl_trap->trap.action)
+ goto out;
+
+ action_orig = dl_trap->trap.action;
+ dl_trap->trap.action = action;
+ err = mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_TYPE_TRAP,
+ &dl_trap->trap);
+ if (err)
+ dl_trap->trap.action = action_orig;
+out:
+ return err;
}
static const struct devlink_ops mlx5_devlink_ops = {
@@ -109,16 +309,86 @@ static const struct devlink_ops mlx5_devlink_ops = {
.eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
.eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
+ .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get,
+ .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set,
+ .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
+ .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
+ .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set,
+ .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
+ .rate_node_new = mlx5_esw_devlink_rate_node_new,
+ .rate_node_del = mlx5_esw_devlink_rate_node_del,
+ .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
+#endif
+#ifdef CONFIG_MLX5_SF_MANAGER
+ .port_new = mlx5_devlink_sf_port_new,
+ .port_del = mlx5_devlink_sf_port_del,
+ .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
+ .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
#endif
.flash_update = mlx5_devlink_flash_update,
.info_get = mlx5_devlink_info_get,
+ .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+ BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
+ .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET),
.reload_down = mlx5_devlink_reload_down,
.reload_up = mlx5_devlink_reload_up,
+ .trap_init = mlx5_devlink_trap_init,
+ .trap_fini = mlx5_devlink_trap_fini,
+ .trap_action_set = mlx5_devlink_trap_action_set,
};
-struct devlink *mlx5_devlink_alloc(void)
+void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb,
+ struct devlink_port *dl_port)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_devlink_trap *dl_trap;
+
+ dl_trap = mlx5_find_trap_by_id(dev, trap_id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Report on invalid trap id 0x%x", trap_id);
+ return;
+ }
+
+ if (dl_trap->trap.action != DEVLINK_TRAP_ACTION_TRAP) {
+ mlx5_core_dbg(dev, "Devlink trap: Trap id %d has action %d", trap_id,
+ dl_trap->trap.action);
+ return;
+ }
+ devlink_trap_report(devlink, skb, dl_trap->item, dl_port, NULL);
+}
+
+int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev)
+{
+ struct mlx5_devlink_trap *dl_trap;
+ int count = 0;
+
+ list_for_each_entry(dl_trap, &dev->priv.traps, list)
+ if (dl_trap->trap.action == DEVLINK_TRAP_ACTION_TRAP)
+ count++;
+
+ return count;
+}
+
+int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
+ enum devlink_trap_action *action)
+{
+ struct mlx5_devlink_trap *dl_trap;
+
+ dl_trap = mlx5_find_trap_by_id(dev, trap_id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Get action on invalid trap id 0x%x",
+ trap_id);
+ return -EINVAL;
+ }
+
+ *action = dl_trap->trap.action;
+ return 0;
+}
+
+struct devlink *mlx5_devlink_alloc(struct device *dev)
{
- return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+ return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev),
+ dev);
}
void mlx5_devlink_free(struct devlink *devlink)
@@ -140,7 +410,7 @@ static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id,
u8 eswitch_mode;
bool smfs_cap;
- eswitch_mode = mlx5_eswitch_mode(dev->priv.eswitch);
+ eswitch_mode = mlx5_eswitch_mode(dev);
smfs_cap = mlx5_fs_dr_is_supported(dev);
if (!smfs_cap) {
@@ -190,11 +460,6 @@ static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id,
return 0;
}
-enum mlx5_devlink_param_id {
- MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
- MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
-};
-
static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
union devlink_param_value val,
struct netlink_ext_ack *extack)
@@ -202,22 +467,133 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
struct mlx5_core_dev *dev = devlink_priv(devlink);
bool new_state = val.vbool;
- if (new_state && !MLX5_CAP_GEN(dev, roce)) {
+ if (new_state && !MLX5_CAP_GEN(dev, roce) &&
+ !MLX5_CAP_GEN(dev, roce_rw_supported)) {
NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
return -EOPNOTSUPP;
}
+ if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE");
+ return -EOPNOTSUPP;
+ }
return 0;
}
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ int group_num = val.vu32;
+
+ if (group_num < 1 || group_num > 1024) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported group number, supported range is 1-1024");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool);
+}
+
+static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch);
+ return 0;
+}
+
+static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 esw_mode;
+
+ if (!MLX5_ESWITCH_MANAGER(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported");
+ return -EOPNOTSUPP;
+ }
+ esw_mode = mlx5_eswitch_mode(dev);
+ if (esw_mode == MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch must either disabled or non switchdev mode");
+ return -EBUSY;
+ }
+ return 0;
+}
+
+#endif
+
+static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ mlx5_fw_reset_enable_remote_dev_reset_set(dev, ctx->val.vbool);
+ return 0;
+}
+
+static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ ctx->val.vbool = mlx5_fw_reset_enable_remote_dev_reset_get(dev);
+ return 0;
+}
+
+static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL;
+}
+
static const struct devlink_param mlx5_devlink_params[] = {
- DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
"flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
BIT(DEVLINK_PARAM_CMODE_RUNTIME),
mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set,
mlx5_devlink_fs_mode_validate),
DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, mlx5_devlink_enable_roce_validate),
+#ifdef CONFIG_MLX5_ESWITCH
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ "fdb_large_groups", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL,
+ mlx5_devlink_large_group_num_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
+ "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_esw_port_metadata_get,
+ mlx5_devlink_esw_port_metadata_set,
+ mlx5_devlink_esw_port_metadata_validate),
+#endif
+ DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_enable_remote_dev_reset_get,
+ mlx5_devlink_enable_remote_dev_reset_set, NULL),
+ DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
+ DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
};
static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
@@ -225,46 +601,318 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
struct mlx5_core_dev *dev = devlink_priv(devlink);
union devlink_param_value value;
- if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS)
- strcpy(value.vstr, "dmfs");
- else
- strcpy(value.vstr, "smfs");
+ value.vbool = MLX5_CAP_GEN(dev, roce);
devlink_param_driverinit_value_set(devlink,
- MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
value);
- value.vbool = MLX5_CAP_GEN(dev, roce);
+#ifdef CONFIG_MLX5_ESWITCH
+ value.vu32 = ESW_OFFLOADS_DEFAULT_NUM_GROUPS;
devlink_param_driverinit_value_set(devlink,
- DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ value);
+#endif
+
+ value.vu32 = MLX5_COMP_EQ_SIZE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ value);
+
+ value.vu32 = MLX5_NUM_ASYNC_EQE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ value);
+}
+
+static const struct devlink_param enable_eth_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL);
+
+static int mlx5_devlink_eth_param_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ int err;
+
+ if (!mlx5_eth_supported(dev))
+ return 0;
+
+ err = devlink_param_register(devlink, &enable_eth_param);
+ if (err)
+ return err;
+
+ value.vbool = true;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ value);
+ return 0;
+}
+
+static void mlx5_devlink_eth_param_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!mlx5_eth_supported(dev))
+ return;
+
+ devlink_param_unregister(devlink, &enable_eth_param);
+}
+
+static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ bool new_state = val.vbool;
+
+ if (new_state && !mlx5_rdma_supported(dev))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static const struct devlink_param enable_rdma_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_enable_rdma_validate);
+
+static int mlx5_devlink_rdma_param_register(struct devlink *devlink)
+{
+ union devlink_param_value value;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return 0;
+
+ err = devlink_param_register(devlink, &enable_rdma_param);
+ if (err)
+ return err;
+
+ value.vbool = true;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ value);
+ return 0;
+}
+
+static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return;
+
+ devlink_param_unregister(devlink, &enable_rdma_param);
+}
+
+static const struct devlink_param enable_vnet_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL);
+
+static int mlx5_devlink_vnet_param_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ int err;
+
+ if (!mlx5_vnet_supported(dev))
+ return 0;
+
+ err = devlink_param_register(devlink, &enable_vnet_param);
+ if (err)
+ return err;
+
+ value.vbool = true;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
value);
+ return 0;
+}
+
+static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!mlx5_vnet_supported(dev))
+ return;
+
+ devlink_param_unregister(devlink, &enable_vnet_param);
}
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+static int mlx5_devlink_auxdev_params_register(struct devlink *devlink)
{
int err;
- err = devlink_register(devlink, dev);
+ err = mlx5_devlink_eth_param_register(devlink);
if (err)
return err;
+ err = mlx5_devlink_rdma_param_register(devlink);
+ if (err)
+ goto rdma_err;
+
+ err = mlx5_devlink_vnet_param_register(devlink);
+ if (err)
+ goto vnet_err;
+ return 0;
+
+vnet_err:
+ mlx5_devlink_rdma_param_unregister(devlink);
+rdma_err:
+ mlx5_devlink_eth_param_unregister(devlink);
+ return err;
+}
+
+static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink)
+{
+ mlx5_devlink_vnet_param_unregister(devlink);
+ mlx5_devlink_rdma_param_unregister(devlink);
+ mlx5_devlink_eth_param_unregister(devlink);
+}
+
+static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (val.vu32 == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0");
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(val.vu32)) {
+ NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs");
+ return -EINVAL;
+ }
+
+ if (ilog2(val.vu32) >
+ MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) {
+ NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct devlink_param max_uc_list_param =
+ DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_max_uc_list_validate);
+
+static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ int err;
+
+ if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported))
+ return 0;
+
+ err = devlink_param_register(devlink, &max_uc_list_param);
+ if (err)
+ return err;
+
+ value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list);
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ value);
+ return 0;
+}
+
+static void
+mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported))
+ return;
+
+ devlink_param_unregister(devlink, &max_uc_list_param);
+}
+
+#define MLX5_TRAP_DROP(_id, _group_id) \
+ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \
+ DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \
+ DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT)
+
+static const struct devlink_trap mlx5_traps_arr[] = {
+ MLX5_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
+ MLX5_TRAP_DROP(DMAC_FILTER, L2_DROPS),
+};
+
+static const struct devlink_trap_group mlx5_trap_groups_arr[] = {
+ DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0),
+};
+
+static int mlx5_devlink_traps_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *core_dev = devlink_priv(devlink);
+ int err;
+
+ err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
+ if (err)
+ return err;
+
+ err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr),
+ &core_dev->priv);
+ if (err)
+ goto err_trap_group;
+ return 0;
+
+err_trap_group:
+ devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
+ return err;
+}
+
+static void mlx5_devlink_traps_unregister(struct devlink *devlink)
+{
+ devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr));
+ devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
+}
+
+int mlx5_devlink_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
+
err = devlink_params_register(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
if (err)
- goto params_reg_err;
+ return err;
+
mlx5_devlink_set_params_init_values(devlink);
- devlink_params_publish(devlink);
- devlink_reload_enable(devlink);
+
+ err = mlx5_devlink_auxdev_params_register(devlink);
+ if (err)
+ goto auxdev_reg_err;
+
+ err = mlx5_devlink_max_uc_list_param_register(devlink);
+ if (err)
+ goto max_uc_list_err;
+
+ err = mlx5_devlink_traps_register(devlink);
+ if (err)
+ goto traps_reg_err;
+
+ if (!mlx5_core_is_mp_slave(dev))
+ devlink_set_features(devlink, DEVLINK_F_RELOAD);
+
return 0;
-params_reg_err:
- devlink_unregister(devlink);
+traps_reg_err:
+ mlx5_devlink_max_uc_list_param_unregister(devlink);
+max_uc_list_err:
+ mlx5_devlink_auxdev_params_unregister(devlink);
+auxdev_reg_err:
+ devlink_params_unregister(devlink, mlx5_devlink_params,
+ ARRAY_SIZE(mlx5_devlink_params));
return err;
}
void mlx5_devlink_unregister(struct devlink *devlink)
{
- devlink_reload_disable(devlink);
+ mlx5_devlink_traps_unregister(devlink);
+ mlx5_devlink_max_uc_list_param_unregister(devlink);
+ mlx5_devlink_auxdev_params_unregister(devlink);
devlink_params_unregister(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
- devlink_unregister(devlink);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index d0ba03774ddf..30bf4882779b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -6,9 +6,34 @@
#include <net/devlink.h>
-struct devlink *mlx5_devlink_alloc(void);
+enum mlx5_devlink_param_id {
+ MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
+ MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
+};
+
+struct mlx5_trap_ctx {
+ int id;
+ int action;
+};
+
+struct mlx5_devlink_trap {
+ struct mlx5_trap_ctx trap;
+ void *item;
+ struct list_head list;
+};
+
+struct mlx5_core_dev;
+void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb,
+ struct devlink_port *dl_port);
+int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
+int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
+ enum devlink_trap_action *action);
+
+struct devlink *mlx5_devlink_alloc(struct device *dev);
void mlx5_devlink_free(struct devlink *devlink);
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+int mlx5_devlink_register(struct devlink *devlink);
void mlx5_devlink_unregister(struct devlink *devlink);
#endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
index 1177860a2ee4..f15718db5d0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
@@ -15,7 +15,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update,
TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha,
bool neigh_connected),
TP_ARGS(nhe, ha, neigh_connected),
- TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+ TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
__array(u8, ha, ETH_ALEN)
__array(u8, v4, 4)
__array(u8, v6, 16)
@@ -25,7 +25,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update,
struct in6_addr *pin6;
__be32 *p32;
- __assign_str(devname, mn->dev->name);
+ __assign_str(devname, nhe->neigh_dev->name);
__entry->neigh_connected = neigh_connected;
memcpy(__entry->ha, ha, ETH_ALEN);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
index d4e6cfaaade3..ac52ef37f38a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
@@ -77,7 +77,7 @@ TRACE_EVENT(mlx5e_stats_flower,
TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used),
TP_ARGS(nhe, neigh_used),
- TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+ TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
__array(u8, v4, 4)
__array(u8, v6, 16)
__field(bool, neigh_used)
@@ -86,7 +86,7 @@ TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
struct in6_addr *pin6;
__be32 *p32;
- __assign_str(devname, mn->dev->name);
+ __assign_str(devname, nhe->neigh_dev->name);
__entry->neigh_used = neigh_used;
p32 = (__be32 *)__entry->v4;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 8ecac81a385d..c5bb79a4fa57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -76,58 +76,59 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p,
.v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 |
MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)};
MASK_VAL_L2(u16, ethertype, ethertype);
+ MASK_VAL_L2(u8, ip_version, ip_version);
PRINT_MASKED_VALP(smac, u8 *, p, "%pM");
PRINT_MASKED_VALP(dmac, u8 *, p, "%pM");
PRINT_MASKED_VAL(ethertype, p, "%04x");
- if (ethertype.m == 0xffff) {
- if (ethertype.v == ETH_P_IP) {
+ if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IP) ||
+ (ip_version.m == 0xf && ip_version.v == 4)) {
#define MASK_VAL_L2_BE(type, name, fld) \
MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld)
- MASK_VAL_L2_BE(u32, src_ipv4,
- src_ipv4_src_ipv6.ipv4_layout.ipv4);
- MASK_VAL_L2_BE(u32, dst_ipv4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, src_ipv4,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, dst_ipv4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
- PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
- "%pI4");
- PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
- "%pI4");
- } else if (ethertype.v == ETH_P_IPV6) {
- static const struct in6_addr full_ones = {
- .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
- __constant_htonl(0xffffffff),
- __constant_htonl(0xffffffff),
- __constant_htonl(0xffffffff)},
- };
- DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
- DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
+ PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
+ "%pI4");
+ PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
+ "%pI4");
+ } else if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IPV6) ||
+ (ip_version.m == 0xf && ip_version.v == 6)) {
+ static const struct in6_addr full_ones = {
+ .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff)},
+ };
+ DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
+ DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
- memcpy(src_ipv6.m.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(src_ipv6.m));
- memcpy(dst_ipv6.m.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(dst_ipv6.m));
- memcpy(src_ipv6.v.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(src_ipv6.v));
- memcpy(dst_ipv6.v.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(dst_ipv6.v));
+ memcpy(src_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.m));
+ memcpy(dst_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.m));
+ memcpy(src_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.v));
+ memcpy(dst_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.v));
- if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
- trace_seq_printf(p, "src_ipv6=%pI6 ",
- &src_ipv6.v);
- if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
- trace_seq_printf(p, "dst_ipv6=%pI6 ",
- &dst_ipv6.v);
- }
+ if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "src_ipv6=%pI6 ",
+ &src_ipv6.v);
+ if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "dst_ipv6=%pI6 ",
+ &dst_ipv6.v);
}
#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\
@@ -234,6 +235,9 @@ const char *parse_fs_dst(struct trace_seq *p,
const char *ret = trace_seq_buffer_ptr(p);
switch (dst->type) {
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
+ trace_seq_printf(p, "uplink\n");
+ break;
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
trace_seq_printf(p, "vport=%u\n", dst->vport.num);
break;
@@ -246,12 +250,18 @@ const char *parse_fs_dst(struct trace_seq *p,
case MLX5_FLOW_DESTINATION_TYPE_TIR:
trace_seq_printf(p, "tir=%u\n", dst->tir_num);
break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ trace_seq_printf(p, "sampler_id=%u\n", dst->sampler_id);
+ break;
case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
trace_seq_printf(p, "counter_id=%u\n", counter_id);
break;
case MLX5_FLOW_DESTINATION_TYPE_PORT:
trace_seq_printf(p, "port\n");
break;
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ trace_seq_printf(p, "none\n");
+ break;
}
trace_seq_putc(p, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 94d7b69a95c7..978a2bb8e122 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -124,7 +124,7 @@ static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev = tracer->dev;
- struct device *ddev = &dev->pdev->dev;
+ struct device *ddev;
dma_addr_t dma;
void *buff;
gfp_t gfp;
@@ -142,6 +142,7 @@ static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
}
tracer->buff.log_buf = buff;
+ ddev = mlx5_core_dma_dev(dev);
dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
if (dma_mapping_error(ddev, dma)) {
mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
@@ -162,11 +163,12 @@ free_pages:
static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev = tracer->dev;
- struct device *ddev = &dev->pdev->dev;
+ struct device *ddev;
if (!tracer->buff.log_buf)
return;
+ ddev = mlx5_core_dma_dev(dev);
dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
}
@@ -188,7 +190,7 @@ static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer)
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
- mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++)
mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE);
@@ -673,10 +675,13 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
if (!tracer->owner)
return;
+ if (unlikely(!tracer->str_db.loaded))
+ goto arm;
+
block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
- /* Copy the block to local buffer to avoid HW override while being processed*/
+ /* Copy the block to local buffer to avoid HW override while being processed */
memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
TRACER_BLOCK_SIZE_BYTE);
@@ -684,7 +689,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
while (block_timestamp > tracer->last_timestamp) {
- /* Check block override if its not the first block */
+ /* Check block override if it's not the first block */
if (!tracer->last_timestamp) {
u64 *ts_event;
/* To avoid block override be the HW in case of buffer
@@ -730,6 +735,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
&tmp_trace_block[TRACES_PER_BLOCK - 1]);
}
+arm:
mlx5_fw_tracer_arm(dev);
}
@@ -743,7 +749,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
ilog2(TRACER_BUFFER_PAGE_NUM));
- MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+ MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey);
err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
MLX5_REG_MTRC_CONF, 0, 1);
@@ -935,7 +941,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
return NULL;
}
- tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+ tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer)
return ERR_PTR(-ENOMEM);
@@ -982,7 +988,7 @@ destroy_workqueue:
tracer->dev = NULL;
destroy_workqueue(tracer->work_queue);
free_tracer:
- kfree(tracer);
+ kvfree(tracer);
return ERR_PTR(err);
}
@@ -1005,7 +1011,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn);
if (err) {
mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err);
- return err;
+ goto err_cancel_work;
}
err = mlx5_fw_tracer_create_mkey(tracer);
@@ -1017,12 +1023,20 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
mlx5_eq_notifier_register(dev, &tracer->nb);
- mlx5_fw_tracer_start(tracer);
-
+ err = mlx5_fw_tracer_start(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err);
+ goto err_notifier_unregister;
+ }
return 0;
+err_notifier_unregister:
+ mlx5_eq_notifier_unregister(dev, &tracer->nb);
+ mlx5_core_destroy_mkey(dev, tracer->buff.mkey);
err_dealloc_pd:
mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
+err_cancel_work:
+ cancel_work_sync(&tracer->read_fw_strings_work);
return err;
}
@@ -1041,7 +1055,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
if (tracer->owner)
mlx5_fw_tracer_ownership_release(tracer);
- mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey);
+ mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey);
mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
}
@@ -1059,9 +1073,60 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
mlx5_fw_tracer_clean_saved_traces_array(tracer);
mlx5_fw_tracer_free_strings_db(tracer);
mlx5_fw_tracer_destroy_log_buf(tracer);
- flush_workqueue(tracer->work_queue);
destroy_workqueue(tracer->work_queue);
- kfree(tracer);
+ kvfree(tracer);
+}
+
+static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ cancel_work_sync(&tracer->read_fw_strings_work);
+ mlx5_fw_tracer_clean_ready_list(tracer);
+ mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
+ mlx5_fw_tracer_free_strings_db(tracer);
+
+ dev = tracer->dev;
+ err = mlx5_query_mtrc_caps(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+ return err;
+ }
+
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err);
+ return err;
+ }
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
+
+ return 0;
+}
+
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ if (IS_ERR_OR_NULL(tracer))
+ return 0;
+
+ dev = tracer->dev;
+ mlx5_fw_tracer_cleanup(tracer);
+ err = mlx5_fw_tracer_recreate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n");
+ return err;
+ }
+ err = mlx5_fw_tracer_init(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n");
+ return err;
+ }
+
+ return 0;
}
static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
@@ -1072,12 +1137,10 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void
switch (eqe->sub_type) {
case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
- if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
- queue_work(tracer->work_queue, &tracer->ownership_change_work);
+ queue_work(tracer->work_queue, &tracer->ownership_change_work);
break;
case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
- if (likely(tracer->str_db.loaded))
- queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
break;
default:
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 40601fba80ba..4762b55b0b0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -89,7 +89,7 @@ struct mlx5_fw_tracer {
void *log_buf;
dma_addr_t dma;
u32 size;
- struct mlx5_core_mkey mkey;
+ u32 mkey;
u32 consumer_index;
} buff;
@@ -191,5 +191,6 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
struct devlink_fmsg *fmsg);
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
new file mode 100644
index 000000000000..c5b560a8b026
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rsc_dump.h"
+#include "lib/mlx5.h"
+
+#define MLX5_SGMT_TYPE(SGMT) MLX5_SGMT_TYPE_##SGMT
+#define MLX5_SGMT_STR_ASSING(SGMT)[MLX5_SGMT_TYPE(SGMT)] = #SGMT
+static const char *const mlx5_rsc_sgmt_name[] = {
+ MLX5_SGMT_STR_ASSING(HW_CQPC),
+ MLX5_SGMT_STR_ASSING(HW_SQPC),
+ MLX5_SGMT_STR_ASSING(HW_RQPC),
+ MLX5_SGMT_STR_ASSING(FULL_SRQC),
+ MLX5_SGMT_STR_ASSING(FULL_CQC),
+ MLX5_SGMT_STR_ASSING(FULL_EQC),
+ MLX5_SGMT_STR_ASSING(FULL_QPC),
+ MLX5_SGMT_STR_ASSING(SND_BUFF),
+ MLX5_SGMT_STR_ASSING(RCV_BUFF),
+ MLX5_SGMT_STR_ASSING(SRQ_BUFF),
+ MLX5_SGMT_STR_ASSING(CQ_BUFF),
+ MLX5_SGMT_STR_ASSING(EQ_BUFF),
+ MLX5_SGMT_STR_ASSING(SX_SLICE),
+ MLX5_SGMT_STR_ASSING(SX_SLICE_ALL),
+ MLX5_SGMT_STR_ASSING(RDB),
+ MLX5_SGMT_STR_ASSING(RX_SLICE_ALL),
+ MLX5_SGMT_STR_ASSING(PRM_QUERY_QP),
+ MLX5_SGMT_STR_ASSING(PRM_QUERY_CQ),
+ MLX5_SGMT_STR_ASSING(PRM_QUERY_MKEY),
+};
+
+struct mlx5_rsc_dump {
+ u32 pdn;
+ u32 mkey;
+ u32 number_of_menu_items;
+ u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
+};
+
+struct mlx5_rsc_dump_cmd {
+ u64 mem_size;
+ u8 cmd[MLX5_ST_SZ_BYTES(resource_dump)];
+};
+
+static int mlx5_rsc_dump_sgmt_get_by_name(char *name)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5_rsc_sgmt_name); i++)
+ if (!strcmp(name, mlx5_rsc_sgmt_name[i]))
+ return i;
+
+ return -EINVAL;
+}
+
+#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_menu_segment))
+
+static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page,
+ int read_size, int start_idx)
+{
+ void *data = page_address(page);
+ enum mlx5_sgmt_type sgmt_idx;
+ int num_of_items;
+ char *sgmt_name;
+ void *member;
+ int size = 0;
+ void *menu;
+ int i;
+
+ if (!start_idx) {
+ menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
+ rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu,
+ num_of_records);
+ size = MLX5_RSC_DUMP_MENU_HEADER_SIZE;
+ data += size;
+ }
+ num_of_items = rsc_dump->number_of_menu_items;
+
+ for (i = 0; start_idx + i < num_of_items; i++) {
+ size += MLX5_ST_SZ_BYTES(resource_dump_menu_record);
+ if (size >= read_size)
+ return start_idx + i;
+
+ member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i;
+ sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name);
+ sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name);
+ if (sgmt_idx == -EINVAL)
+ continue;
+ rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record,
+ member, segment_type);
+ }
+ return 0;
+}
+
+static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+ struct page *page)
+{
+ struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump;
+ struct device *ddev = mlx5_core_dma_dev(dev);
+ u32 out_seq_num;
+ u32 in_seq_num;
+ dma_addr_t dma;
+ int err;
+
+ dma = dma_map_page(ddev, page, 0, cmd->mem_size, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(ddev, dma)))
+ return -ENOMEM;
+
+ in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num);
+ MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey);
+ MLX5_SET64(resource_dump, cmd->cmd, address, dma);
+
+ err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd,
+ sizeof(cmd->cmd), MLX5_REG_RESOURCE_DUMP, 0, 1);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to access err %d\n", err);
+ goto out;
+ }
+ out_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num);
+ if (out_seq_num && (in_seq_num + 1 != out_seq_num))
+ err = -EIO;
+out:
+ dma_unmap_page(ddev, dma, cmd->mem_size, DMA_FROM_DEVICE);
+ return err;
+}
+
+struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev,
+ struct mlx5_rsc_key *key)
+{
+ struct mlx5_rsc_dump_cmd *cmd;
+ int sgmt_type;
+
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ sgmt_type = dev->rsc_dump->fw_segment_type[key->rsc];
+ if (!sgmt_type && key->rsc != MLX5_SGMT_TYPE_MENU)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd) {
+ mlx5_core_err(dev, "Resource dump: Failed to allocate command\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ MLX5_SET(resource_dump, cmd->cmd, segment_type, sgmt_type);
+ MLX5_SET(resource_dump, cmd->cmd, index1, key->index1);
+ MLX5_SET(resource_dump, cmd->cmd, index2, key->index2);
+ MLX5_SET(resource_dump, cmd->cmd, num_of_obj1, key->num_of_obj1);
+ MLX5_SET(resource_dump, cmd->cmd, num_of_obj2, key->num_of_obj2);
+ MLX5_SET(resource_dump, cmd->cmd, size, key->size);
+ cmd->mem_size = key->size;
+ return cmd;
+}
+EXPORT_SYMBOL(mlx5_rsc_dump_cmd_create);
+
+void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd)
+{
+ kfree(cmd);
+}
+EXPORT_SYMBOL(mlx5_rsc_dump_cmd_destroy);
+
+int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+ struct page *page, int *size)
+{
+ bool more_dump;
+ int err;
+
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return -EOPNOTSUPP;
+
+ err = mlx5_rsc_dump_trigger(dev, cmd, page);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to trigger dump, %d\n", err);
+ return err;
+ }
+ *size = MLX5_GET(resource_dump, cmd->cmd, size);
+ more_dump = MLX5_GET(resource_dump, cmd->cmd, more_dump);
+
+ return more_dump;
+}
+EXPORT_SYMBOL(mlx5_rsc_dump_next);
+
+#define MLX5_RSC_DUMP_MENU_SEGMENT 0xffff
+static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rsc_dump_cmd *cmd = NULL;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
+ int start_idx = 0;
+ int size;
+ int err;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ key.rsc = MLX5_SGMT_TYPE_MENU;
+ key.size = PAGE_SIZE;
+ cmd = mlx5_rsc_dump_cmd_create(dev, &key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+ MLX5_SET(resource_dump, cmd->cmd, segment_type, MLX5_RSC_DUMP_MENU_SEGMENT);
+
+ do {
+ err = mlx5_rsc_dump_next(dev, cmd, page, &size);
+ if (err < 0)
+ goto destroy_cmd;
+
+ start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx);
+
+ } while (err > 0);
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+free_page:
+ __free_page(page);
+
+ return err;
+}
+
+static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ u32 *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rsc_dump *rsc_dump;
+
+ if (!MLX5_CAP_DEBUG(dev, resource_dump)) {
+ mlx5_core_dbg(dev, "Resource dump: capability not present\n");
+ return NULL;
+ }
+ rsc_dump = kzalloc(sizeof(*rsc_dump), GFP_KERNEL);
+ if (!rsc_dump)
+ return ERR_PTR(-ENOMEM);
+
+ return rsc_dump;
+}
+
+void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev)
+{
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return;
+ kfree(dev->rsc_dump);
+}
+
+int mlx5_rsc_dump_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump;
+ int err;
+
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return 0;
+
+ err = mlx5_core_alloc_pd(dev, &rsc_dump->pdn);
+ if (err) {
+ mlx5_core_warn(dev, "Resource dump: Failed to allocate PD %d\n", err);
+ return err;
+ }
+ err = mlx5_rsc_dump_create_mkey(dev, rsc_dump->pdn, &rsc_dump->mkey);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to create mkey, %d\n", err);
+ goto free_pd;
+ }
+ err = mlx5_rsc_dump_menu(dev);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to read menu, %d\n", err);
+ goto destroy_mkey;
+ }
+ return err;
+
+destroy_mkey:
+ mlx5_core_destroy_mkey(dev, rsc_dump->mkey);
+free_pd:
+ mlx5_core_dealloc_pd(dev, rsc_dump->pdn);
+ return err;
+}
+
+void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev)
+{
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return;
+
+ mlx5_core_destroy_mkey(dev, dev->rsc_dump->mkey);
+ mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h
new file mode 100644
index 000000000000..64c4956db6d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_RSC_DUMP_H
+#define __MLX5_RSC_DUMP_H
+
+#include <linux/mlx5/rsc_dump.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define MLX5_RSC_DUMP_ALL 0xFFFF
+struct mlx5_rsc_dump_cmd;
+struct mlx5_rsc_dump;
+
+struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev);
+void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_rsc_dump_init(struct mlx5_core_dev *dev);
+void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev);
+
+struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev,
+ struct mlx5_rsc_key *key);
+void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd);
+
+int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+ struct page *page, int *size);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index d2228e37450f..464eb3a18450 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -8,7 +8,16 @@ bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
}
-static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev)
+static bool mlx5_ecpf_esw_admins_host_pf(const struct mlx5_core_dev *dev)
+{
+ /* In separate host mode, PF enables itself.
+ * When ECPF is eswitch manager, eswitch enables host PF after
+ * eswitch is setup.
+ */
+ return mlx5_core_is_ecpf_esw_manager(dev);
+}
+
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
@@ -19,7 +28,7 @@ static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev)
return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
}
-static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev)
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
@@ -30,56 +39,55 @@ static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-static int mlx5_peer_pf_init(struct mlx5_core_dev *dev)
+static int mlx5_host_pf_init(struct mlx5_core_dev *dev)
{
int err;
- err = mlx5_peer_pf_enable_hca(dev);
+ if (mlx5_ecpf_esw_admins_host_pf(dev))
+ return 0;
+
+ /* ECPF shall enable HCA for host PF in the same way a PF
+ * does this for its VFs when ECPF is not a eswitch manager.
+ */
+ err = mlx5_cmd_host_pf_enable_hca(dev);
if (err)
- mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n",
- err);
+ mlx5_core_err(dev, "Failed to enable external host PF HCA err(%d)\n", err);
return err;
}
-static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
+static void mlx5_host_pf_cleanup(struct mlx5_core_dev *dev)
{
int err;
- err = mlx5_peer_pf_disable_hca(dev);
+ if (mlx5_ecpf_esw_admins_host_pf(dev))
+ return;
+
+ err = mlx5_cmd_host_pf_disable_hca(dev);
if (err) {
- mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n",
- err);
+ mlx5_core_err(dev, "Failed to disable external host PF HCA err(%d)\n", err);
return;
}
-
- err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages);
- if (err)
- mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n",
- err);
}
int mlx5_ec_init(struct mlx5_core_dev *dev)
{
- int err = 0;
-
if (!mlx5_core_is_ecpf(dev))
return 0;
- /* ECPF shall enable HCA for peer PF in the same way a PF
- * does this for its VFs.
- */
- err = mlx5_peer_pf_init(dev);
- if (err)
- return err;
-
- return 0;
+ return mlx5_host_pf_init(dev);
}
void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
{
+ int err;
+
if (!mlx5_core_is_ecpf(dev))
return;
- mlx5_peer_pf_cleanup(dev);
+ mlx5_host_pf_cleanup(dev);
+
+ err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages);
+ if (err)
+ mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
index d3d7a00a02ac..40b6ad76dca6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -17,6 +17,9 @@ bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
int mlx5_ec_init(struct mlx5_core_dev *dev);
void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 220ef9f06f84..26a23047f1f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -36,7 +36,6 @@
#include <linux/etherdevice.h>
#include <linux/timecounter.h>
#include <linux/net_tstamp.h>
-#include <linux/ptp_clock_kernel.h>
#include <linux/crash_dump.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/qp.h>
@@ -46,6 +45,7 @@
#include <linux/mlx5/transobj.h>
#include <linux/mlx5/fs.h>
#include <linux/rhashtable.h>
+#include <net/udp_tunnel.h>
#include <net/switchdev.h>
#include <net/xdp.h>
#include <linux/dim.h>
@@ -53,8 +53,13 @@
#include "wq.h"
#include "mlx5_core.h"
#include "en_stats.h"
+#include "en/dcbnl.h"
#include "en/fs.h"
+#include "en/qos.h"
#include "lib/hv_vhca.h"
+#include "lib/clock.h"
+#include "en/rx_res.h"
+#include "en/selq.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -62,22 +67,24 @@ struct page_pool;
#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
#define MLX5E_METADATA_ETHER_LEN 8
-#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
-
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
-#define MLX5E_MAX_PRIORITY 8
-#define MLX5E_MAX_DSCP 64
#define MLX5E_MAX_NUM_TC 8
+#define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE
#define MLX5_RX_HEADROOM NET_SKB_PAD
#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define MLX5E_RX_MAX_HEAD (256)
+#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
+#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
+#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
+#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024)
+#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096)
#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
(6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
@@ -86,23 +93,26 @@ struct page_pool;
#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
-#define MLX5_MPWRQ_LOG_WQE_SZ 18
-#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
- MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
-#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
+#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18
+
+/* Keep in sync with mlx5e_mpwrq_log_wqe_sz.
+ * These are theoretical maximums, which can be further restricted by
+ * capabilities. These values are used for static resource allocations and
+ * sanity checks.
+ * MLX5_SEND_WQE_MAX_SIZE is a bit bigger than the maximum cacheline-aligned WQE
+ * size actually used at runtime, but it's not a problem when calculating static
+ * array sizes.
+ */
+#define MLX5_UMR_MAX_MTT_SPACE \
+ (ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \
+ MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5_MPWRQ_MAX_PAGES_PER_WQE \
+ rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt))
-#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
-#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS))
-#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS)
#define MLX5E_MAX_RQ_NUM_MTTS \
- ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+ (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */
+#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */
#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \
- (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
-#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
- (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
- (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))
#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
#define MLX5E_LOG_MAX_RX_WQE_BULK \
@@ -114,12 +124,10 @@ struct page_pool;
#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK)
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \
- MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd
#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
-#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
#define MLX5E_DEFAULT_LRO_TIMEOUT 32
#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
@@ -132,21 +140,30 @@ struct page_pool;
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2
-#define MLX5E_LOG_INDIR_RQT_SIZE 0x7
-#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE)
#define MLX5E_MIN_NUM_CHANNELS 0x1
-#define MLX5E_MAX_NUM_CHANNELS MLX5E_INDIR_RQT_SIZE
-#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
+#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
#define MLX5E_TX_CQ_POLL_BUDGET 128
#define MLX5E_TX_XSK_POLL_BUDGET 64
#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
-#define MLX5E_UMR_WQE_INLINE_SZ \
- (sizeof(struct mlx5e_umr_wqe) + \
- ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
- MLX5_UMR_MTT_ALIGNMENT))
-#define MLX5E_UMR_WQEBBS \
- (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
+ (sizeof(struct mlx5e_umr_wqe) +\
+ (sizeof(struct mlx5_klm) * (sgl_len)))
+
+#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
+ (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
+ (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
+
+#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
+ (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
+
+#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
+ ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
+
+#define MLX5E_MAX_KLM_PER_WQE(mdev) \
+ MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
@@ -157,11 +174,8 @@ do { \
##__VA_ARGS__); \
} while (0)
-enum mlx5e_rq_group {
- MLX5E_RQ_GROUP_REGULAR,
- MLX5E_RQ_GROUP_XSK,
-#define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g)
-};
+#define mlx5e_state_dereference(priv, p) \
+ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
{
@@ -191,20 +205,45 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
+/* The maximum WQE size can be retrieved by max_wqe_sz_sq in
+ * bytes units. Driver hardens the limitation to 1KB (16
+ * WQEBBs), unless firmware capability is stricter.
+ */
+static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev)
+{
+ BUILD_BUG_ON(MLX5_SEND_WQE_MAX_WQEBBS > U8_MAX);
+
+ return (u8)min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS,
+ MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
+}
+
+static inline u8 mlx5e_get_max_sq_aligned_wqebbs(struct mlx5_core_dev *mdev)
+{
+/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS.
+ * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16,
+ * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64)
+ * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower
+ * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be
+ * cache-aligned.
+ */
+ u8 wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+
+ wqebbs = min_t(u8, wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
+#if L1_CACHE_BYTES >= 128
+ wqebbs = ALIGN_DOWN(wqebbs, 2);
+#endif
+ return wqebbs;
+}
+
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
- union {
- struct {
- struct mlx5_wqe_eth_seg eth;
- struct mlx5_wqe_data_seg data[0];
- };
- u8 tls_progress_params_ctx[0];
- };
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[];
};
struct mlx5e_rx_wqe_ll {
struct mlx5_wqe_srq_next_seg next;
- struct mlx5_wqe_data_seg data[0];
+ struct mlx5_wqe_data_seg data[];
};
struct mlx5e_rx_wqe_cyc {
@@ -216,13 +255,12 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
union {
- struct mlx5_mtt inline_mtts[0];
- u8 tls_static_params_ctx[0];
+ DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
+ DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
+ DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms);
};
};
-extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
-
enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_CQE_BASED_MODER,
MLX5E_PFLAG_TX_CQE_BASED_MODER,
@@ -230,6 +268,8 @@ enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_STRIDING_RQ,
MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5E_PFLAG_SKB_TX_MPWQE,
+ MLX5E_PFLAG_TX_PORT_TS,
MLX5E_NUM_PFLAGS, /* Keep last */
};
@@ -243,69 +283,58 @@ enum mlx5e_priv_flag {
#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
-#endif
+enum packet_merge {
+ MLX5E_PACKET_MERGE_NONE,
+ MLX5E_PACKET_MERGE_LRO,
+ MLX5E_PACKET_MERGE_SHAMPO,
+};
+
+struct mlx5e_packet_merge_param {
+ enum packet_merge type;
+ u32 timeout;
+ struct {
+ u8 match_criteria_type;
+ u8 alignment_granularity;
+ } shampo;
+};
struct mlx5e_params {
u8 log_sq_size;
u8 rq_wq_type;
u8 log_rq_mtu_frames;
u16 num_channels;
- u8 num_tc;
+ struct {
+ u16 mode;
+ u8 num_tc;
+ struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+ struct {
+ u64 max_rate[TC_MAX_QUEUE];
+ u32 hw_id[TC_MAX_QUEUE];
+ } channel;
+ } mqprio;
bool rx_cqe_compress_def;
bool tunneled_offload_en;
struct dim_cq_moder rx_cq_moderation;
struct dim_cq_moder tx_cq_moderation;
- bool lro_en;
+ struct mlx5e_packet_merge_param packet_merge;
u8 tx_min_inline_mode;
bool vlan_strip_disable;
bool scatter_fcs_en;
bool rx_dim_enabled;
bool tx_dim_enabled;
- u32 lro_timeout;
u32 pflags;
struct bpf_prog *xdp_prog;
struct mlx5e_xsk *xsk;
unsigned int sw_mtu;
int hard_mtu;
+ bool ptp_rx;
};
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-struct mlx5e_cee_config {
- /* bw pct for priority group */
- u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
- u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
- bool pfc_setting[CEE_DCBX_MAX_PRIO];
- bool pfc_enable;
-};
-
-enum {
- MLX5_DCB_CHG_RESET,
- MLX5_DCB_NO_CHG,
- MLX5_DCB_CHG_NO_RESET,
-};
-
-struct mlx5e_dcbx {
- enum mlx5_dcbx_oper_mode mode;
- struct mlx5e_cee_config cee_cfg; /* pending configuration */
- u8 dscp_app_cnt;
-
- /* The only setting that cannot be read from FW */
- u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
- u8 cap;
-
- /* Buffer configuration */
- bool manual_buffer;
- u32 cable_len;
- u32 xoff;
-};
-
-struct mlx5e_dcbx_dp {
- u8 dscp2prio[MLX5E_MAX_DSCP];
- u8 trust_state;
-};
-#endif
+static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
+{
+ return params->mqprio.mode == TC_MQPRIO_MODE_DCB ?
+ params->mqprio.num_tc : 1;
+}
enum {
MLX5E_RQ_STATE_ENABLED,
@@ -313,6 +342,8 @@ enum {
MLX5E_RQ_STATE_AM,
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
+ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
+ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
};
struct mlx5e_cq {
@@ -323,10 +354,12 @@ struct mlx5e_cq {
u16 event_ctr;
struct napi_struct *napi;
struct mlx5_core_cq mcq;
- struct mlx5e_channel *channel;
+ struct mlx5e_ch_stats *ch_stats;
/* control */
+ struct net_device *netdev;
struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
struct mlx5_wq_ctrl wq_ctrl;
} ____cacheline_aligned_in_smp;
@@ -339,16 +372,6 @@ struct mlx5e_cq_decomp {
u16 wqe_counter;
} ____cacheline_aligned_in_smp;
-struct mlx5e_tx_wqe_info {
- struct sk_buff *skb;
- u32 num_bytes;
- u8 num_wqebbs;
- u8 num_dma;
-#ifdef CONFIG_MLX5_EN_TLS
- struct page *resync_dump_frag_page;
-#endif
-};
-
enum mlx5e_dma_map_type {
MLX5E_DMA_MAP_SINGLE,
MLX5E_DMA_MAP_PAGE
@@ -362,35 +385,48 @@ struct mlx5e_sq_dma {
enum {
MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_MPWQE,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
- MLX5E_SQ_STATE_TLS,
MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
+ MLX5E_SQ_STATE_PENDING_XSK_TX,
+ MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
+ MLX5E_SQ_STATE_XDP_MULTIBUF,
};
-struct mlx5e_sq_wqe_info {
- u8 opcode;
+struct mlx5e_tx_mpwqe {
+ /* Current MPWQE session */
+ struct mlx5e_tx_wqe *wqe;
+ u32 bytes_count;
+ u8 ds_count;
+ u8 pkt_count;
+ u8 inline_on;
+};
- /* Auxiliary data for different opcodes. */
- union {
- struct {
- struct mlx5e_rq *rq;
- } umr;
- };
+struct mlx5e_skb_fifo {
+ struct sk_buff **fifo;
+ u16 *pc;
+ u16 *cc;
+ u16 mask;
};
+struct mlx5e_ptpsq;
+
struct mlx5e_txqsq {
/* data path */
/* dirtied @completion */
u16 cc;
+ u16 skb_fifo_cc;
u32 dma_fifo_cc;
struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
+ u16 skb_fifo_pc;
u32 dma_fifo_pc;
+ struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
@@ -400,38 +436,37 @@ struct mlx5e_txqsq {
struct mlx5e_sq_stats *stats;
struct {
struct mlx5e_sq_dma *dma_fifo;
+ struct mlx5e_skb_fifo skb_fifo;
struct mlx5e_tx_wqe_info *wqe_info;
} db;
void __iomem *uar_map;
struct netdev_queue *txq;
u32 sqn;
u16 stop_room;
+ u8 max_sq_mpw_wqebbs;
u8 min_inline_mode;
struct device *pdev;
__be32 mkey_be;
unsigned long state;
unsigned int hw_mtu;
- struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
+ struct net_device *netdev;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
- struct mlx5e_channel *channel;
int ch_ix;
int txq_ix;
u32 rate_limit;
struct work_struct recover_work;
+ struct mlx5e_ptpsq *ptpsq;
+ cqe_ts_to_ns ptp_cyc2time;
} ____cacheline_aligned_in_smp;
-struct mlx5e_dma_info {
- dma_addr_t addr;
- union {
- struct page *page;
- struct {
- u64 handle;
- void *data;
- } xsk;
- };
+union mlx5e_alloc_unit {
+ struct page *page;
+ struct xdp_buff *xsk;
};
/* XDP packets can be transmitted in different ways. On completion, we need to
@@ -464,12 +499,12 @@ struct mlx5e_xdp_info {
} frame;
struct {
struct mlx5e_rq *rq;
- struct mlx5e_dma_info di;
+ struct page *page;
} page;
};
};
-struct mlx5e_xdp_xmit_data {
+struct mlx5e_xmit_data {
dma_addr_t dma_addr;
void *data;
u32 len;
@@ -482,24 +517,11 @@ struct mlx5e_xdp_info_fifo {
u32 mask;
};
-struct mlx5e_xdp_wqe_info {
- u8 num_wqebbs;
- u8 num_pkts;
-};
-
-struct mlx5e_xdp_mpwqe {
- /* Current MPWQE session */
- struct mlx5e_tx_wqe *wqe;
- u8 ds_count;
- u8 pkt_count;
- u8 inline_on;
-};
-
struct mlx5e_xdpsq;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
- struct mlx5e_xdp_xmit_data *,
- struct mlx5e_xdp_info *,
+ struct mlx5e_xmit_data *,
+ struct skb_shared_info *,
int);
struct mlx5e_xdpsq {
@@ -513,12 +535,12 @@ struct mlx5e_xdpsq {
u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
u16 pc;
struct mlx5_wqe_ctrl_seg *doorbell_cseg;
- struct mlx5e_xdp_mpwqe mpwqe;
+ struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
- struct xdp_umem *umem;
+ struct xsk_buff_pool *xsk_pool;
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
@@ -531,6 +553,8 @@ struct mlx5e_xdpsq {
u32 sqn;
struct device *pdev;
__be32 mkey_be;
+ u16 stop_room;
+ u8 max_sq_mpw_wqebbs;
u8 min_inline_mode;
unsigned long state;
unsigned int hw_mtu;
@@ -540,6 +564,8 @@ struct mlx5e_xdpsq {
struct mlx5e_channel *channel;
} ____cacheline_aligned_in_smp;
+struct mlx5e_ktls_resync_resp;
+
struct mlx5e_icosq {
/* data path */
u16 cc;
@@ -550,14 +576,16 @@ struct mlx5e_icosq {
/* write@xmit, read@completion */
struct {
- struct mlx5e_sq_wqe_info *ico_wqe;
+ struct mlx5e_icosq_wqe_info *wqe_info;
} db;
/* read only */
struct mlx5_wq_cyc wq;
void __iomem *uar_map;
u32 sqn;
+ u16 reserved_room;
unsigned long state;
+ struct mlx5e_ktls_resync_resp *ktls_resync;
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
@@ -567,19 +595,15 @@ struct mlx5e_icosq {
} ____cacheline_aligned_in_smp;
struct mlx5e_wqe_frag_info {
- struct mlx5e_dma_info *di;
+ union mlx5e_alloc_unit *au;
u32 offset;
bool last_in_page;
};
-struct mlx5e_umr_dma_info {
- struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
-};
-
struct mlx5e_mpw_info {
- struct mlx5e_umr_dma_info umr;
u16 consumed_strides;
- DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
+ union mlx5e_alloc_unit alloc_units[];
};
#define MLX5E_MAX_RX_FRAGS 4
@@ -587,13 +611,13 @@ struct mlx5e_mpw_info {
/* a single cache unit is capable to serve one napi call (for non-striding rq)
* or a MPWQE (for striding rq).
*/
-#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
- MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
+#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
+ MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
struct mlx5e_page_cache {
u32 head;
u32 tail;
- struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
+ struct page *page_cache[MLX5E_CACHE_SIZE];
};
struct mlx5e_rq;
@@ -602,10 +626,14 @@ typedef struct sk_buff *
(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
u16 cqe_bcnt, u32 head_offset, u32 page_idx);
typedef struct sk_buff *
-(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
+typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool);
+
+int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk);
+void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params);
enum mlx5e_rq_flag {
MLX5E_RQ_FLAG_XDP_XMIT,
@@ -622,6 +650,38 @@ struct mlx5e_rq_frags_info {
u8 num_frags;
u8 log_num_frags;
u8 wqe_bulk;
+ u8 wqe_index_mask;
+};
+
+struct mlx5e_dma_info {
+ dma_addr_t addr;
+ struct page *page;
+};
+
+struct mlx5e_shampo_hd {
+ u32 mkey;
+ struct mlx5e_dma_info *info;
+ struct page *last_page;
+ u16 hd_per_wq;
+ u16 hd_per_wqe;
+ unsigned long *bitmap;
+ u16 pi;
+ u16 ci;
+ __be32 key;
+ u64 last_addr;
+};
+
+struct mlx5e_hw_gro_data {
+ struct sk_buff *skb;
+ struct flow_keys fk;
+ int second_ip_id;
+};
+
+enum mlx5e_mpwrq_umr_mode {
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED,
+ MLX5E_MPWRQ_UMR_MODE_UNALIGNED,
+ MLX5E_MPWRQ_UMR_MODE_OVERSIZED,
+ MLX5E_MPWRQ_UMR_MODE_TRIPLE,
};
struct mlx5e_rq {
@@ -630,7 +690,7 @@ struct mlx5e_rq {
struct {
struct mlx5_wq_cyc wq;
struct mlx5e_wqe_frag_info *frags;
- struct mlx5e_dma_info *di;
+ union mlx5e_alloc_unit *alloc_units;
struct mlx5e_rq_frags_info info;
mlx5e_fp_skb_from_cqe skb_from_cqe;
} wqe;
@@ -639,21 +699,28 @@ struct mlx5e_rq {
struct mlx5e_umr_wqe umr_wqe;
struct mlx5e_mpw_info *info;
mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
+ __be32 umr_mkey_be;
u16 num_strides;
u16 actual_wq_head;
u8 log_stride_sz;
u8 umr_in_progress;
u8 umr_last_bulk;
u8 umr_completed;
+ u8 min_wqe_bulk;
+ u8 page_shift;
+ u8 pages_per_wqe;
+ u8 umr_wqebbs;
+ u8 mtts_per_wqe;
+ u8 umr_mode;
+ struct mlx5e_shampo_hd *shampo;
} mpwqe;
};
struct {
- u16 umem_headroom;
u16 headroom;
+ u32 frame0_sz;
u8 map_dir; /* dma map direction */
} buff;
- struct mlx5e_channel *channel;
struct device *pdev;
struct net_device *netdev;
struct mlx5e_rq_stats *stats;
@@ -662,6 +729,10 @@ struct mlx5e_rq {
struct mlx5e_page_cache page_cache;
struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
+ struct mlx5e_icosq *icosq;
+ struct mlx5e_priv *priv;
+
+ struct mlx5e_hw_gro_data *hw_gro_data;
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_post_rx_wqes post_wqes;
@@ -674,14 +745,13 @@ struct mlx5e_rq {
struct dim dim; /* Dynamic Interrupt Moderation */
/* XDP */
- struct bpf_prog *xdp_prog;
+ struct bpf_prog __rcu *xdp_prog;
struct mlx5e_xdpsq *xdpsq;
DECLARE_BITMAP(flags, 8);
struct page_pool *page_pool;
/* AF_XDP zero-copy */
- struct zero_copy_allocator zca;
- struct xdp_umem *umem;
+ struct xsk_buff_pool *xsk_pool;
struct work_struct recover_work;
@@ -691,10 +761,12 @@ struct mlx5e_rq {
u8 wq_type;
u32 rqn;
struct mlx5_core_dev *mdev;
- struct mlx5_core_mkey umr_mkey;
+ struct mlx5e_channel *channel;
+ struct mlx5e_dma_info wqe_overflow;
/* XDP read-mostly */
struct xdp_rxq_info xdp_rxq;
+ cqe_ts_to_ns ptp_cyc2time;
} ____cacheline_aligned_in_smp;
enum mlx5e_channel_state {
@@ -708,11 +780,13 @@ struct mlx5e_channel {
struct mlx5e_xdpsq rq_xdpsq;
struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
struct mlx5e_icosq icosq; /* internal control operations */
+ struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
bool xdp;
struct napi_struct napi;
struct device *pdev;
struct net_device *netdev;
__be32 mkey_be;
+ u16 qos_sqs_size;
u8 num_tc;
u8 lag_port;
@@ -722,12 +796,14 @@ struct mlx5e_channel {
/* AF_XDP zero-copy */
struct mlx5e_rq xskrq;
struct mlx5e_xdpsq xsksq;
- struct mlx5e_icosq xskicosq;
- /* xskicosq can be accessed from any CPU - the spinlock protects it. */
- spinlock_t xskicosq_lock;
+
+ /* Async ICOSQ */
+ struct mlx5e_icosq async_icosq;
+ /* async_icosq can be accessed from any CPU - the spinlock protects it. */
+ spinlock_t async_icosq_lock;
/* data path - accessed per napi poll */
- struct irq_desc *irq_desc;
+ const struct cpumask *aff_mask;
struct mlx5e_ch_stats *stats;
/* control */
@@ -737,11 +813,15 @@ struct mlx5e_channel {
DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
int cpu;
- cpumask_var_t xps_cpumask;
+ /* Sync between icosq recovery and XSK enable/disable. */
+ struct mutex icosq_recovery_lock;
};
+struct mlx5e_ptp;
+
struct mlx5e_channels {
struct mlx5e_channel **c;
+ struct mlx5e_ptp *ptp;
unsigned int num;
struct mlx5e_params params;
};
@@ -756,6 +836,13 @@ struct mlx5e_channel_stats {
struct mlx5e_xdpsq_stats xsksq;
} ____cacheline_aligned_in_smp;
+struct mlx5e_ptp_stats {
+ struct mlx5e_ch_stats ch;
+ struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq_stats rq;
+} ____cacheline_aligned_in_smp;
+
enum {
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
@@ -763,34 +850,13 @@ enum {
MLX5E_STATE_XDP_ACTIVE,
};
-struct mlx5e_rqt {
- u32 rqtn;
- bool enabled;
-};
-
-struct mlx5e_tir {
- u32 tirn;
- struct mlx5e_rqt rqt;
- struct list_head list;
-};
-
-enum {
- MLX5E_TC_PRIO = 0,
- MLX5E_NIC_PRIO
-};
-
-struct mlx5e_rss_params {
- u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
- u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
- u8 toeplitz_hash_key[40];
- u8 hfunc;
-};
-
struct mlx5e_modify_sq_param {
int curr_state;
int next_state;
int rl_update;
int rl_index;
+ bool qos_update;
+ u16 qos_queue_group_id;
};
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
@@ -803,20 +869,33 @@ struct mlx5e_hv_vhca_stats_agent {
#endif
struct mlx5e_xsk {
- /* UMEMs are stored separately from channels, because we don't want to
- * lose them when channels are recreated. The kernel also stores UMEMs,
- * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs,
- * so rely on our mechanism.
+ /* XSK buffer pools are stored separately from channels,
+ * because we don't want to lose them when channels are
+ * recreated. The kernel also stores buffer pool, but it doesn't
+ * distinguish between zero-copy and non-zero-copy UMEMs, so
+ * rely on our mechanism.
*/
- struct xdp_umem **umems;
+ struct xsk_buff_pool **pools;
u16 refcnt;
bool ever_used;
};
+/* Temporary storage for variables that are allocated when struct mlx5e_priv is
+ * initialized, and used where we can't allocate them because that functions
+ * must not fail. Use with care and make sure the same variable is not used
+ * simultaneously by multiple users.
+ */
+struct mlx5e_scratchpad {
+ cpumask_var_t cpumask;
+};
+
+struct mlx5e_trap;
+struct mlx5e_htb;
+
struct mlx5e_priv {
/* priv data path fields - start */
- struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
- int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+ struct mlx5e_selq selq;
+ struct mlx5e_txqsq **txq2sq;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx_dp dcbx_dp;
#endif
@@ -829,15 +908,10 @@ struct mlx5e_priv {
struct mlx5e_channels channels;
u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
- struct mlx5e_rqt indir_rqt;
- struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
- struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS];
- struct mlx5e_rss_params rss_params;
- u32 tx_rates[MLX5E_MAX_NUM_SQS];
+ struct mlx5e_rx_res *rx_res;
+ u32 *tx_rates;
- struct mlx5e_flow_steering fs;
+ struct mlx5e_flow_steering *fs;
struct workqueue_struct *wq;
struct work_struct update_carrier_work;
@@ -849,21 +923,34 @@ struct mlx5e_priv {
struct mlx5_core_dev *mdev;
struct net_device *netdev;
+ struct mlx5e_trap *en_trap;
struct mlx5e_stats stats;
- struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_channel_stats **channel_stats;
+ struct mlx5e_channel_stats trap_stats;
+ struct mlx5e_ptp_stats ptp_stats;
+ struct mlx5e_sq_stats **htb_qos_sq_stats;
+ u16 htb_max_qos_sqs;
+ u16 stats_nch;
u16 max_nch;
u8 max_opened_tc;
+ bool tx_ptp_opened;
+ bool rx_ptp_opened;
struct hwtstamp_config tstamp;
u16 q_counter;
u16 drop_rq_q_counter;
struct notifier_block events_nb;
+ struct notifier_block blocking_events_nb;
+ struct udp_tunnel_nic_info nic_info;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
#endif
const struct mlx5e_profile *profile;
void *ppriv;
+#ifdef CONFIG_MLX5_EN_MACSEC
+ struct mlx5e_macsec *macsec;
+#endif
#ifdef CONFIG_MLX5_EN_IPSEC
struct mlx5e_ipsec *ipsec;
#endif
@@ -876,12 +963,30 @@ struct mlx5e_priv {
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
struct mlx5e_hv_vhca_stats_agent stats_agent;
#endif
+ struct mlx5e_scratchpad scratchpad;
+ struct mlx5e_htb *htb;
+ struct mlx5e_mqprio_rl *mqprio_rl;
+};
+
+struct mlx5e_rx_handlers {
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe;
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo;
+};
+
+extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic;
+
+enum mlx5e_profile_feature {
+ MLX5E_PROFILE_FEATURE_PTP_RX,
+ MLX5E_PROFILE_FEATURE_PTP_TX,
+ MLX5E_PROFILE_FEATURE_QOS_HTB,
+ MLX5E_PROFILE_FEATURE_FS_VLAN,
+ MLX5E_PROFILE_FEATURE_FS_TC,
};
struct mlx5e_profile {
int (*init)(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile, void *ppriv);
+ struct net_device *netdev);
void (*cleanup)(struct mlx5e_priv *priv);
int (*init_rx)(struct mlx5e_priv *priv);
void (*cleanup_rx)(struct mlx5e_priv *priv);
@@ -892,92 +997,35 @@ struct mlx5e_profile {
int (*update_rx)(struct mlx5e_priv *priv);
void (*update_stats)(struct mlx5e_priv *priv);
void (*update_carrier)(struct mlx5e_priv *priv);
+ int (*max_nch_limit)(struct mlx5_core_dev *mdev);
unsigned int (*stats_grps_num)(struct mlx5e_priv *priv);
mlx5e_stats_grp_t *stats_grps;
- struct {
- mlx5e_fp_handle_rx_cqe handle_rx_cqe;
- mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
- } rx_handlers;
+ const struct mlx5e_rx_handlers *rx_handlers;
int max_tc;
- u8 rq_groups;
+ u32 features;
};
-void mlx5e_build_ptys2ethtool_map(void);
-
-u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev);
-netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
-netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
-
-void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
-void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
-void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
-int mlx5e_napi_poll(struct napi_struct *napi, int budget);
-bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
-int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
-void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
-
-static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
-{
- switch (rq->wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
- default:
- return mlx5_wq_cyc_get_size(&rq->wqe.wq);
- }
-}
+#define mlx5e_profile_feature_cap(profile, feature) \
+ ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))
-static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
-{
- switch (rq->wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return rq->mpwqe.wq.cur_sz;
- default:
- return rq->wqe.wq.cur_sz;
- }
-}
+void mlx5e_build_ptys2ethtool_map(void);
-bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
-
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info,
- bool recycle);
-void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
-void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
-bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
-void mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
-bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
-void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
-void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
-struct sk_buff *
-mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
- u16 cqe_bcnt, u32 head_offset, u32 page_idx);
-struct sk_buff *
-mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
- u16 cqe_bcnt, u32 head_offset, u32 page_idx);
-struct sk_buff *
-mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
-struct sk_buff *
-mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
-void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
int mlx5e_self_test_num(struct mlx5e_priv *priv);
+int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data);
void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
u64 *buf);
void mlx5e_set_rx_mode_work(struct work_struct *work);
int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
-int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val);
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter);
int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
u16 vid);
@@ -985,87 +1033,93 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
u16 vid);
void mlx5e_timestamp_init(struct mlx5e_priv *priv);
-struct mlx5e_redirect_rqt_param {
- bool is_rss;
- union {
- u32 rqn; /* Direct RQN (Non-RSS) */
- struct {
- u8 hfunc;
- struct mlx5e_channels *channels;
- } rss; /* RSS data */
- };
-};
-
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
- struct mlx5e_redirect_rqt_param rrp);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
- const struct mlx5e_tirc_config *ttconfig,
- void *tirc, bool inner);
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
-struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
-
struct mlx5e_xsk_param;
struct mlx5e_rq_param;
-int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem, struct mlx5e_rq *rq);
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+ struct mlx5e_xsk_param *xsk, int node,
+ struct mlx5e_rq *rq);
+#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
-void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_close_rq(struct mlx5e_rq *rq);
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
+void mlx5e_destroy_rq(struct mlx5e_rq *rq);
struct mlx5e_sq_param;
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
-void mlx5e_close_icosq(struct mlx5e_icosq *sq);
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect);
void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
+struct mlx5e_create_cq_param {
+ struct napi_struct *napi;
+ struct mlx5e_ch_stats *ch_stats;
+ int node;
+ int ix;
+};
+
struct mlx5e_cq_param;
-int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
- struct mlx5e_cq_param *param, struct mlx5e_cq *cq);
+int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
+ struct mlx5e_cq *cq);
void mlx5e_close_cq(struct mlx5e_cq *cq);
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c);
+void mlx5e_trigger_napi_sched(struct napi_struct *napi);
+
int mlx5e_open_channels(struct mlx5e_priv *priv,
struct mlx5e_channels *chs);
void mlx5e_close_channels(struct mlx5e_channels *chs);
-/* Function pointer to be used to modify WH settings while
+/* Function pointer to be used to modify HW or kernel settings while
* switching channels
*/
-typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
+typedef int (*mlx5e_fp_preactivate)(struct mlx5e_priv *priv, void *context);
+#define MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(fn) \
+int fn##_ctx(struct mlx5e_priv *priv, void *context) \
+{ \
+ return fn(priv); \
+}
int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
-int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *new_chs,
- mlx5e_fp_hw_modify hw_modify);
+int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
+ struct mlx5e_params *new_params,
+ mlx5e_fp_preactivate preactivate,
+ void *context, bool reset);
+int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
+int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
- int num_channels);
-void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
- u8 cq_period_mode);
-void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
- u8 cq_period_mode);
-void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
struct mlx5e_modify_sq_param *p);
+int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
+ struct mlx5e_params *params, struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id,
+ struct mlx5e_sq_stats *sq_stats);
void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_free_txqsq(struct mlx5e_txqsq *sq);
void mlx5e_tx_disable_queue(struct netdev_queue *txq);
+int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa);
+void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq);
+struct mlx5e_create_sq_param;
+int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u16 qos_queue_group_id,
+ u32 *sqn);
+void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
+void mlx5e_close_txqsq(struct mlx5e_txqsq *sq);
static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
{
@@ -1074,21 +1128,13 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
}
extern const struct ethtool_ops mlx5e_ethtool_ops;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
-int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
-void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
-void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
-void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
-#endif
-int mlx5e_create_tir(struct mlx5_core_dev *mdev,
- struct mlx5e_tir *tir, u32 *in, int inlen);
-void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
- struct mlx5e_tir *tir);
+int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey);
int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
-int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+ bool enable_mc_lb);
+void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc);
/* common netdev helpers */
void mlx5e_create_q_counters(struct mlx5e_priv *priv);
@@ -1097,17 +1143,6 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
struct mlx5e_rq *drop_rq);
void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
-
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
-void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
-
int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
@@ -1117,15 +1152,14 @@ int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
-void mlx5e_update_ndo_stats(struct mlx5e_priv *priv);
void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
-int mlx5e_bits_invert(unsigned long a, int size);
-typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
+int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context);
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
- change_hw_mtu_cb set_mtu_cb);
+ mlx5e_fp_preactivate preactivate);
+void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv);
/* ethtool helpers */
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
@@ -1136,7 +1170,8 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset);
void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
struct ethtool_stats *stats, u64 *data);
void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
- struct ethtool_ringparam *param);
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param);
int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
struct ethtool_ringparam *param);
void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
@@ -1144,13 +1179,22 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch);
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal);
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal);
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal);
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack);
int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
struct ethtool_link_ksettings *link_ksettings);
int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
const struct ethtool_link_ksettings *link_ksettings);
+int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key,
+ const u8 hfunc);
+int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+ u32 *rule_locs);
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
@@ -1163,33 +1207,32 @@ int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
struct ethtool_pauseparam *pauseparam);
/* mlx5e generic netdev management API */
-int mlx5e_netdev_init(struct net_device *netdev,
- struct mlx5e_priv *priv,
- struct mlx5_core_dev *mdev,
- const struct mlx5e_profile *profile,
- void *ppriv);
-void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv);
-struct net_device*
-mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
- int nch, void *ppriv);
+static inline bool
+mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev)
+{
+ return !is_kdump_kernel() &&
+ MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe);
+}
+
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev);
+int mlx5e_priv_init(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev,
+ struct mlx5_core_dev *mdev);
+void mlx5e_priv_cleanup(struct mlx5e_priv *priv);
+struct net_device *
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile);
int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
+int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *new_profile, void *new_ppriv);
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
-void mlx5e_build_nic_params(struct mlx5e_priv *priv,
- struct mlx5e_xsk *xsk,
- struct mlx5e_rss_params *rss_params,
- struct mlx5e_params *params,
- u16 mtu);
-void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
- u16 num_channels);
+void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
-void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
-void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
netdev_features_t mlx5e_features_check(struct sk_buff *skb,
struct net_device *netdev,
netdev_features_t features);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
new file mode 100644
index 000000000000..48581ea3adcb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+ return chs->num;
+}
+
+static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix)
+{
+ WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs));
+ return chs->c[ix];
+}
+
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ *rqn = c->rq.rqn;
+}
+
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));
+
+ *rqn = c->xskrq.rqn;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+ struct mlx5e_ptp *c = chs->ptp;
+
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+ return false;
+
+ *rqn = c->rq.rqn;
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
new file mode 100644
index 000000000000..637ca90daaa8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
new file mode 100644
index 000000000000..b59aee75de94
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5E_DCBNL_H__
+#define __MLX5E_DCBNL_H__
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+
+#define MLX5E_MAX_PRIORITY (8)
+
+struct mlx5e_cee_config {
+ /* bw pct for priority group */
+ u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
+ u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+ bool pfc_setting[CEE_DCBX_MAX_PRIO];
+ bool pfc_enable;
+};
+
+struct mlx5e_dcbx {
+ enum mlx5_dcbx_oper_mode mode;
+ struct mlx5e_cee_config cee_cfg; /* pending configuration */
+ u8 dscp_app_cnt;
+
+ /* The only setting that cannot be read from FW */
+ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
+ u8 cap;
+
+ /* Buffer configuration */
+ bool manual_buffer;
+ u32 cable_len;
+ u32 xoff;
+ u16 port_buff_cell_sz;
+};
+
+#define MLX5E_MAX_DSCP (64)
+
+struct mlx5e_dcbx_dp {
+ u8 dscp2prio[MLX5E_MAX_DSCP];
+ u8 trust_state;
+};
+
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
+#else
+static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
+static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_DCBNL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
new file mode 100644
index 000000000000..b69f9d10ccbd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "en/devlink.h"
+#include "eswitch.h"
+
+static void
+mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+ u64 parent_id;
+
+ parent_id = mlx5_query_nic_system_image_guid(dev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
+
+int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
+{
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
+ struct devlink_port_attrs attrs = {};
+ struct netdev_phys_item_id ppid = {};
+ struct devlink_port *dl_port;
+ unsigned int dl_port_index;
+ int ret;
+
+ if (mlx5_core_is_pf(priv->mdev)) {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = mlx5_get_dev_index(priv->mdev);
+ if (MLX5_ESWITCH_MANAGER(priv->mdev)) {
+ mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid);
+ memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+ attrs.switch_id.id_len = ppid.id_len;
+ }
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev,
+ MLX5_VPORT_UPLINK);
+ } else {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0);
+ }
+
+ dl_port = mlx5e_devlink_get_dl_port(priv);
+ memset(dl_port, 0, sizeof(*dl_port));
+ devlink_port_attrs_set(dl_port, &attrs);
+
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ ret = devl_port_register(devlink, dl_port, dl_port_index);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+
+ return ret;
+}
+
+void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+ devlink_port_type_eth_set(dl_port, priv->netdev);
+}
+
+void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
+
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ devl_port_unregister(dl_port);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+}
+
+struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (!netif_device_present(dev))
+ return NULL;
+
+ return mlx5e_devlink_get_dl_port(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
new file mode 100644
index 000000000000..10b50feb9883
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_DEVLINK_H
+#define __MLX5E_EN_DEVLINK_H
+
+#include <net/devlink.h>
+#include "en.h"
+
+int mlx5e_devlink_port_register(struct mlx5e_priv *priv);
+void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv);
+void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv);
+struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev);
+
+static inline struct devlink_port *
+mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv)
+{
+ return &priv->mdev->mlx5e_res.dl_port;
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 0416f7712109..bf2741eb7f9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -4,24 +4,21 @@
#ifndef __MLX5E_FLOW_STEER_H__
#define __MLX5E_FLOW_STEER_H__
+#include "mod_hdr.h"
+#include "lib/fs_ttc.h"
+
+struct mlx5e_post_act;
+struct mlx5e_tc_table;
+
enum {
MLX5E_TC_FT_LEVEL = 0,
MLX5E_TC_TTC_FT_LEVEL,
+ MLX5E_TC_MISS_LEVEL,
};
-struct mlx5e_tc_table {
- /* protects flow table */
- struct mutex t_lock;
- struct mlx5_flow_table *t;
-
- struct rhashtable ht;
-
- struct mod_hdr_tbl mod_hdr;
- struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
- DECLARE_HASHTABLE(hairpin_tbl, 8);
-
- struct notifier_block netdevice_nb;
- struct netdev_net_notifier netdevice_nn;
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
};
struct mlx5e_flow_table {
@@ -37,51 +34,29 @@ struct mlx5e_l2_rule {
#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE)
-struct mlx5e_vlan_table {
- struct mlx5e_flow_table ft;
- DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
- DECLARE_BITMAP(active_svlans, VLAN_N_VID);
- struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID];
- struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID];
- struct mlx5_flow_handle *untagged_rule;
- struct mlx5_flow_handle *any_cvlan_rule;
- struct mlx5_flow_handle *any_svlan_rule;
- bool cvlan_filter_disabled;
+struct mlx5e_promisc_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *rule;
};
+/* Forward declaration and APIs to get private fields of vlan_table */
+struct mlx5e_vlan_table;
+unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan);
+struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan);
+
struct mlx5e_l2_table {
struct mlx5e_flow_table ft;
struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE];
struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE];
struct mlx5e_l2_rule broadcast;
struct mlx5e_l2_rule allmulti;
- struct mlx5e_l2_rule promisc;
+ struct mlx5_flow_handle *trap_rule;
bool broadcast_enabled;
bool allmulti_enabled;
bool promisc_enabled;
};
-enum mlx5e_traffic_types {
- MLX5E_TT_IPV4_TCP,
- MLX5E_TT_IPV6_TCP,
- MLX5E_TT_IPV4_UDP,
- MLX5E_TT_IPV6_UDP,
- MLX5E_TT_IPV4_IPSEC_AH,
- MLX5E_TT_IPV6_IPSEC_AH,
- MLX5E_TT_IPV4_IPSEC_ESP,
- MLX5E_TT_IPV6_IPSEC_ESP,
- MLX5E_TT_IPV4,
- MLX5E_TT_IPV6,
- MLX5E_TT_ANY,
- MLX5E_NUM_TT,
- MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
-};
-
-struct mlx5e_tirc_config {
- u8 l3_prot_type;
- u8 l4_prot_type;
- u32 rx_hash_fields;
-};
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP)
@@ -93,170 +68,137 @@ struct mlx5e_tirc_config {
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-enum mlx5e_tunnel_types {
- MLX5E_TT_IPV4_GRE,
- MLX5E_TT_IPV6_GRE,
- MLX5E_TT_IPV4_IPIP,
- MLX5E_TT_IPV6_IPIP,
- MLX5E_TT_IPV4_IPV6,
- MLX5E_TT_IPV6_IPV6,
- MLX5E_NUM_TUNNEL_TT,
-};
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
-
-/* L3/L4 traffic type classifier */
-struct mlx5e_ttc_table {
- struct mlx5e_flow_table ft;
- struct mlx5_flow_handle *rules[MLX5E_NUM_TT];
- struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
-};
-
/* NIC prio FTS */
enum {
- MLX5E_VLAN_FT_LEVEL = 0,
+ MLX5E_PROMISC_FT_LEVEL,
+ MLX5E_VLAN_FT_LEVEL,
MLX5E_L2_FT_LEVEL,
MLX5E_TTC_FT_LEVEL,
MLX5E_INNER_TTC_FT_LEVEL,
+ MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+ MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#ifdef CONFIG_MLX5_EN_TLS
+ MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#endif
#ifdef CONFIG_MLX5_EN_ARFS
- MLX5E_ARFS_FT_LEVEL
+ MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+ MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
#endif
};
-#define MLX5E_TTC_NUM_GROUPS 3
-#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
-#define MLX5E_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
- MLX5E_TTC_GROUP2_SIZE +\
- MLX5E_TTC_GROUP3_SIZE)
-
-#define MLX5E_INNER_TTC_NUM_GROUPS 3
-#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
-#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
- MLX5E_INNER_TTC_GROUP2_SIZE +\
- MLX5E_INNER_TTC_GROUP3_SIZE)
-
-#ifdef CONFIG_MLX5_EN_RXNFC
-
-struct mlx5e_ethtool_table {
- struct mlx5_flow_table *ft;
- int num_rules;
-};
-
-#define ETHTOOL_NUM_L3_L4_FTS 7
-#define ETHTOOL_NUM_L2_FTS 4
-
-struct mlx5e_ethtool_steering {
- struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
- struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
- struct list_head rules;
- int tot_num_rules;
-};
-
-void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
-void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
-int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
-int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
- struct ethtool_rxnfc *info, u32 *rule_locs);
-#else
-static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { }
-static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
-static inline int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
-{ return -EOPNOTSUPP; }
-static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
- struct ethtool_rxnfc *info, u32 *rule_locs)
-{ return -EOPNOTSUPP; }
-#endif /* CONFIG_MLX5_EN_RXNFC */
+struct mlx5e_flow_steering;
+struct mlx5e_rx_res;
#ifdef CONFIG_MLX5_EN_ARFS
-#define ARFS_HASH_SHIFT BITS_PER_BYTE
-#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
-
-struct arfs_table {
- struct mlx5e_flow_table ft;
- struct mlx5_flow_handle *default_rule;
- struct hlist_head rules_hash[ARFS_HASH_SIZE];
-};
-
-enum arfs_type {
- ARFS_IPV4_TCP,
- ARFS_IPV6_TCP,
- ARFS_IPV4_UDP,
- ARFS_IPV6_UDP,
- ARFS_NUM_TYPES,
-};
-
-struct mlx5e_arfs_tables {
- struct arfs_table arfs_tables[ARFS_NUM_TYPES];
- /* Protect aRFS rules list */
- spinlock_t arfs_lock;
- struct list_head rules;
- int last_filter_id;
- struct workqueue_struct *wq;
-};
+struct mlx5e_arfs_tables;
-int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
-int mlx5e_arfs_enable(struct mlx5e_priv *priv);
-int mlx5e_arfs_disable(struct mlx5e_priv *priv);
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple);
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple);
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs);
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs);
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#else
-static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
-static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
-static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
+{ return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
#endif
-struct mlx5e_flow_steering {
- struct mlx5_flow_namespace *ns;
-#ifdef CONFIG_MLX5_EN_RXNFC
- struct mlx5e_ethtool_steering ethtool;
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp;
#endif
- struct mlx5e_tc_table tc;
- struct mlx5e_vlan_table vlan;
- struct mlx5e_l2_table l2;
- struct mlx5e_ttc_table ttc;
- struct mlx5e_ttc_table inner_ttc;
-#ifdef CONFIG_MLX5_EN_ARFS
- struct mlx5e_arfs_tables arfs;
-#endif
-};
-struct ttc_params {
- struct mlx5_flow_table_attr ft_attr;
- u32 any_tt_tirn;
- u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_ttc_table *inner_ttc;
-};
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
-void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params);
+struct mlx5e_profile;
+struct mlx5e_fs_udp;
+struct mlx5e_fs_any;
+struct mlx5e_ptp_fs;
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc);
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params, bool tunnel);
-int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs);
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res);
void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
-void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
-
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
-void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
-
-bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type);
-bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev);
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev);
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile);
+
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy);
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs);
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc);
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs);
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress);
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress);
+#ifdef CONFIG_MLX5_EN_RXNFC
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs);
+#endif
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner);
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner);
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs);
+#endif
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs);
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any);
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp);
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp);
+#endif
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy);
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable);
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs);
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs);
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+
+#define fs_err(fs, fmt, ...) \
+ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_dbg(fs, fmt, ...) \
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn(fs, fmt, ...) \
+ mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn_once(fs, fmt, ...) \
+ mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
#endif /* __MLX5E_FLOW_STEER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
new file mode 100644
index 000000000000..9e276fd3c0cf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5E_FS_ETHTOOL_H__
+#define __MLX5E_FS_ETHTOOL_H__
+
+struct mlx5e_priv;
+struct mlx5e_ethtool_steering;
+#ifdef CONFIG_MLX5_EN_RXNFC
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool);
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool);
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs);
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{ return 0; }
+static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { }
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
new file mode 100644
index 000000000000..03cb79adf912
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "en/fs_tt_redirect.h"
+#include "fs_core.h"
+#include "mlx5_core.h"
+
+enum fs_udp_type {
+ FS_IPV4_UDP,
+ FS_IPV6_UDP,
+ FS_UDP_NUM_TYPES,
+};
+
+struct mlx5e_fs_udp {
+ struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES];
+ struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES];
+ int ref_cnt;
+};
+
+struct mlx5e_fs_any {
+ struct mlx5e_flow_table table;
+ struct mlx5_flow_handle *default_rule;
+ int ref_cnt;
+};
+
+static char *fs_udp_type2str(enum fs_udp_type i)
+{
+ switch (i) {
+ case FS_IPV4_UDP:
+ return "UDP v4";
+ default: /* FS_IPV6_UDP */
+ return "UDP v6";
+ }
+}
+
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
+{
+ switch (i) {
+ case FS_IPV4_UDP:
+ return MLX5_TT_IPV4_UDP;
+ default: /* FS_IPV6_UDP */
+ return MLX5_TT_IPV6_UDP;
+ }
+}
+
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
+{
+ switch (i) {
+ case MLX5_TT_IPV4_UDP:
+ return FS_IPV4_UDP;
+ case MLX5_TT_IPV6_UDP:
+ return FS_IPV6_UDP;
+ default:
+ return FS_UDP_NUM_TYPES;
+ }
+}
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type,
+ u16 udp_dport)
+{
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+ type == FS_IPV4_UDP ? 4 : 6);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport);
+}
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5_traffic_types ttc_type,
+ u32 tir_num, u16 d_port)
+{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ enum fs_udp_type type = tt2fs_udp(ttc_type);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (type == FS_UDP_NUM_TYPES)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ft = fs_udp->tables[type].t;
+
+ fs_udp_set_dport_flow(spec, type, d_port);
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add %s rule failed, err %d\n",
+ __func__, fs_udp_type2str(type), err);
+ }
+ return rule;
+}
+
+static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ struct mlx5e_flow_table *fs_udp_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ fs_udp_t = &fs_udp->tables[type];
+
+ dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type));
+ rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n",
+ __func__, type, err);
+ return err;
+ }
+
+ fs_udp->default_rules[type] = rule;
+ return 0;
+}
+
+#define MLX5E_FS_UDP_NUM_GROUPS (2)
+#define MLX5E_FS_UDP_GROUP1_SIZE (BIT(16))
+#define MLX5E_FS_UDP_GROUP2_SIZE (BIT(0))
+#define MLX5E_FS_UDP_TABLE_SIZE (MLX5E_FS_UDP_GROUP1_SIZE +\
+ MLX5E_FS_UDP_GROUP2_SIZE)
+static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
+
+ switch (type) {
+ case FS_IPV4_UDP:
+ case FS_IPV6_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ /* Match on udp protocol, Ipv4/6 and dport */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_UDP_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_UDP_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+out:
+ kvfree(in);
+
+ return err;
+}
+
+static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
+ int err;
+
+ ft = &fs_udp->tables[type];
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
+ ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n",
+ fs_udp_type2str(type), ft->t->id, ft->t->level);
+
+ err = fs_udp_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = fs_udp_add_default_rule(fs, type);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i)
+{
+ if (IS_ERR_OR_NULL(fs_udp->tables[i].t))
+ return;
+
+ mlx5_del_flow_rules(fs_udp->default_rules[i]);
+ mlx5e_destroy_flow_table(&fs_udp->tables[i]);
+ fs_udp->tables[i].t = NULL;
+}
+
+static int fs_udp_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err, i;
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i));
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int fs_udp_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
+ struct mlx5_flow_destination dest = {};
+ int err, i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ dest.ft = udp->tables[i].t;
+
+ /* Modify ttc rules destination to point on the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest);
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ int i;
+
+ if (!fs_udp)
+ return;
+
+ if (--fs_udp->ref_cnt)
+ return;
+
+ fs_udp_disable(fs);
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++)
+ fs_udp_destroy_table(fs_udp, i);
+
+ kfree(fs_udp);
+ mlx5e_fs_set_udp(fs, NULL);
+}
+
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
+ int i, err;
+
+ if (udp) {
+ udp->ref_cnt++;
+ return 0;
+ }
+
+ udp = kzalloc(sizeof(*udp), GFP_KERNEL);
+ if (!udp)
+ return -ENOMEM;
+ mlx5e_fs_set_udp(fs, udp);
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ err = fs_udp_create_table(fs, i);
+ if (err)
+ goto err_destroy_tables;
+ }
+
+ err = fs_udp_enable(fs);
+ if (err)
+ goto err_destroy_tables;
+
+ udp->ref_cnt = 1;
+
+ return 0;
+
+err_destroy_tables:
+ while (--i >= 0)
+ fs_udp_destroy_table(udp, i);
+
+ kfree(udp);
+ mlx5e_fs_set_udp(fs, NULL);
+ return err;
+}
+
+static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type)
+{
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type);
+}
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
+ u32 tir_num, u16 ether_type)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ft = fs_any->table.t;
+
+ fs_any_set_ethertype_flow(spec, ether_type);
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add ANY rule failed, err %d\n",
+ __func__, err);
+ }
+ return rule;
+}
+
+static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *fs_any_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ fs_any_t = &fs_any->table;
+ dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY);
+ rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n",
+ __func__, err);
+ return err;
+ }
+
+ fs_any->default_rule = rule;
+ return 0;
+}
+
+#define MLX5E_FS_ANY_NUM_GROUPS (2)
+#define MLX5E_FS_ANY_GROUP1_SIZE (BIT(16))
+#define MLX5E_FS_ANY_GROUP2_SIZE (BIT(0))
+#define MLX5E_FS_ANY_TABLE_SIZE (MLX5E_FS_ANY_GROUP1_SIZE +\
+ MLX5E_FS_ANY_GROUP2_SIZE)
+
+static int fs_any_create_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ /* Match on ethertype */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_ANY_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_ANY_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static int fs_any_create_table(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *ft = &fs_any->table;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
+ ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n",
+ ft->t->id, ft->t->level);
+
+ err = fs_any_create_groups(ft);
+ if (err)
+ goto err;
+
+ err = fs_any_add_default_rule(fs);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static int fs_any_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err;
+
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY);
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
+ return err;
+ }
+ return 0;
+}
+
+static int fs_any_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs);
+ struct mlx5_flow_destination dest = {};
+ int err;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = any->table.t;
+
+ /* Modify ttc rules destination to point on the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest);
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
+ return err;
+ }
+ return 0;
+}
+
+static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any)
+{
+ if (IS_ERR_OR_NULL(fs_any->table.t))
+ return;
+
+ mlx5_del_flow_rules(fs_any->default_rule);
+ mlx5e_destroy_flow_table(&fs_any->table);
+ fs_any->table.t = NULL;
+}
+
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+
+ if (!fs_any)
+ return;
+
+ if (--fs_any->ref_cnt)
+ return;
+
+ fs_any_disable(fs);
+
+ fs_any_destroy_table(fs_any);
+
+ kfree(fs_any);
+ mlx5e_fs_set_any(fs, NULL);
+}
+
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ int err;
+
+ if (fs_any) {
+ fs_any->ref_cnt++;
+ return 0;
+ }
+
+ fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL);
+ if (!fs_any)
+ return -ENOMEM;
+ mlx5e_fs_set_any(fs, fs_any);
+
+ err = fs_any_create_table(fs);
+ if (err)
+ return err;
+
+ err = fs_any_enable(fs);
+ if (err)
+ goto err_destroy_table;
+
+ fs_any->ref_cnt = 1;
+
+ return 0;
+
+err_destroy_table:
+ fs_any_destroy_table(fs_any);
+
+ kfree(fs_any);
+ mlx5e_fs_set_any(fs, NULL);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
new file mode 100644
index 000000000000..5780fd7ad507
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5E_FS_TT_REDIRECT_H__
+#define __MLX5E_FS_TT_REDIRECT_H__
+
+#include "en/fs.h"
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
+
+/* UDP traffic type redirect */
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5_traffic_types ttc_type,
+ u32 tir_num, u16 d_port);
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs);
+
+/* ANY traffic type redirect*/
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
+ u32 tir_num, u16 ether_type);
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 20b907dc1e29..6f4e6c34b2a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -3,8 +3,9 @@
#include "health.h"
#include "lib/eq.h"
+#include "lib/mlx5.h"
-int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
+int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
int err;
@@ -19,7 +20,7 @@ int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
return 0;
}
-int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
+int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
int err;
@@ -34,22 +35,21 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
return 0;
}
-int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
{
- struct mlx5e_priv *priv = cq->channel->priv;
u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
u8 hw_status;
void *cqc;
int err;
- err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
+ err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
if (err)
return err;
cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
hw_status = MLX5_GET(cqc, cqc, status);
- err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
if (err)
return err;
@@ -61,14 +61,22 @@ int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
if (err)
return err;
- err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
if (err)
return err;
return 0;
}
-int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
{
u8 cq_log_stride;
u32 cq_sz;
@@ -77,7 +85,7 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *
cq_sz = mlx5_cqwq_get_size(&cq->wq);
cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
- err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
if (err)
return err;
@@ -89,26 +97,48 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *
if (err)
return err;
- err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
if (err)
return err;
return 0;
}
-int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
+int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
{
int err;
- err = mlx5e_reporter_tx_create(priv);
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
if (err)
return err;
- err = mlx5e_reporter_rx_create(priv);
+ err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
if (err)
return err;
- return 0;
+ err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
+{
+ mlx5e_reporter_tx_create(priv);
+ mlx5e_reporter_rx_create(priv);
}
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
@@ -127,10 +157,8 @@ void mlx5e_health_channels_update(struct mlx5e_priv *priv)
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
}
-int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
+int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
{
- struct mlx5_core_dev *mdev = channel->mdev;
- struct net_device *dev = channel->netdev;
struct mlx5e_modify_sq_param msp = {};
int err;
@@ -175,21 +203,22 @@ out:
return err;
}
-int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
+int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
+ struct mlx5e_ch_stats *stats)
{
u32 eqe_count;
- netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+ netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
eq->core.eqn, eq->core.cons_index, eq->core.irqn);
eqe_count = mlx5_eq_poll_irq_disabled(eq);
if (!eqe_count)
return -EIO;
- netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
+ netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
eqe_count, eq->core.eqn);
- channel->stats->eq_rearm++;
+ stats->eq_rearm++;
return 0;
}
@@ -197,10 +226,114 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
struct devlink_health_reporter *reporter, char *err_str,
struct mlx5e_err_ctx *err_ctx)
{
- netdev_err(priv->netdev, err_str);
+ netdev_err(priv->netdev, "%s\n", err_str);
if (!reporter)
return err_ctx->recover(err_ctx->ctx);
return devlink_health_report(reporter, err_str, err_ctx);
}
+
+#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
+static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
+ const void *value, u32 value_len)
+
+{
+ u32 data_size;
+ int err = 0;
+ u32 offset;
+
+ for (offset = 0; offset < value_len; offset += data_size) {
+ data_size = value_len - offset;
+ if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
+ data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
+ err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct page *page;
+ int cmd_err, err;
+ int end_err;
+ int size;
+
+ if (IS_ERR_OR_NULL(mdev->rsc_dump))
+ return -EOPNOTSUPP;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
+ if (err)
+ goto free_page;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+
+ err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
+ if (err)
+ goto destroy_cmd;
+
+ } while (cmd_err > 0);
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+ end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
+ if (end_err)
+ err = end_err;
+free_page:
+ __free_page(page);
+ return err;
+}
+
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ int queue_idx, char *lbl)
+{
+ struct mlx5_rsc_key key = {};
+ int err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = queue_idx;
+ key.size = PAGE_SIZE;
+ key.num_of_obj1 = 1;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
+ if (err)
+ return err;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index d3693fa547ac..0107e4e73bb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -5,49 +5,54 @@
#define __MLX5E_EN_HEALTH_H
#include "en.h"
-
-#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
+#include "diag/rsc_dump.h"
static inline bool cqe_syndrome_needs_recover(u8 syndrome)
{
- return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR ||
- syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+ return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
}
-int mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
-int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
-int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
-int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
-int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
+int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg);
+int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
+int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg);
-int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
struct mlx5e_err_ctx {
int (*recover)(void *ctx);
+ int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
void *ctx;
};
-int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn);
-int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel);
+int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn);
+int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
+ struct mlx5e_ch_stats *stats);
int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
int mlx5e_health_report(struct mlx5e_priv *priv,
struct devlink_health_reporter *reporter, char *err_str,
struct mlx5e_err_ctx *err_ctx);
-int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_create_reporters(struct mlx5e_priv *priv);
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
void mlx5e_health_channels_update(struct mlx5e_priv *priv);
-
-
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+ struct devlink_fmsg *fmsg);
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ int queue_idx, char *lbl);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
new file mode 100644
index 000000000000..6dac76fa58a3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/pkt_cls.h>
+#include "htb.h"
+#include "en.h"
+#include "../qos.h"
+
+struct mlx5e_qos_node {
+ struct hlist_node hnode;
+ struct mlx5e_qos_node *parent;
+ u64 rate;
+ u32 bw_share;
+ u32 max_average_bw;
+ u32 hw_id;
+ u32 classid; /* 16-bit, except root. */
+ u16 qid;
+};
+
+struct mlx5e_htb {
+ DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
+ DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ struct mlx5e_selq *selq;
+};
+
+#define MLX5E_QOS_QID_INNER 0xffff
+#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
+
+/* Software representation of the QoS tree */
+
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt, err;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode) {
+ if (node->qid == MLX5E_QOS_QID_INNER)
+ continue;
+ err = callback(data, node->qid, node->hw_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb)
+{
+ int last;
+
+ last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev));
+ return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1;
+}
+
+static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb)
+{
+ int size = mlx5e_qos_max_leaf_nodes(htb->mdev);
+ struct mlx5e_priv *priv = htb->priv;
+ int res;
+
+ WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
+ res = find_first_zero_bit(htb->qos_used_qids, size);
+
+ return res == size ? -ENOSPC : res;
+}
+
+static struct mlx5e_qos_node *
+mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid,
+ struct mlx5e_qos_node *parent)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->parent = parent;
+
+ node->qid = qid;
+ __set_bit(qid, htb->qos_used_qids);
+
+ node->classid = classid;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, classid);
+
+ mlx5e_update_tx_netdev_queues(htb->priv);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->qid = MLX5E_QOS_QID_INNER;
+ node->classid = MLX5E_HTB_CLASSID_ROOT;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node)
+{
+ hash_del_rcu(&node->hnode);
+ if (node->qid != MLX5E_QOS_QID_INNER) {
+ __clear_bit(node->qid, htb->qos_used_qids);
+ mlx5e_update_tx_netdev_queues(htb->priv);
+ }
+ /* Make sure this qid is no longer selected by mlx5e_select_queue, so
+ * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue.
+ */
+ synchronize_net();
+ kfree(node);
+}
+
+/* TX datapath API */
+
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid)
+{
+ struct mlx5e_qos_node *node;
+ u16 qid;
+ int res;
+
+ rcu_read_lock();
+
+ node = mlx5e_htb_node_find_rcu(htb, classid);
+ if (!node) {
+ res = -ENOENT;
+ goto out;
+ }
+ qid = READ_ONCE(node->qid);
+ if (qid == MLX5E_QOS_QID_INNER) {
+ res = -EINVAL;
+ goto out;
+ }
+ res = mlx5e_qid_from_qos(&htb->priv->channels, qid);
+
+out:
+ rcu_read_unlock();
+ return res;
+}
+
+/* HTB TC handlers */
+
+static int
+mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
+
+ mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls);
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ err = mlx5e_qos_alloc_queues(priv, &priv->channels);
+ if (err)
+ goto err_cancel_selq;
+ }
+
+ root = mlx5e_htb_node_create_root(htb);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto err_free_queues;
+ }
+
+ err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
+ goto err_sw_node_delete;
+ }
+
+ mlx5e_selq_apply(htb->selq);
+
+ return 0;
+
+err_sw_node_delete:
+ mlx5e_htb_node_delete(htb, root);
+
+err_free_queues:
+ if (opened)
+ mlx5e_qos_close_all_queues(&priv->channels);
+err_cancel_selq:
+ mlx5e_selq_cancel(htb->selq);
+ return err;
+}
+
+static int mlx5e_htb_root_del(struct mlx5e_htb *htb)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_DESTROY\n");
+
+ /* Wait until real_num_tx_queues is updated for mlx5e_select_queue,
+ * so that we can safely switch to its non-HTB non-PTP fastpath.
+ */
+ synchronize_net();
+
+ mlx5e_selq_prepare_htb(htb->selq, 0, 0);
+ mlx5e_selq_apply(htb->selq);
+
+ root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT);
+ if (!root) {
+ qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n");
+ return -ENOENT;
+ }
+ err = mlx5_qos_destroy_node(htb->mdev, root->hw_id);
+ if (err)
+ qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n",
+ root->hw_id, err);
+ mlx5e_htb_node_delete(htb, root);
+
+ mlx5e_qos_deactivate_all_queues(&priv->channels);
+ mlx5e_qos_close_all_queues(&priv->channels);
+
+ return err;
+}
+
+static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate,
+ struct mlx5e_qos_node *parent, u32 *bw_share)
+{
+ u64 share = 0;
+
+ while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
+ parent = parent->parent;
+
+ if (parent->max_average_bw)
+ share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
+ parent->max_average_bw);
+ else
+ share = 101;
+
+ *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
+
+ qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
+ rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
+
+ return 0;
+}
+
+static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw)
+{
+ /* Hardware treats 0 as "unlimited", set at least 1. */
+ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
+}
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ int qid;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
+ classid, parent_classid, rate, ceil);
+
+ qid = mlx5e_htb_find_unused_qos_qid(htb);
+ if (qid < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached.");
+ return qid;
+ }
+
+ parent = mlx5e_htb_node_find(htb, parent_classid);
+ if (!parent)
+ return -EINVAL;
+
+ node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ mlx5e_htb_node_delete(htb, node);
+ return err;
+ }
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ return mlx5e_qid_from_qos(&priv->channels, node->qid);
+}
+
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *child;
+ struct mlx5e_priv *priv = htb->priv;
+ int err, tmp_err;
+ u32 new_hw_id;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
+ classid, child_classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
+ qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ /* Intentionally reuse the qid for the upcoming first child. */
+ child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto err_destroy_hw_node;
+ }
+
+ child->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share,
+ child->max_average_bw, &child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ goto err_delete_sw_node;
+ }
+
+ /* No fail point. */
+
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, child->qid, child->hw_id);
+ }
+ }
+
+ return 0;
+
+err_delete_sw_node:
+ child->qid = MLX5E_QOS_QID_INNER;
+ mlx5e_htb_node_delete(htb, child);
+
+err_destroy_hw_node:
+ tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id);
+ if (tmp_err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
+ new_hw_id, classid, tmp_err);
+ return err;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode)
+ if (node->qid == qid)
+ break;
+
+ return node;
+}
+
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *node;
+ struct netdev_queue *txq;
+ u16 qid, moved_qid;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
+
+ node = mlx5e_htb_node_find(htb, *classid);
+ if (!node)
+ return -ENOENT;
+
+ /* Store qid for reuse. */
+ qid = node->qid;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, qid));
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, *classid, err);
+
+ mlx5e_htb_node_delete(htb, node);
+
+ moved_qid = mlx5e_htb_cur_leaf_nodes(htb);
+
+ if (moved_qid == 0) {
+ /* The last QoS SQ was just destroyed. */
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+ moved_qid--;
+
+ if (moved_qid < qid) {
+ /* The highest QoS SQ was just destroyed. */
+ WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u",
+ qid, moved_qid);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+
+ WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
+ qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
+
+ node = mlx5e_htb_node_find_by_qid(htb, moved_qid);
+ WARN(!node, "Could not find a node with qid %u to move to queue %u",
+ moved_qid, qid);
+
+ /* Stop traffic to the old queue. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+ __clear_bit(moved_qid, priv->htb->qos_used_qids);
+
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, moved_qid));
+ mlx5e_deactivate_qos_sq(priv, moved_qid);
+ mlx5e_close_qos_sq(priv, moved_qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, moved_qid);
+
+ __set_bit(qid, htb->qos_used_qids);
+ WRITE_ONCE(node->qid, qid);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
+ node->classid, moved_qid, qid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ mlx5e_update_tx_netdev_queues(priv);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
+
+ *classid = node->classid;
+ return 0;
+}
+
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ u32 old_hw_id, new_hw_id;
+ int err, saved_err = 0;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
+ force ? "_FORCE" : "", classid);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id,
+ node->parent->bw_share,
+ node->parent->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ if (!force)
+ return err;
+ saved_err = err;
+ }
+
+ /* Store qid for reuse and prevent clearing the bit. */
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, qid);
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ parent = node->parent;
+ mlx5e_htb_node_delete(htb, node);
+
+ node = parent;
+ WRITE_ONCE(node->qid, qid);
+
+ /* Early return on error in force mode. Parent will still be an inner
+ * node to be deleted by a following delete operation.
+ */
+ if (saved_err)
+ return saved_err;
+
+ old_hw_id = node->hw_id;
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, old_hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ return 0;
+}
+
+static int
+mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *child;
+ int err = 0;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, child, hnode) {
+ u32 old_bw_share = child->bw_share;
+ int err_one;
+
+ if (child->parent != node)
+ continue;
+
+ mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share);
+ if (child->bw_share == old_bw_share)
+ continue;
+
+ err_one = mlx5_qos_update_node(htb->mdev, child->hw_id, child->bw_share,
+ child->max_average_bw, child->hw_id);
+ if (!err && err_one) {
+ err = err_one;
+
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
+ qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n",
+ node->classid, err);
+ }
+ }
+
+ return err;
+}
+
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ u32 bw_share, max_average_bw;
+ struct mlx5e_qos_node *node;
+ bool ceil_changed = false;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
+ classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw);
+
+ err = mlx5_qos_update_node(htb->mdev, node->parent->hw_id, bw_share,
+ max_average_bw, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
+ qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ if (max_average_bw != node->max_average_bw)
+ ceil_changed = true;
+
+ node->bw_share = bw_share;
+ node->max_average_bw = max_average_bw;
+
+ if (ceil_changed)
+ err = mlx5e_htb_update_children(htb, node, extack);
+
+ return err;
+}
+
+struct mlx5e_htb *mlx5e_htb_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL);
+}
+
+void mlx5e_htb_free(struct mlx5e_htb *htb)
+{
+ kvfree(htb);
+}
+
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv)
+{
+ htb->mdev = mdev;
+ htb->netdev = netdev;
+ htb->selq = selq;
+ htb->priv = priv;
+ hash_init(htb->qos_tc2node);
+ return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->extack);
+}
+
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb)
+{
+ mlx5e_htb_root_del(htb);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
new file mode 100644
index 000000000000..8386f1ea4559
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5E_EN_HTB_H_
+#define __MLX5E_EN_HTB_H_
+
+#include "qos.h"
+
+#define MLX5E_QOS_MAX_LEAF_NODES 256
+
+struct mlx5e_selq;
+struct mlx5e_htb;
+
+typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id);
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data);
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb);
+
+/* TX datapath API */
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid);
+
+/* HTB TC handlers */
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+struct mlx5e_htb *mlx5e_htb_alloc(void);
+void mlx5e_htb_free(struct mlx5e_htb *htb);
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv);
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb);
+#endif
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index ac44bbe95c5c..b4f3bd7d346e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -20,7 +20,7 @@ mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
struct mlx5e_channel_stats *stats;
int tc;
- stats = &priv->channel_stats[ch];
+ stats = priv->channel_stats[ch];
data->rx_packets = stats->rq.packets;
data->rx_bytes = stats->rq.bytes;
@@ -35,7 +35,7 @@ static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data,
{
int ch, i = 0;
- for (ch = 0; ch < priv->max_nch; ch++) {
+ for (ch = 0; ch < priv->stats_nch; ch++) {
void *buf = data + i;
if (WARN_ON_ONCE(buf +
@@ -51,7 +51,7 @@ static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data,
static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
{
return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
- priv->max_nch);
+ priv->stats_nch);
}
static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
@@ -100,7 +100,7 @@ static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
sagent = &priv->stats_agent;
block->version = MLX5_HV_VHCA_STATS_VERSION;
- block->rings = priv->max_nch;
+ block->rings = priv->stats_nch;
if (!block->command) {
cancel_delayed_work_sync(&priv->stats_agent.work);
@@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
cancel_delayed_work_sync(&priv->stats_agent.work);
}
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
struct mlx5_hv_vhca_agent *agent;
priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
if (!priv->stats_agent.buf)
- return -ENOMEM;
+ return;
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
@@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
PTR_ERR(agent));
kvfree(priv->stats_agent.buf);
- return IS_ERR_OR_NULL(agent);
+ return;
}
priv->stats_agent.agent = agent;
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
-
- return 0;
}
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
index 664463faf77b..29c8c6d3260f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -7,19 +7,12 @@
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
#else
-
-static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
-{
- return 0;
-}
-
-static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
-{
-}
+static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
#endif
#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
new file mode 100644
index 000000000000..4e72ca8070e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <linux/hashtable.h>
+#include <linux/refcount.h>
+
+#include "mapping.h"
+
+#define MAPPING_GRACE_PERIOD 2000
+
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
+struct mapping_ctx {
+ struct xarray xarray;
+ DECLARE_HASHTABLE(ht, 8);
+ struct mutex lock; /* Guards hashtable and xarray */
+ unsigned long max_id;
+ size_t data_size;
+ bool delayed_removal;
+ struct delayed_work dwork;
+ struct list_head pending_list;
+ spinlock_t pending_list_lock; /* Guards pending list */
+ u64 id;
+ u8 type;
+ struct list_head list;
+ refcount_t refcount;
+};
+
+struct mapping_item {
+ struct rcu_head rcu;
+ struct list_head list;
+ unsigned long timeout;
+ struct hlist_node node;
+ int cnt;
+ u32 id;
+ char data[];
+};
+
+int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id)
+{
+ struct mapping_item *mi;
+ int err = -ENOMEM;
+ u32 hash_key;
+
+ mutex_lock(&ctx->lock);
+
+ hash_key = jhash(data, ctx->data_size, 0);
+ hash_for_each_possible(ctx->ht, mi, node, hash_key) {
+ if (!memcmp(data, mi->data, ctx->data_size))
+ goto attach;
+ }
+
+ mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL);
+ if (!mi)
+ goto err_alloc;
+
+ memcpy(mi->data, data, ctx->data_size);
+ hash_add(ctx->ht, &mi->node, hash_key);
+
+ err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id),
+ GFP_KERNEL);
+ if (err)
+ goto err_assign;
+attach:
+ ++mi->cnt;
+ *id = mi->id;
+
+ mutex_unlock(&ctx->lock);
+
+ return 0;
+
+err_assign:
+ hash_del(&mi->node);
+ kfree(mi);
+err_alloc:
+ mutex_unlock(&ctx->lock);
+
+ return err;
+}
+
+static void mapping_remove_and_free(struct mapping_ctx *ctx,
+ struct mapping_item *mi)
+{
+ xa_erase(&ctx->xarray, mi->id);
+ kfree_rcu(mi, rcu);
+}
+
+static void mapping_free_item(struct mapping_ctx *ctx,
+ struct mapping_item *mi)
+{
+ if (!ctx->delayed_removal) {
+ mapping_remove_and_free(ctx, mi);
+ return;
+ }
+
+ mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD);
+
+ spin_lock(&ctx->pending_list_lock);
+ list_add_tail(&mi->list, &ctx->pending_list);
+ spin_unlock(&ctx->pending_list_lock);
+
+ schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD);
+}
+
+int mapping_remove(struct mapping_ctx *ctx, u32 id)
+{
+ unsigned long index = id;
+ struct mapping_item *mi;
+ int err = -ENOENT;
+
+ mutex_lock(&ctx->lock);
+ mi = xa_load(&ctx->xarray, index);
+ if (!mi)
+ goto out;
+ err = 0;
+
+ if (--mi->cnt > 0)
+ goto out;
+
+ hash_del(&mi->node);
+ mapping_free_item(ctx, mi);
+out:
+ mutex_unlock(&ctx->lock);
+
+ return err;
+}
+
+int mapping_find(struct mapping_ctx *ctx, u32 id, void *data)
+{
+ unsigned long index = id;
+ struct mapping_item *mi;
+ int err = -ENOENT;
+
+ rcu_read_lock();
+ mi = xa_load(&ctx->xarray, index);
+ if (!mi)
+ goto err_find;
+
+ memcpy(data, mi->data, ctx->data_size);
+ err = 0;
+
+err_find:
+ rcu_read_unlock();
+ return err;
+}
+
+static void
+mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list)
+{
+ struct mapping_item *mi;
+
+ list_for_each_entry(mi, list, list)
+ mapping_remove_and_free(ctx, mi);
+}
+
+static void mapping_work_handler(struct work_struct *work)
+{
+ unsigned long min_timeout = 0, now = jiffies;
+ struct mapping_item *mi, *next;
+ LIST_HEAD(pending_items);
+ struct mapping_ctx *ctx;
+
+ ctx = container_of(work, struct mapping_ctx, dwork.work);
+
+ spin_lock(&ctx->pending_list_lock);
+ list_for_each_entry_safe(mi, next, &ctx->pending_list, list) {
+ if (time_after(now, mi->timeout))
+ list_move(&mi->list, &pending_items);
+ else if (!min_timeout ||
+ time_before(mi->timeout, min_timeout))
+ min_timeout = mi->timeout;
+ }
+ spin_unlock(&ctx->pending_list_lock);
+
+ mapping_remove_and_free_list(ctx, &pending_items);
+
+ if (min_timeout)
+ schedule_delayed_work(&ctx->dwork, abs(min_timeout - now));
+}
+
+static void mapping_flush_work(struct mapping_ctx *ctx)
+{
+ if (!ctx->delayed_removal)
+ return;
+
+ cancel_delayed_work_sync(&ctx->dwork);
+ mapping_remove_and_free_list(ctx, &ctx->pending_list);
+}
+
+struct mapping_ctx *
+mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ ctx->max_id = max_id ? max_id : UINT_MAX;
+ ctx->data_size = data_size;
+
+ if (delayed_removal) {
+ INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler);
+ INIT_LIST_HEAD(&ctx->pending_list);
+ spin_lock_init(&ctx->pending_list_lock);
+ ctx->delayed_removal = true;
+ }
+
+ mutex_init(&ctx->lock);
+ xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
+
+ refcount_set(&ctx->refcount, 1);
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ mutex_lock(&shared_ctx_lock);
+ list_for_each_entry(ctx, &shared_ctx_list, list) {
+ if (ctx->id == id && ctx->type == type) {
+ if (refcount_inc_not_zero(&ctx->refcount))
+ goto unlock;
+ break;
+ }
+ }
+
+ ctx = mapping_create(data_size, max_id, delayed_removal);
+ if (IS_ERR(ctx))
+ goto unlock;
+
+ ctx->id = id;
+ ctx->type = type;
+ list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+ mutex_unlock(&shared_ctx_lock);
+ return ctx;
+}
+
+void mapping_destroy(struct mapping_ctx *ctx)
+{
+ if (!refcount_dec_and_test(&ctx->refcount))
+ return;
+
+ mutex_lock(&shared_ctx_lock);
+ list_del(&ctx->list);
+ mutex_unlock(&shared_ctx_lock);
+
+ mapping_flush_work(ctx);
+ xa_destroy(&ctx->xarray);
+ mutex_destroy(&ctx->lock);
+
+ kfree(ctx);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
new file mode 100644
index 000000000000..4e2119f0f4c1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#ifndef __MLX5_MAPPING_H__
+#define __MLX5_MAPPING_H__
+
+struct mapping_ctx;
+
+int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id);
+int mapping_remove(struct mapping_ctx *ctx, u32 id);
+int mapping_find(struct mapping_ctx *ctx, u32 id, void *data);
+
+/* mapping uses an xarray to map data to ids in add(), and for find().
+ * For locking, it uses a internal xarray spin lock for add()/remove(),
+ * find() uses rcu_read_lock().
+ * Choosing delayed_removal postpones the removal of a previously mapped
+ * id by MAPPING_GRACE_PERIOD milliseconds.
+ * This is to avoid races against hardware, where we mark the packet in
+ * hardware with a previous id, and quick remove() and add() reusing the same
+ * previous id. Then find() will get the new mapping instead of the old
+ * which was used to mark the packet.
+ */
+struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
+ bool delayed_removal);
+void mapping_destroy(struct mapping_ctx *ctx);
+
+/* adds mapping with an id or get an existing mapping with the same id
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
+#endif /* __MLX5_MAPPING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
new file mode 100644
index 000000000000..17325c5d6516
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies
+
+#include <linux/jhash.h>
+#include "mod_hdr.h"
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
+struct mod_hdr_key {
+ int num_actions;
+ void *actions;
+};
+
+struct mlx5e_mod_hdr_handle {
+ /* a node of a hash table which keeps all the mod_hdr entries */
+ struct hlist_node mod_hdr_hlist;
+
+ struct mod_hdr_key key;
+
+ struct mlx5_modify_hdr *modify_hdr;
+
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+};
+
+static u32 hash_mod_hdr_info(struct mod_hdr_key *key)
+{
+ return jhash(key->actions,
+ key->num_actions * MLX5_MH_ACT_SZ, 0);
+}
+
+static int cmp_mod_hdr_info(struct mod_hdr_key *a, struct mod_hdr_key *b)
+{
+ if (a->num_actions != b->num_actions)
+ return 1;
+
+ return memcmp(a->actions, b->actions,
+ a->num_actions * MLX5_MH_ACT_SZ);
+}
+
+void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl)
+{
+ mutex_init(&tbl->lock);
+ hash_init(tbl->hlist);
+}
+
+void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl)
+{
+ mutex_destroy(&tbl->lock);
+}
+
+static struct mlx5e_mod_hdr_handle *mod_hdr_get(struct mod_hdr_tbl *tbl,
+ struct mod_hdr_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_mod_hdr_handle *mh, *found = NULL;
+
+ hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, key)) {
+ refcount_inc(&mh->refcnt);
+ found = mh;
+ break;
+ }
+ }
+
+ return found;
+}
+
+struct mlx5e_mod_hdr_handle *
+mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int num_actions, actions_size, err;
+ struct mlx5e_mod_hdr_handle *mh;
+ struct mod_hdr_key key;
+ u32 hash_key;
+
+ num_actions = mod_hdr_acts->num_actions;
+ actions_size = MLX5_MH_ACT_SZ * num_actions;
+
+ key.actions = mod_hdr_acts->actions;
+ key.num_actions = num_actions;
+
+ hash_key = hash_mod_hdr_info(&key);
+
+ mutex_lock(&tbl->lock);
+ mh = mod_hdr_get(tbl, &key, hash_key);
+ if (mh) {
+ mutex_unlock(&tbl->lock);
+ wait_for_completion(&mh->res_ready);
+
+ if (mh->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto attach_header_err;
+ }
+ goto attach_header;
+ }
+
+ mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
+ if (!mh) {
+ mutex_unlock(&tbl->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mh->key.actions = (void *)mh + sizeof(*mh);
+ memcpy(mh->key.actions, key.actions, actions_size);
+ mh->key.num_actions = num_actions;
+ refcount_set(&mh->refcnt, 1);
+ init_completion(&mh->res_ready);
+
+ hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
+ mutex_unlock(&tbl->lock);
+
+ mh->modify_hdr = mlx5_modify_header_alloc(mdev, namespace,
+ mh->key.num_actions,
+ mh->key.actions);
+ if (IS_ERR(mh->modify_hdr)) {
+ err = PTR_ERR(mh->modify_hdr);
+ mh->compl_result = err;
+ goto alloc_header_err;
+ }
+ mh->compl_result = 1;
+ complete_all(&mh->res_ready);
+
+attach_header:
+ return mh;
+
+alloc_header_err:
+ complete_all(&mh->res_ready);
+attach_header_err:
+ mlx5e_mod_hdr_detach(mdev, tbl, mh);
+ return ERR_PTR(err);
+}
+
+void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ struct mlx5e_mod_hdr_handle *mh)
+{
+ if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
+ return;
+ hash_del(&mh->mod_hdr_hlist);
+ mutex_unlock(&tbl->lock);
+
+ if (mh->compl_result > 0)
+ mlx5_modify_header_dealloc(mdev, mh->modify_hdr);
+
+ kfree(mh);
+}
+
+struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh)
+{
+ return mh->modify_hdr;
+}
+
+char *
+mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int new_num_actions, max_hw_actions;
+ size_t new_sz, old_sz;
+ void *ret;
+
+ if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
+ goto out;
+
+ max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace);
+ new_num_actions = min(max_hw_actions,
+ mod_hdr_acts->actions ?
+ mod_hdr_acts->max_actions * 2 : 1);
+ if (mod_hdr_acts->max_actions == new_num_actions)
+ return ERR_PTR(-ENOSPC);
+
+ new_sz = MLX5_MH_ACT_SZ * new_num_actions;
+ old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ;
+
+ if (mod_hdr_acts->is_static) {
+ ret = kzalloc(new_sz, GFP_KERNEL);
+ if (ret) {
+ memcpy(ret, mod_hdr_acts->actions, old_sz);
+ mod_hdr_acts->is_static = false;
+ }
+ } else {
+ ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
+ if (ret)
+ memset(ret + old_sz, 0, new_sz - old_sz);
+ }
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ mod_hdr_acts->actions = ret;
+ mod_hdr_acts->max_actions = new_num_actions;
+
+out:
+ return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
+}
+
+void
+mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ if (!mod_hdr_acts->is_static)
+ kfree(mod_hdr_acts->actions);
+
+ mod_hdr_acts->actions = NULL;
+ mod_hdr_acts->num_actions = 0;
+ mod_hdr_acts->max_actions = 0;
+}
+
+char *
+mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos)
+{
+ return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
new file mode 100644
index 000000000000..b8dac418d0a5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies */
+
+#ifndef __MLX5E_EN_MOD_HDR_H__
+#define __MLX5E_EN_MOD_HDR_H__
+
+#include <linux/hashtable.h>
+#include <linux/mlx5/fs.h>
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
+struct mlx5e_mod_hdr_handle;
+
+struct mlx5e_tc_mod_hdr_acts {
+ int num_actions;
+ int max_actions;
+ bool is_static;
+ void *actions;
+};
+
+#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \
+ u8 name[len][MLX5_MH_ACT_SZ] = {}
+
+#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \
+ struct mlx5e_tc_mod_hdr_acts name = { \
+ .max_actions = ARRAY_SIZE(acts_arr), \
+ .is_static = true, \
+ .actions = acts_arr, \
+ }
+
+char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos);
+
+struct mlx5e_mod_hdr_handle *
+mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ struct mlx5e_mod_hdr_handle *mh);
+struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh);
+
+void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl);
+void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl);
+
+static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace)
+{
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
+#endif /* __MLX5E_EN_MOD_HDR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
index 7cd5b02e0f10..254c84739046 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -38,12 +38,11 @@ int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
{
- u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
- u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
MLX5_SET(arm_monitor_counter_in, in, opcode,
MLX5_CMD_OP_ARM_MONITOR_COUNTER);
- mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in);
}
static void mlx5e_monitor_counters_work(struct work_struct *work)
@@ -52,7 +51,7 @@ static void mlx5e_monitor_counters_work(struct work_struct *work)
monitor_counters_work);
mutex_lock(&priv->state_lock);
- mlx5e_update_ndo_stats(priv);
+ mlx5e_stats_update_ndo_stats(priv);
mutex_unlock(&priv->state_lock);
mlx5e_monitor_counter_arm(priv);
}
@@ -66,19 +65,6 @@ static int mlx5e_monitor_event_handler(struct notifier_block *nb,
return NOTIFY_OK;
}
-static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv)
-{
- MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
- MONITOR_COUNTER);
- mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
-}
-
-static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv)
-{
- mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
- cancel_work_sync(&priv->monitor_counters_work);
-}
-
static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
{
enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
@@ -118,8 +104,7 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
- u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
- u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
int q_counter = priv->q_counter;
int cnt = 0;
@@ -136,34 +121,31 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
MLX5_SET(set_monitor_counter_in, in, opcode,
MLX5_CMD_OP_SET_MONITOR_COUNTER);
- mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(mdev, set_monitor_counter, in);
}
/* check if mlx5e_monitor_counter_supported before calling this function*/
void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
{
INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
- mlx5e_monitor_counter_start(priv);
+ MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+ MONITOR_COUNTER);
+ mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+
mlx5e_set_monitor_counter(priv);
mlx5e_monitor_counter_arm(priv);
queue_work(priv->wq, &priv->update_stats_work);
}
-static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv)
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
{
- u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
- u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
- MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0);
MLX5_SET(set_monitor_counter_in, in, opcode,
MLX5_CMD_OP_SET_MONITOR_COUNTER);
- mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
-}
-
-/* check if mlx5e_monitor_counter_supported before calling this function*/
-void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
-{
- mlx5e_monitor_counter_disable(priv);
- mlx5e_monitor_counter_stop(priv);
+ mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in);
+ mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ cancel_work_sync(&priv->monitor_counters_work);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index eb2e1f2138e4..29dd3a04c154 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -2,133 +2,405 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include "en/params.h"
+#include "en/txrx.h"
+#include "en/port.h"
+#include "en_accel/en_accel.h"
+#include "en_accel/ipsec.h"
+#include <net/xdp_sock_drv.h>
-static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
+{
+ u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size);
+
+ return min_page_shift ? : 12;
+}
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
+ u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);
+
+ /* Regular RQ uses order-0 pages, the NIC must be able to map them. */
+ if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
+ min_page_shift = req_page_shift;
+
+ return max(req_page_shift, min_page_shift);
+}
+
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ /* Different memory management schemes use different mechanisms to map
+ * user-mode memory. The stricter guarantees we have, the faster
+ * mechanisms we use:
+ * 1. MTT - direct mapping in page granularity.
+ * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but
+ * all mappings have the same size.
+ * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and
+ * mappings can have different sizes.
+ */
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ bool oversized = false;
+
+ if (xsk) {
+ oversized = xsk->chunk_size < (1 << page_shift);
+ WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift));
+ }
+
+ /* XSK frame size doesn't match the UMR page size, either because the
+ * frame size is not a power of two, or it's smaller than the minimal
+ * page size supported by the firmware.
+ * It's possible to receive packets bigger than MTU in certain setups.
+ * To avoid writing over the XSK frame boundary, the top region of each
+ * stride is mapped to a garbage page, resulting in two mappings of
+ * different sizes per frame.
+ */
+ if (oversized) {
+ /* An optimization for frame sizes equal to 3 * power_of_two.
+ * 3 KSMs point to the frame, and one KSM points to the garbage
+ * page, which works faster than KLM.
+ */
+ if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3))
+ return MLX5E_MPWRQ_UMR_MODE_TRIPLE;
+
+ return MLX5E_MPWRQ_UMR_MODE_OVERSIZED;
+ }
+
+ /* XSK frames can start at arbitrary unaligned locations, but they all
+ * have the same size which is a power of two. It allows to optimize to
+ * one KSM per frame.
+ */
+ if (unaligned)
+ return MLX5E_MPWRQ_UMR_MODE_UNALIGNED;
+
+ /* XSK: frames are naturally aligned, MTT can be used.
+ * Non-XSK: Allocations happen in units of CPU pages, therefore, the
+ * mappings are naturally aligned.
+ */
+ return MLX5E_MPWRQ_UMR_MODE_ALIGNED;
+}
+
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode)
+{
+ switch (mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return sizeof(struct mlx5_mtt);
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return sizeof(struct mlx5_ksm);
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return sizeof(struct mlx5_klm) * 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return sizeof(struct mlx5_ksm) * 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode);
+ return 0;
+}
+
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u8 max_pages_per_wqe, max_log_mpwqe_size;
+ u16 max_wqe_size;
+
+ /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
+ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB;
+ max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe),
+ MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size;
+ max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift;
+
+ WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU);
+
+ return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ);
+}
+
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
+ u8 pages_per_wqe;
+
+ pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;
+
+ /* Two MTTs are needed to form an octword. The number of MTTs is encoded
+ * in octwords in a UMR WQE, so we need at least two to avoid mapping
+ * garbage addresses.
+ */
+ if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ pages_per_wqe = 2;
+
+ /* Sanity check for further calculations to succeed. */
+ BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
+ if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
+ return MLX5_MPWRQ_MAX_PAGES_PER_WQE;
+
+ return pages_per_wqe;
+}
+
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u16 umr_wqe_sz;
+
+ umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) +
+ ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
+
+ WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK);
+
+ return umr_wqe_sz;
+}
+
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode),
+ MLX5_SEND_WQE_BB);
+}
+
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+
+ /* Add another page as a buffer between WQEs. This page will absorb
+ * write overflow by the hardware, when receiving packets larger than
+ * MTU. These oversize packets are dropped by the driver at a later
+ * stage.
+ */
+ return ALIGN(pages_per_wqe + 1,
+ MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode));
+}
+
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
{
- return params->xdp_prog || xsk;
+ /* Same limits apply to KSMs and KLMs. */
+ u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS,
+ 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5E_MAX_RQ_NUM_MTTS;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return klm_limit;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ /* Each entry is two KLMs. */
+ return klm_limit / 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ /* Each entry is four KSMs. */
+ return klm_limit / 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode);
+ u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode);
+
+ return ilog2(max_entries / mtts_per_wqe);
+}
+
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) +
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ MLX5E_ORDER2_MAX_PACKET_MTU;
}
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- u16 headroom = NET_IP_ALIGN;
+ u16 headroom;
+
+ if (xsk)
+ return xsk->headroom;
- if (mlx5e_rx_is_xdp(params, xsk)) {
+ headroom = NET_IP_ALIGN;
+ if (params->xdp_prog)
headroom += XDP_PACKET_HEADROOM;
- if (xsk)
- headroom += xsk->headroom;
- } else {
+ else
headroom += MLX5_RX_HEADROOM;
- }
return headroom;
}
-u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
- return linear_rq_headroom + hw_mtu;
+ return xsk->headroom + hw_mtu;
}
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
{
- u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);
-
- /* AF_XDP doesn't build SKBs in place. */
- if (!xsk)
- frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
+ /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
+ u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a
- * special case. It can run with frames smaller than a page, as it
- * doesn't allocate pages dynamically. However, here we pretend that
- * fragments are page-sized: it allows to treat XSK frames like pages
- * by redirecting alloc and free operations to XSK rings and by using
- * the fact there are no multiple packets per "page" (which is a frame).
- * The latter is important, because frames may come in a random order,
- * and we will have trouble assemblying a real page of multiple frames.
- */
- if (mlx5e_rx_is_xdp(params, xsk))
- frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
+ return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
+}
- /* Even if we can go with a smaller fragment size, we must not put
- * multiple packets into a single frame.
+static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ bool mpwqe)
+{
+ /* XSK frames are mapped as individual pages, because frames may come in
+ * an arbitrary order from random locations in the UMEM.
*/
if (xsk)
- frag_sz = max_t(u32, frag_sz, xsk->chunk_size);
+ return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
- return frag_sz;
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (params->xdp_prog)
+ return PAGE_SIZE;
+
+ return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
}
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
+ u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
- return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ order_base_2(linear_stride_sz);
}
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
- * than one page. For this, check both with and without xsk.
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
+ return false;
+
+ /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
+ * must fit into a CPU page.
*/
- u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
- mlx5e_rx_get_linear_frag_sz(params, NULL));
+ if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
+ return false;
+
+ /* XSK frames must be big enough to hold the packet data. */
+ if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size)
+ return false;
+
+ return true;
+}
+
+static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ u8 log_stride_sz, u8 log_num_strides,
+ u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ if (log_stride_sz + log_num_strides !=
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode))
+ return false;
+
+ if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE ||
+ log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX)
+ return false;
- return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
+ if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX)
+ return false;
+
+ if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+ return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE;
+
+ return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
}
-#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
- MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
- s8 signed_log_num_strides_param;
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
u8 log_num_strides;
+ u8 log_stride_sz;
+ u8 log_wqe_sz;
- if (!mlx5e_rx_is_linear_skb(params, xsk))
+ if (!mlx5e_rx_is_linear_skb(mdev, params, xsk))
return false;
- if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
- return false;
+ log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
- if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
- return true;
+ if (log_wqe_sz < log_stride_sz)
+ return false;
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
- signed_log_num_strides_param =
- (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+ log_num_strides = log_wqe_sz - log_stride_sz;
- return signed_log_num_strides_param >= 0;
+ return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz,
+ log_num_strides, page_shift,
+ umr_mode);
}
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 log_pkts_per_wqe, page_shift, max_log_rq_size;
+
+ log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk);
+ page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode);
/* Numbers are unsigned, don't subtract to avoid underflow. */
if (params->log_rq_mtu_frames <
log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+ /* Ethtool's rx_max_pending is calculated for regular RQ, that uses
+ * pages of PAGE_SIZE. Max length of an XSK RQ might differ if it uses a
+ * frame size not equal to PAGE_SIZE.
+ * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on
+ * unexpected failure.
+ */
+ if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size))
+ return max_log_rq_size;
+
return params->log_rq_mtu_frames - log_pkts_per_wqe;
}
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE));
+}
+
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE);
+}
+
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
+ PAGE_SIZE;
+
+ return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu));
+}
+
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
+ return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
@@ -137,17 +409,832 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- return MLX5_MPWRQ_LOG_WQE_SZ -
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz)
+{
+#define UMR_WQE_BULK (2)
+ return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1);
+}
+
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(params, xsk) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+ return linear_headroom;
+
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return linear_headroom;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ return linear_headroom;
+
+ return 0;
+}
+
+u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
+ u16 stop_room;
+
+ stop_room = mlx5e_ktls_get_stop_room(mdev, params);
+ stop_room += mlx5e_stop_room_for_max_wqe(mdev);
+ if (is_mpwqe)
+ /* A MPWQE can take up to the maximum cacheline-aligned WQE +
+ * all the normal stop room can be taken if a new packet breaks
+ * the active MPWQE session and allocates its WQEs right away.
+ */
+ stop_room += mlx5e_stop_room_for_mpwqe(mdev);
+
+ return stop_room;
+}
+
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ size_t sq_size = 1 << params->log_sq_size;
+ u16 stop_room;
+
+ stop_room = mlx5e_calc_sq_stop_room(mdev, params);
+ if (stop_room >= sq_size) {
+ mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n",
+ stop_room, sq_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+{
+ struct dim_cq_moder moder = {};
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+ struct dim_cq_moder moder = {};
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+ return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->tx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+ } else {
+ params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+ }
+}
+
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->rx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+ } else {
+ params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
+ }
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ mlx5e_reset_tx_moderation(params, cq_period_mode);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ params->tx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ mlx5e_reset_rx_moderation(params, cq_period_mode);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ params->rx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
+{
+ u32 link_speed = 0;
+ u32 pci_bw = 0;
+
+ mlx5e_port_max_linkspeed(mdev, &link_speed);
+ pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
+ mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
+ link_speed, pci_bw);
+
+#define MLX5E_SLOW_PCI_RATIO (2)
+
+ return link_speed && pci_bw &&
+ link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
+}
+
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL);
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ u16 max_mtu_pkts;
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return -EINVAL;
+
+ /* Current RQ length is too big for the given frame size, the
+ * needed number of WQEs exceeds the maximum.
+ */
+ max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned));
+ if (params->log_rq_mtu_frames > max_mtu_pkts) {
+ mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
+ 1 << params->log_rq_mtu_frames, xsk->chunk_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+ BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
+ BIT(params->log_rq_mtu_frames),
+ BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+ MLX5_WQ_TYPE_CYCLIC;
+}
+
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ /* Prefer Striding RQ, unless any of the following holds:
+ * - Striding RQ configuration is not possible/supported.
+ * - CQE compression is ON, and stride_index mini_cqe layout is not supported.
+ * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ *
+ * No XSK params: checking the availability of striding RQ in general.
+ */
+ if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ||
+ MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) &&
+ !mlx5e_mpwrq_validate_regular(mdev, params) &&
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+ !mlx5e_rx_is_linear_skb(mdev, params, NULL)))
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+}
+
+/* Build queue parameters */
+
+void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c)
+{
+ *ccp = (struct mlx5e_create_cq_param) {
+ .napi = &c->napi,
+ .ch_stats = c->stats,
+ .node = cpu_to_node(c->cpu),
+ .ix = c->ix,
+ };
+}
+
+static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
+{
+ if (xdp)
+ /* XDP requires all fragments to be of the same size. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size;
+
+ /* Optimization for small packets: the last fragment is bigger than the others. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
+}
+
+#define DEFAULT_FRAG_SIZE (2048)
+
+static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_frags_info *info)
+{
+ u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ int frag_size_max = DEFAULT_FRAG_SIZE;
+ int first_frag_size_max;
+ u32 buf_size = 0;
+ u16 headroom;
+ int max_mtu;
+ int i;
+
+ if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
+ int frag_stride;
+
+ frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false);
+
+ info->arr[0].frag_size = byte_count;
+ info->arr[0].frag_stride = frag_stride;
+ info->num_frags = 1;
+
+ /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The
+ * first WQE in the page is responsible for allocation of this
+ * page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are
+ * still not completed, the allocation must stop before k*N.
+ */
+ info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1;
+
+ goto out;
+ }
+
+ headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu || params->xdp_prog) {
+ frag_size_max = PAGE_SIZE;
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu) {
+ mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n",
+ params->sw_mtu, max_mtu);
+ return -EINVAL;
+ }
+ }
+
+ i = 0;
+ while (buf_size < byte_count) {
+ int frag_size = byte_count - buf_size;
+
+ if (i == 0)
+ frag_size = min(frag_size, first_frag_size_max);
+ else if (i < MLX5E_MAX_RX_FRAGS - 1)
+ frag_size = min(frag_size, frag_size_max);
+
+ info->arr[i].frag_size = frag_size;
+ buf_size += frag_size;
+
+ if (params->xdp_prog) {
+ /* XDP multi buffer expects fragments of the same size. */
+ info->arr[i].frag_stride = frag_size_max;
+ } else {
+ if (i == 0) {
+ /* Ensure that headroom and tailroom are included. */
+ frag_size += headroom;
+ frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
+ info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+ }
+
+ i++;
+ }
+ info->num_frags = i;
+
+ /* The last fragment of WQE with index 2*N may share the page with the
+ * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1
+ * is not completed yet, WQE 2*N must not be allocated, as it's
+ * responsible for allocating a new page.
+ */
+ if (frag_size_max == PAGE_SIZE) {
+ /* No WQE can start in the middle of a page. */
+ info->wqe_index_mask = 0;
+ } else {
+ /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments,
+ * because there would be more than MLX5E_MAX_RX_FRAGS of them.
+ */
+ WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE);
+
+ /* Odd number of fragments allows to pack the last fragment of
+ * the previous WQE and the first fragment of the next WQE into
+ * the same page.
+ * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS
+ * is 4, the last fragment can be bigger than the rest only if
+ * it's the fourth one, so WQEs consisting of 3 fragments will
+ * always share a page.
+ * When a page is shared, WQE bulk size is 2, otherwise just 1.
+ */
+ info->wqe_index_mask = info->num_frags % 2;
+ }
+
+out:
+ /* Bulking optimization to skip allocation until at least 8 WQEs can be
+ * allocated in a row. At the same time, never start allocation when
+ * the page is still used by older WQEs.
+ */
+ info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+
+ info->log_num_frags = order_base_2(info->num_frags);
+
+ return 0;
+}
+
+static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
+{
+ int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
+
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ sz += sizeof(struct mlx5e_rx_wqe_ll);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ sz += sizeof(struct mlx5e_rx_wqe_cyc);
+ }
+
+ return order_base_2(sz);
+}
+
+static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
+}
+
+static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+
+ /* +1 is for the case that the pkt_per_rsrv dont consume the reservation
+ * so we get a filler cqe for the rest of the reservation.
+ */
+ return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1));
+}
+
+static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param)
+{
+ bool hw_stridx = false;
+ void *cqc = param->cqc;
+ u8 log_cq_size;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
+ else
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ log_cq_size = params->log_rq_mtu_frames;
+ }
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
+ MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+ }
+
+ mlx5e_build_common_cq_param(mdev, param);
+ param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
+}
+
+static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
+ bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+ MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
+ return ro && lro_en ?
+ MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+}
+
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int ndsegs = 1;
+ int err;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
+ u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+ log_wqe_num_of_strides,
+ page_shift, umr_mode)) {
+ mlx5_core_err(mdev,
+ "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n",
+ log_wqe_stride_size, log_wqe_num_of_strides,
+ umr_mode);
+ return -EINVAL;
+ }
+
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ MLX5_SET(wq, wq, shampo_enable, true);
+ MLX5_SET(wq, wq, log_reservation_size,
+ mlx5e_shampo_get_log_rsrv_size(mdev, params));
+ MLX5_SET(wq, wq,
+ log_max_num_of_packets_per_reservation,
+ mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ MLX5_SET(wq, wq, log_headers_entry_size,
+ mlx5e_shampo_get_log_hd_entry_size(mdev, params));
+ MLX5_SET(rqc, rqc, reservation_timeout,
+ params->packet_merge.timeout);
+ MLX5_SET(rqc, rqc, shampo_match_criteria_type,
+ params->packet_merge.shampo.match_criteria_type);
+ MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
+ params->packet_merge.shampo.alignment_granularity);
+ }
+ break;
+ }
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
+ err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ if (err)
+ return err;
+ ndsegs = param->frags_info.num_frags;
+ }
+
+ MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
+ MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params));
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+ MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
+ MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp);
+
+ return 0;
+}
+
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+ u16 q_counter,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
+ MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+}
+
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
+
+ mlx5e_build_common_cq_param(mdev, param);
+ param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
+}
+
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+}
+
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ bool allow_swp;
+
+ allow_swp =
+ mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev);
+ mlx5e_build_sq_param_common(mdev, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ MLX5_SET(sqc, sqc, allow_swp, allow_swp);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
+ param->stop_room = mlx5e_calc_sq_stop_room(mdev, params);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
+
+ mlx5e_build_common_cq_param(mdev, param);
+
+ param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+/* This function calculates the maximum number of headers entries that are needed
+ * per WQE, the formula is based on the size of the reservations and the
+ * restriction we have about max packets for reservation that is equal to max
+ * headers per reservation.
+ */
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL));
+ int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL);
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+ u32 hd_per_wqe;
+
+ /* Assumption: hd_per_wqe % 8 == 0. */
+ hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv;
+ mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n",
+ __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv);
+ return hd_per_wqe;
+}
+
+/* This function calculates the maximum number of headers entries that are needed
+ * for the WQ, this value is uesed to allocate the header buffer in HW, thus
+ * must be a pow of 2.
+ */
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 hd_per_wqe, hd_per_wq;
+
+ hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size);
+ return hd_per_wq;
+}
+
+static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 wqebbs;
+
+ max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
+ max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
+ rest = max_hd_per_wqe % max_klm_per_umr;
+ wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
+ if (rest)
+ wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
+ wqebbs *= wq_size;
+ return wqebbs;
+}
+
+static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 umr_wqebbs;
+
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+
+ return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+}
+
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp)
+{
+ u32 wqebbs, total_pages, useful_space;
+
+ /* MLX5_WQ_TYPE_CYCLIC */
+ if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ /* UMR WQEs for the regular RQ. */
+ wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);
+
+ /* If XDP program is attached, XSK may be turned on at any time without
+ * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of
+ * both regular RQ and XSK RQ.
+ *
+ * XSK uses different values of page_shift, and the total number of UMR
+ * WQEBBs depends on it. This dependency is complex and not monotonic,
+ * especially taking into consideration that some of the parameters come
+ * from capabilities. Hence, we have to try all valid values of XSK
+ * frame size (and page_shift) to find the maximum.
+ */
+ if (params->xdp_prog) {
+ u32 max_xsk_wqebbs = 0;
+ u8 frame_shift;
+
+ for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
+ frame_shift <= PAGE_SHIFT; frame_shift++) {
+ /* The headroom doesn't affect the calculation. */
+ struct mlx5e_xsk_param xsk = {
+ .chunk_size = 1 << frame_shift,
+ .unaligned = false,
+ };
+
+ /* XSK aligned mode. */
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a power of two. */
+ xsk.unaligned = true;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is not equal to stride size. */
+ xsk.chunk_size -= 1;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a triple power of two. */
+ xsk.chunk_size = (1 << frame_shift) / 4 * 3;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+ }
+
+ wqebbs += max_xsk_wqebbs;
+ }
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);
+
+ /* UMR WQEs don't cross the page boundary, they are padded with NOPs.
+ * This padding is always smaller than the max WQE size. That gives us
+ * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes
+ * per page. The number of pages is estimated as the total size of WQEs
+ * divided by the useful space in page, rounding up. If some WQEs don't
+ * fully fit into the useful space, they can occupy part of the padding,
+ * which proves this estimation to be correct (reserve enough space).
+ */
+ useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB;
+ total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space);
+ wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB);
+
+ return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
+}
+
+static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
+{
+ if (mlx5e_is_ktls_rx(mdev))
+ return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+}
+
+static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
+ mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
+}
+
+static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+ param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
+ param->is_tls = mlx5e_is_ktls_rx(mdev);
+ if (param->is_tls)
+ param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
+}
+
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
+ param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam)
+{
+ u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
+ int err;
+
+ err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq);
+ if (err)
+ return err;
+
+ icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq);
+ async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
+
+ mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
+ mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq);
+ mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
+ mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);
- return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+ return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 989d8f429438..034debd140bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -9,119 +9,149 @@
struct mlx5e_xsk_param {
u16 headroom;
u16 chunk_size;
+ bool unaligned;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
};
struct mlx5e_rq_param {
+ struct mlx5e_cq_param cqp;
u32 rqc[MLX5_ST_SZ_DW(rqc)];
struct mlx5_wq_param wq;
struct mlx5e_rq_frags_info frags_info;
};
struct mlx5e_sq_param {
+ struct mlx5e_cq_param cqp;
u32 sqc[MLX5_ST_SZ_DW(sqc)];
struct mlx5_wq_param wq;
bool is_mpw;
-};
-
-struct mlx5e_cq_param {
- u32 cqc[MLX5_ST_SZ_DW(cqc)];
- struct mlx5_wq_param wq;
- u16 eq_ix;
- u8 cq_period_mode;
+ bool is_tls;
+ bool is_xdp_mb;
+ u16 stop_room;
};
struct mlx5e_channel_param {
struct mlx5e_rq_param rq;
- struct mlx5e_sq_param sq;
+ struct mlx5e_sq_param txq_sq;
struct mlx5e_sq_param xdp_sq;
struct mlx5e_sq_param icosq;
- struct mlx5e_cq_param rx_cq;
- struct mlx5e_cq_param tx_cq;
- struct mlx5e_cq_param icosq_cq;
+ struct mlx5e_sq_param async_icosq;
};
-static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
- u16 qid,
- enum mlx5e_rq_group group,
- u16 *ix)
-{
- int nch = params->num_channels;
- int ch = qid - nch * group;
-
- if (ch < 0 || ch >= nch)
- return false;
-
- *ix = ch;
- return true;
-}
-
-static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
- u16 qid,
- u16 *ix,
- enum mlx5e_rq_group *group)
-{
- u16 nch = params->num_channels;
-
- *ix = qid % nch;
- *group = qid / nch;
-}
-
-static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
- struct mlx5e_params *params, u64 qid)
-{
- return qid < params->num_channels * profile->rq_groups;
-}
+struct mlx5e_create_sq_param {
+ struct mlx5_wq_ctrl *wq_ctrl;
+ u32 cqn;
+ u32 ts_cqe_to_dest_cqn;
+ u32 tisn;
+ u8 tis_lst_sz;
+ u8 min_inline_mode;
+};
+
+/* Striding RQ dynamic parameters */
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode);
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
/* Parameter calculations */
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
/* Build queue parameters */
-void mlx5e_build_rq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_param *param);
-void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c);
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+ u16 q_counter,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
struct mlx5e_sq_param *param);
-void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
struct mlx5e_cq_param *param);
-void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_cq_param *param);
-void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_cq_param *param);
-void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_sq_param *param);
-void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
struct mlx5e_sq_param *param);
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam);
+
+u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
#endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index fce6eccdcf8b..89510cac46c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -76,13 +76,31 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
[MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
[MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
[MLX5E_400GAUI_8] = 400000,
+ [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
+ [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
+ [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
};
+bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_port_eth_proto eproto;
+ int err;
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet))
+ return true;
+
+ err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto);
+ if (err)
+ return false;
+
+ return !!eproto.cap;
+}
+
static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
const u32 **arr, u32 *size,
bool force_legacy)
{
- bool ext = force_legacy ? false : MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev);
*size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
ARRAY_SIZE(mlx5e_link_speed);
@@ -177,7 +195,7 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
bool ext;
int err;
- ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ ext = mlx5e_ptys_ext_supported(mdev);
err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
if (err)
goto out;
@@ -205,7 +223,7 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
int err;
int i;
- ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ ext = mlx5e_ptys_ext_supported(mdev);
err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
if (err)
return err;
@@ -343,64 +361,63 @@ out:
return err;
}
-static u32 fec_supported_speeds[] = {
- 10000,
- 40000,
- 25000,
- 50000,
- 56000,
- 100000
+enum mlx5e_fec_supported_link_mode {
+ MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_25G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_50G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_56G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_100G,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X,
+ MLX5E_MAX_FEC_SUPPORTED_LINK_MODE,
};
-#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds)
+#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X
+
+#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \
+ do { \
+ u16 *_policy = &(policy); \
+ u32 *_buf = buf; \
+ \
+ if (write) \
+ MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \
+ else \
+ *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \
+ } while (0)
/* get/set FEC admin field for a given speed */
-static int mlx5e_fec_admin_field(u32 *pplm,
- u8 *fec_policy,
- bool write,
- u32 speed)
+static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write,
+ enum mlx5e_fec_supported_link_mode link_mode)
{
- switch (speed) {
- case 10000:
- case 40000:
- if (!write)
- *fec_policy = MLX5_GET(pplm_reg, pplm,
- fec_override_admin_10g_40g);
- else
- MLX5_SET(pplm_reg, pplm,
- fec_override_admin_10g_40g, *fec_policy);
+ switch (link_mode) {
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g);
break;
- case 25000:
- if (!write)
- *fec_policy = MLX5_GET(pplm_reg, pplm,
- fec_override_admin_25g);
- else
- MLX5_SET(pplm_reg, pplm,
- fec_override_admin_25g, *fec_policy);
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_25G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g);
break;
- case 50000:
- if (!write)
- *fec_policy = MLX5_GET(pplm_reg, pplm,
- fec_override_admin_50g);
- else
- MLX5_SET(pplm_reg, pplm,
- fec_override_admin_50g, *fec_policy);
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_50G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g);
break;
- case 56000:
- if (!write)
- *fec_policy = MLX5_GET(pplm_reg, pplm,
- fec_override_admin_56g);
- else
- MLX5_SET(pplm_reg, pplm,
- fec_override_admin_56g, *fec_policy);
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_56G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g);
break;
- case 100000:
- if (!write)
- *fec_policy = MLX5_GET(pplm_reg, pplm,
- fec_override_admin_100g);
- else
- MLX5_SET(pplm_reg, pplm,
- fec_override_admin_100g, *fec_policy);
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_100G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x);
break;
default:
return -EINVAL;
@@ -408,32 +425,40 @@ static int mlx5e_fec_admin_field(u32 *pplm,
return 0;
}
+#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \
+ MLX5_GET(pplm_reg, buf, fec_override_cap_##link)
+
/* returns FEC capabilities for a given speed */
-static int mlx5e_get_fec_cap_field(u32 *pplm,
- u8 *fec_cap,
- u32 speed)
+static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap,
+ enum mlx5e_fec_supported_link_mode link_mode)
{
- switch (speed) {
- case 10000:
- case 40000:
- *fec_cap = MLX5_GET(pplm_reg, pplm,
- fec_override_cap_10g_40g);
+ switch (link_mode) {
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_25G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g);
break;
- case 25000:
- *fec_cap = MLX5_GET(pplm_reg, pplm,
- fec_override_cap_25g);
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_50G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g);
break;
- case 50000:
- *fec_cap = MLX5_GET(pplm_reg, pplm,
- fec_override_cap_50g);
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_56G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g);
break;
- case 56000:
- *fec_cap = MLX5_GET(pplm_reg, pplm,
- fec_override_cap_56g);
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_100G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g);
break;
- case 100000:
- *fec_cap = MLX5_GET(pplm_reg, pplm,
- fec_override_cap_100g);
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x);
break;
default:
return -EINVAL;
@@ -441,40 +466,45 @@ static int mlx5e_get_fec_cap_field(u32 *pplm,
return 0;
}
-int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps)
+bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy)
{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(pplm_reg);
- u32 current_fec_speed;
int err;
+ int i;
- if (!MLX5_CAP_GEN(dev, pcam_reg))
- return -EOPNOTSUPP;
-
- if (!MLX5_CAP_PCAM_REG(dev, pplm))
- return -EOPNOTSUPP;
+ if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm))
+ return false;
MLX5_SET(pplm_reg, in, local_port, 1);
err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
if (err)
- return err;
+ return false;
- err = mlx5e_port_linkspeed(dev, &current_fec_speed);
- if (err)
- return err;
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ u16 fec_caps;
+
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
- return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed);
+ mlx5e_get_fec_cap_field(out, &fec_caps, i);
+ if (fec_caps & fec_policy)
+ return true;
+ }
+ return false;
}
int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
- u8 *fec_configured_mode)
+ u16 *fec_configured_mode)
{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(pplm_reg);
- u32 link_speed;
int err;
+ int i;
if (!MLX5_CAP_GEN(dev, pcam_reg))
return -EOPNOTSUPP;
@@ -490,24 +520,28 @@ int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
*fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active);
if (!fec_configured_mode)
- return 0;
+ goto out;
- err = mlx5e_port_linkspeed(dev, &link_speed);
- if (err)
- return err;
+ *fec_configured_mode = 0;
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
- return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed);
+ mlx5e_fec_admin_field(out, fec_configured_mode, 0, i);
+ if (*fec_configured_mode != 0)
+ goto out;
+ }
+out:
+ return 0;
}
-int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
{
- u8 fec_policy_nofec = BIT(MLX5E_FEC_NOFEC);
- bool fec_mode_not_supp_in_speed = false;
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(pplm_reg);
- u8 fec_policy_auto = 0;
- u8 fec_caps = 0;
+ u16 fec_policy_auto = 0;
int err;
int i;
@@ -517,6 +551,12 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
if (!MLX5_CAP_PCAM_REG(dev, pplm))
return -EOPNOTSUPP;
+ if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane)
+ return -EOPNOTSUPP;
+
+ if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy))
+ return -EOPNOTSUPP;
+
MLX5_SET(pplm_reg, in, local_port, 1);
err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
if (err)
@@ -524,25 +564,31 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
MLX5_SET(pplm_reg, out, local_port, 1);
- for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS; i++) {
- mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]);
- /* policy supported for link speed, or policy is auto */
- if (fec_caps & fec_policy || fec_policy == fec_policy_auto) {
- mlx5e_fec_admin_field(out, &fec_policy, 1,
- fec_supported_speeds[i]);
- } else {
- /* turn off FEC if supported. Else, leave it the same */
- if (fec_caps & fec_policy_nofec)
- mlx5e_fec_admin_field(out, &fec_policy_nofec, 1,
- fec_supported_speeds[i]);
- fec_mode_not_supp_in_speed = true;
- }
- }
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ u16 conf_fec = fec_policy;
+ u16 fec_caps = 0;
+
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
+
+ /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514
+ * to link modes up to 25G per lane and to
+ * MLX5E_FEC_RS_544_514 in the new link modes based on
+ * 50 G per lane
+ */
+ if (conf_fec == (1 << MLX5E_FEC_RS_528_514) &&
+ i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
+ conf_fec = (1 << MLX5E_FEC_RS_544_514);
- if (fec_mode_not_supp_in_speed)
- mlx5_core_dbg(dev,
- "FEC policy 0x%x is not supported for some speeds",
- fec_policy);
+ mlx5e_get_fec_cap_field(out, &fec_caps, i);
+
+ /* policy supported for link speed */
+ if (fec_caps & conf_fec)
+ mlx5e_fec_admin_field(out, &conf_fec, 1, i);
+ else
+ /* set FEC to auto*/
+ mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i);
+ }
return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index 4a7f4497692b..7a7defe60792 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -54,21 +54,23 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
bool force_legacy);
-
+bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev);
int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
-int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps);
+bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy);
int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
- u8 *fec_configured_mode);
-int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy);
+ u16 *fec_configured_mode);
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy);
enum {
MLX5E_FEC_NOFEC,
MLX5E_FEC_FIRECODE,
MLX5E_FEC_RS_528_514,
+ MLX5E_FEC_RS_544_514 = 7,
+ MLX5E_FEC_LLRS_272_257_1 = 9,
};
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index ae99fac08b53..c9d5d8d93994 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -34,6 +34,7 @@
int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
struct mlx5e_port_buffer *port_buffer)
{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
struct mlx5_core_dev *mdev = priv->mdev;
int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
u32 total_used = 0;
@@ -57,11 +58,11 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
port_buffer->buffer[i].epsb =
MLX5_GET(bufferx_reg, buffer, epsb);
port_buffer->buffer[i].size =
- MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT;
+ MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz;
port_buffer->buffer[i].xon =
- MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+ MLX5_GET(bufferx_reg, buffer, xon_threshold) * port_buff_cell_sz;
port_buffer->buffer[i].xoff =
- MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+ MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz;
total_used += port_buffer->buffer[i].size;
mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i,
@@ -73,7 +74,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
}
port_buffer->port_buffer_size =
- MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT;
+ MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz;
port_buffer->spare_buffer_size =
port_buffer->port_buffer_size - total_used;
@@ -88,9 +89,9 @@ out:
static int port_set_buffer(struct mlx5e_priv *priv,
struct mlx5e_port_buffer *port_buffer)
{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
struct mlx5_core_dev *mdev = priv->mdev;
int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
- void *buffer;
void *in;
int err;
int i;
@@ -104,16 +105,18 @@ static int port_set_buffer(struct mlx5e_priv *priv,
goto out;
for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
- buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
-
- MLX5_SET(bufferx_reg, buffer, size,
- port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT);
- MLX5_SET(bufferx_reg, buffer, lossy,
- port_buffer->buffer[i].lossy);
- MLX5_SET(bufferx_reg, buffer, xoff_threshold,
- port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT);
- MLX5_SET(bufferx_reg, buffer, xon_threshold,
- port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT);
+ void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
+ u64 size = port_buffer->buffer[i].size;
+ u64 xoff = port_buffer->buffer[i].xoff;
+ u64 xon = port_buffer->buffer[i].xon;
+
+ do_div(size, port_buff_cell_sz);
+ do_div(xoff, port_buff_cell_sz);
+ do_div(xon, port_buff_cell_sz);
+ MLX5_SET(bufferx_reg, buffer, size, size);
+ MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy);
+ MLX5_SET(bufferx_reg, buffer, xoff_threshold, xoff);
+ MLX5_SET(bufferx_reg, buffer, xon_threshold, xon);
}
err = mlx5e_port_set_pbmc(mdev, in);
@@ -143,7 +146,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
}
static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
- u32 xoff, unsigned int max_mtu)
+ u32 xoff, unsigned int max_mtu, u16 port_buff_cell_sz)
{
int i;
@@ -155,7 +158,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
}
if (port_buffer->buffer[i].size <
- (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) {
+ (xoff + max_mtu + port_buff_cell_sz)) {
pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n",
i, port_buffer->buffer[i].size);
return -ENOMEM;
@@ -175,6 +178,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
* @pfc_en: <input> current pfc configuration
* @buffer: <input> current prio to buffer mapping
* @xoff: <input> xoff value
+ * @port_buff_cell_sz: <input> port buffer cell_size
* @port_buffer: <output> port receive buffer configuration
* @change: <output>
*
@@ -189,7 +193,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
* sets change to true if buffer configuration was modified.
*/
static int update_buffer_lossy(unsigned int max_mtu,
- u8 pfc_en, u8 *buffer, u32 xoff,
+ u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz,
struct mlx5e_port_buffer *port_buffer,
bool *change)
{
@@ -225,7 +229,7 @@ static int update_buffer_lossy(unsigned int max_mtu,
}
if (changed) {
- err = update_xoff_threshold(port_buffer, xoff, max_mtu);
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz);
if (err)
return err;
@@ -262,6 +266,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
u32 *buffer_size,
u8 *prio2buffer)
{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
struct mlx5e_port_buffer port_buffer;
u32 xoff = calculate_xoff(priv, mtu);
bool update_prio2buffer = false;
@@ -282,7 +287,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
if (err)
return err;
}
@@ -292,7 +297,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff,
+ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz,
&port_buffer, &update_buffer);
if (err)
return err;
@@ -304,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer,
- xoff, &port_buffer, &update_buffer);
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
+ port_buff_cell_sz, &port_buffer, &update_buffer);
if (err)
return err;
}
@@ -329,7 +334,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
return -EINVAL;
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
if (err)
return err;
}
@@ -337,7 +342,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
/* Need to update buffer configuration if xoff value is changed */
if (!update_buffer && xoff != priv->dcbx.xoff) {
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
index 34f55b81a0de..80af7a5ac604 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -36,7 +36,6 @@
#include "port.h"
#define MLX5E_MAX_BUFFER 8
-#define MLX5E_BUFFER_CELL_SHIFT 7
#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
new file mode 100644
index 000000000000..8469e9c38670
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -0,0 +1,854 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies
+
+#include "en/ptp.h"
+#include "en/txrx.h"
+#include "en/params.h"
+#include "en/fs_tt_redirect.h"
+
+struct mlx5e_ptp_fs {
+ struct mlx5_flow_handle *l2_rule;
+ struct mlx5_flow_handle *udp_v4_rule;
+ struct mlx5_flow_handle *udp_v6_rule;
+ bool valid;
+};
+
+struct mlx5e_ptp_params {
+ struct mlx5e_params params;
+ struct mlx5e_sq_param txq_sq_param;
+ struct mlx5e_rq_param rq_param;
+};
+
+struct mlx5e_skb_cb_hwtstamp {
+ ktime_t cqe_hwtstamp;
+ ktime_t port_hwtstamp;
+};
+
+void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb)
+{
+ memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
+}
+
+static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_skb_cb_hwtstamp) > sizeof(skb->cb));
+ return (struct mlx5e_skb_cb_hwtstamp *)skb->cb;
+}
+
+static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
+ struct mlx5e_ptp_cq_stats *cq_stats)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ ktime_t diff;
+
+ diff = abs(mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp -
+ mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp);
+
+ /* Maximal allowed diff is 1 / 128 second */
+ if (diff > (NSEC_PER_SEC >> 7)) {
+ cq_stats->abort++;
+ cq_stats->abort_abs_diff_ns += diff;
+ return;
+ }
+
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+}
+
+void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
+ ktime_t hwtstamp,
+ struct mlx5e_ptp_cq_stats *cq_stats)
+{
+ switch (hwtstamp_type) {
+ case (MLX5E_SKB_CB_CQE_HWTSTAMP):
+ mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp = hwtstamp;
+ break;
+ case (MLX5E_SKB_CB_PORT_HWTSTAMP):
+ mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp = hwtstamp;
+ break;
+ }
+
+ /* If both CQEs arrive, check and report the port tstamp, and clear skb cb as
+ * skb soon to be released.
+ */
+ if (!mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp ||
+ !mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp)
+ return;
+
+ mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats);
+ memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
+}
+
+#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
+
+static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
+}
+
+static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ struct sk_buff *skb;
+
+ ptpsq->cq_stats->resync_event++;
+
+ while (skb_cc != skb_id) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+ ptpsq->cq_stats->resync_cqe++;
+ skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ }
+}
+
+static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
+ struct mlx5_cqe64 *cqe,
+ int budget)
+{
+ u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
+ u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct sk_buff *skb;
+ ktime_t hwtstamp;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ ptpsq->cq_stats->err_cqe++;
+ goto out;
+ }
+
+ if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
+ mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
+
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
+ mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
+ hwtstamp, ptpsq->cq_stats);
+ ptpsq->cq_stats->cqe++;
+
+out:
+ napi_consume_skb(skb, budget);
+}
+
+static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
+{
+ struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq);
+ struct mlx5_cqwq *cqwq = &cq->wq;
+ struct mlx5_cqe64 *cqe;
+ int work_done = 0;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(cqwq);
+ if (!cqe)
+ return false;
+
+ do {
+ mlx5_cqwq_pop(cqwq);
+
+ mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget);
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
+
+ mlx5_cqwq_update_db_record(cqwq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ return work_done == budget;
+}
+
+static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi);
+ struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_rq *rq = &c->rq;
+ bool busy = false;
+ int work_done = 0;
+ int i;
+
+ rcu_read_lock();
+
+ ch_stats->poll++;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (i = 0; i < c->num_tc; i++) {
+ busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget);
+ busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget);
+ }
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) {
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ busy |= work_done == budget;
+ busy |= INDIRECT_CALL_2(rq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ rq);
+ }
+
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ goto out;
+
+ ch_stats->arm++;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (i = 0; i < c->num_tc; i++) {
+ mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq);
+ mlx5e_cq_arm(&c->ptpsq[i].ts_cq);
+ }
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
+
+ return work_done;
+}
+
+static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq, int tc,
+ struct mlx5e_ptpsq *ptpsq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+ int node;
+
+ sq->pdev = c->pdev;
+ sq->clock = &mdev->clock;
+ sq->mkey_be = c->mkey_be;
+ sq->netdev = c->netdev;
+ sq->priv = c->priv;
+ sq->mdev = mdev;
+ sq->ch_ix = MLX5E_PTP_CHANNEL_IX;
+ sq->txq_ix = txq_ix;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->stats = &c->priv->ptp_stats.sq[tc];
+ sq->ptpsq = ptpsq;
+ INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+ if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
+ sq->stop_room = param->stop_room;
+ sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
+
+ node = dev_to_node(mlx5_core_dma_dev(mdev));
+
+ param->wq.db_numa_node = node;
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_txqsq_db(sq, node);
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ mlx5_core_destroy_sq(mdev, sqn);
+}
+
+static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq);
+ struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev;
+
+ ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)),
+ GFP_KERNEL, numa);
+ if (!ptpsq->skb_fifo.fifo)
+ return -ENOMEM;
+
+ ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc;
+ ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc;
+ ptpsq->skb_fifo.mask = wq_sz - 1;
+ if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter))
+ ptpsq->ts_cqe_ctr_mask =
+ (1 << MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1;
+ return 0;
+}
+
+static void mlx5e_ptp_drain_skb_fifo(struct mlx5e_skb_fifo *skb_fifo)
+{
+ while (*skb_fifo->pc != *skb_fifo->cc) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(skb_fifo);
+
+ dev_kfree_skb_any(skb);
+ }
+}
+
+static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo)
+{
+ mlx5e_ptp_drain_skb_fifo(skb_fifo);
+ kvfree(skb_fifo->fifo);
+}
+
+static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
+ int txq_ix, struct mlx5e_ptp_params *cparams,
+ int tc, struct mlx5e_ptpsq *ptpsq)
+{
+ struct mlx5e_sq_param *sqp = &cparams->txq_sq_param;
+ struct mlx5e_txqsq *txqsq = &ptpsq->txqsq;
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+ err = mlx5e_ptp_alloc_txqsq(c, txq_ix, &cparams->params, sqp,
+ txqsq, tc, ptpsq);
+ if (err)
+ return err;
+
+ csp.tisn = tisn;
+ csp.tis_lst_sz = 1;
+ csp.cqn = txqsq->cq.mcq.cqn;
+ csp.wq_ctrl = &txqsq->wq_ctrl;
+ csp.min_inline_mode = txqsq->min_inline_mode;
+ csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
+
+ err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
+ if (err)
+ goto err_free_txqsq;
+
+ err = mlx5e_ptp_alloc_traffic_db(ptpsq,
+ dev_to_node(mlx5_core_dma_dev(c->mdev)));
+ if (err)
+ goto err_free_txqsq;
+
+ return 0;
+
+err_free_txqsq:
+ mlx5e_free_txqsq(txqsq);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq)
+{
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct mlx5_core_dev *mdev = sq->mdev;
+
+ mlx5e_ptp_free_traffic_db(&ptpsq->skb_fifo);
+ cancel_work_sync(&sq->recover_work);
+ mlx5e_ptp_destroy_sq(mdev, sq->sqn);
+ mlx5e_free_txqsq_descs(sq);
+ mlx5e_free_txqsq(sq);
+}
+
+static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_params *params = &cparams->params;
+ u8 num_tc = mlx5e_get_dcb_num_tc(params);
+ int ix_base;
+ int err;
+ int tc;
+
+ ix_base = num_tc * params->num_channels;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ int txq_ix = ix_base + tc;
+
+ err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
+ cparams, tc, &c->ptpsq[tc]);
+ if (err)
+ goto close_txqsq;
+ }
+
+ return 0;
+
+close_txqsq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
+}
+
+static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_params *params = &cparams->params;
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder ptp_moder = {};
+ struct mlx5e_cq_param *cq_param;
+ u8 num_tc;
+ int err;
+ int tc;
+
+ num_tc = mlx5e_get_dcb_num_tc(params);
+
+ ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev));
+ ccp.ch_stats = c->stats;
+ ccp.napi = &c->napi;
+ ccp.ix = MLX5E_PTP_CHANNEL_IX;
+
+ cq_param = &cparams->txq_sq_param.cqp;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq;
+
+ err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+ if (err)
+ goto out_err_txqsq_cq;
+ }
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq;
+ struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc];
+
+ err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+ if (err)
+ goto out_err_ts_cq;
+
+ ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc];
+ }
+
+ return 0;
+
+out_err_ts_cq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
+ tc = num_tc;
+out_err_txqsq_cq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
+
+ return err;
+}
+
+static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder ptp_moder = {};
+ struct mlx5e_cq_param *cq_param;
+ struct mlx5e_cq *cq = &c->rq.cq;
+
+ ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev));
+ ccp.ch_stats = c->stats;
+ ccp.napi = &c->napi;
+ ccp.ix = MLX5E_PTP_CHANNEL_IX;
+
+ cq_param = &cparams->rq_param.cqp;
+
+ return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+}
+
+static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
+}
+
+static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq;
+
+ mlx5e_build_sq_param_common(mdev, param);
+
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->stop_room = mlx5e_stop_room_for_max_wqe(mdev);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ u16 q_counter,
+ struct mlx5e_ptp_params *ptp_params)
+{
+ struct mlx5e_rq_param *rq_params = &ptp_params->rq_param;
+ struct mlx5e_params *params = &ptp_params->params;
+
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ mlx5e_init_rq_type_params(mdev, params);
+ params->sw_mtu = netdev->max_mtu;
+ mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params);
+}
+
+static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams,
+ struct mlx5e_params *orig)
+{
+ struct mlx5e_params *params = &cparams->params;
+
+ params->tx_min_inline_mode = orig->tx_min_inline_mode;
+ params->num_channels = orig->num_channels;
+ params->hard_mtu = orig->hard_mtu;
+ params->sw_mtu = orig->sw_mtu;
+ params->mqprio = orig->mqprio;
+
+ /* SQ */
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ params->log_sq_size = orig->log_sq_size;
+ mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
+ }
+ /* RQ */
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ params->vlan_strip_disable = orig->vlan_strip_disable;
+ mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+ }
+}
+
+static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5e_priv *priv = c->priv;
+ int err;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = priv->netdev;
+ rq->priv = priv;
+ rq->clock = &mdev->clock;
+ rq->tstamp = &priv->tstamp;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &c->priv->ptp_stats.rq;
+ rq->ix = MLX5E_PTP_CHANNEL_IX;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ err = mlx5e_rq_set_handlers(rq, params, false);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
+}
+
+static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int node = dev_to_node(c->mdev->device);
+ int err;
+
+ err = mlx5e_init_ptp_rq(c, params, &c->rq);
+ if (err)
+ return err;
+
+ return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq);
+}
+
+static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ int err;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ err = mlx5e_ptp_open_tx_cqs(c, cparams);
+ if (err)
+ return err;
+
+ err = mlx5e_ptp_open_txqsqs(c, cparams);
+ if (err)
+ goto close_tx_cqs;
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ err = mlx5e_ptp_open_rx_cq(c, cparams);
+ if (err)
+ goto close_txqsq;
+
+ err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param);
+ if (err)
+ goto close_rx_cq;
+ }
+ return 0;
+
+close_rx_cq:
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_close_cq(&c->rq.cq);
+close_txqsq:
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+ mlx5e_ptp_close_txqsqs(c);
+close_tx_cqs:
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+ mlx5e_ptp_close_tx_cqs(c);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c)
+{
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ mlx5e_close_rq(&c->rq);
+ mlx5e_close_cq(&c->rq.cq);
+ }
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ mlx5e_ptp_close_txqsqs(c);
+ mlx5e_ptp_close_tx_cqs(c);
+ }
+}
+
+static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
+{
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS))
+ __set_bit(MLX5E_PTP_STATE_TX, c->state);
+
+ if (params->ptp_rx)
+ __set_bit(MLX5E_PTP_STATE_RX, c->state);
+
+ return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
+}
+
+static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
+
+ if (!ptp_fs->valid)
+ return;
+
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
+
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
+ ptp_fs->valid = false;
+}
+
+static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
+{
+ u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
+ struct mlx5e_flow_steering *fs = priv->fs;
+ struct mlx5_flow_handle *rule;
+ struct mlx5e_ptp_fs *ptp_fs;
+ int err;
+
+ ptp_fs = mlx5e_fs_get_ptp(fs);
+ if (ptp_fs->valid)
+ return 0;
+
+ err = mlx5e_fs_tt_redirect_udp_create(fs);
+ if (err)
+ goto out_free;
+
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP,
+ tirn, PTP_EV_PORT);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_fs_udp;
+ }
+ ptp_fs->udp_v4_rule = rule;
+
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP,
+ tirn, PTP_EV_PORT);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_udp_v4_rule;
+ }
+ ptp_fs->udp_v6_rule = rule;
+
+ err = mlx5e_fs_tt_redirect_any_create(fs);
+ if (err)
+ goto out_destroy_udp_v6_rule;
+
+ rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_fs_any;
+ }
+ ptp_fs->l2_rule = rule;
+ ptp_fs->valid = true;
+
+ return 0;
+
+out_destroy_fs_any:
+ mlx5e_fs_tt_redirect_any_destroy(fs);
+out_destroy_udp_v6_rule:
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+out_destroy_udp_v4_rule:
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+out_destroy_fs_udp:
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
+out_free:
+ return err;
+}
+
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ u8 lag_port, struct mlx5e_ptp **cp)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_ptp_params *cparams;
+ struct mlx5e_ptp *c;
+ int err;
+
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev)));
+ cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL);
+ if (!c || !cparams)
+ return -ENOMEM;
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->pdev = mlx5_core_dma_dev(priv->mdev);
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ c->num_tc = mlx5e_get_dcb_num_tc(params);
+ c->stats = &priv->ptp_stats.ch;
+ c->lag_port = lag_port;
+
+ err = mlx5e_ptp_set_state(c, params);
+ if (err)
+ goto err_free;
+
+ netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll);
+
+ mlx5e_ptp_build_params(c, cparams, params);
+
+ err = mlx5e_ptp_open_queues(c, cparams);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ priv->rx_ptp_opened = true;
+
+ *cp = c;
+
+ kvfree(cparams);
+
+ return 0;
+
+err_napi_del:
+ netif_napi_del(&c->napi);
+err_free:
+ kvfree(cparams);
+ kvfree(c);
+ return err;
+}
+
+void mlx5e_ptp_close(struct mlx5e_ptp *c)
+{
+ mlx5e_ptp_close_queues(c);
+ netif_napi_del(&c->napi);
+
+ kvfree(c);
+}
+
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ napi_enable(&c->napi);
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ mlx5e_ptp_rx_set_fs(c->priv);
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_sched(&c->napi);
+ }
+}
+
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_deactivate_rq(&c->rq);
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq);
+ }
+
+ napi_disable(&c->napi);
+}
+
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
+{
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+ return -EINVAL;
+
+ *rqn = c->rq.rqn;
+ return 0;
+}
+
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
+{
+ struct mlx5e_ptp_fs *ptp_fs;
+
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
+ return 0;
+
+ ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL);
+ if (!ptp_fs)
+ return -ENOMEM;
+ mlx5e_fs_set_ptp(fs, ptp_fs);
+
+ return 0;
+}
+
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
+{
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
+
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
+ return;
+
+ mlx5e_ptp_rx_unset_fs(fs);
+ kfree(ptp_fs);
+}
+
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
+{
+ struct mlx5e_ptp *c = priv->channels.ptp;
+
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
+ return 0;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ if (set) {
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules");
+ return -EINVAL;
+ }
+ return mlx5e_ptp_rx_set_fs(priv);
+ }
+ /* set == false */
+ if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules");
+ return -EINVAL;
+ }
+ mlx5e_ptp_rx_unset_fs(priv->fs);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
new file mode 100644
index 000000000000..cc7efde88ac3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_PTP_H__
+#define __MLX5_EN_PTP_H__
+
+#include "en.h"
+#include "en_stats.h"
+#include "en/txrx.h"
+#include <linux/ptp_classify.h>
+
+#define MLX5E_PTP_CHANNEL_IX 0
+
+struct mlx5e_ptpsq {
+ struct mlx5e_txqsq txqsq;
+ struct mlx5e_cq ts_cq;
+ u16 skb_fifo_cc;
+ u16 skb_fifo_pc;
+ struct mlx5e_skb_fifo skb_fifo;
+ struct mlx5e_ptp_cq_stats *cq_stats;
+ u16 ts_cqe_ctr_mask;
+};
+
+enum {
+ MLX5E_PTP_STATE_TX,
+ MLX5E_PTP_STATE_RX,
+ MLX5E_PTP_STATE_NUM_STATES,
+};
+
+struct mlx5e_ptp {
+ /* data path */
+ struct mlx5e_ptpsq ptpsq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq rq;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+ u8 num_tc;
+ u8 lag_port;
+
+ /* data path - accessed per napi poll */
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES);
+};
+
+static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
+{
+ struct flow_keys fk;
+
+ if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ return false;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+ return false;
+
+ if (fk.basic.n_proto == htons(ETH_P_1588))
+ return true;
+
+ if (fk.basic.n_proto != htons(ETH_P_IP) &&
+ fk.basic.n_proto != htons(ETH_P_IPV6))
+ return false;
+
+ return (fk.basic.ip_proto == IPPROTO_UDP &&
+ fk.ports.dst == htons(PTP_EV_PORT));
+}
+
+static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq)
+{
+ if (!sq->ptpsq)
+ return true;
+
+ return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo);
+}
+
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ u8 lag_port, struct mlx5e_ptp **cp);
+void mlx5e_ptp_close(struct mlx5e_ptp *c);
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set);
+
+enum {
+ MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0),
+ MLX5E_SKB_CB_PORT_HWTSTAMP = BIT(1),
+};
+
+void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
+ ktime_t hwtstamp,
+ struct mlx5e_ptp_cq_stats *cq_stats);
+
+void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb);
+#endif /* __MLX5_EN_PTP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
new file mode 100644
index 000000000000..2842195ee548
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+#include <net/sch_generic.h>
+
+#include <net/pkt_cls.h>
+#include "en.h"
+#include "params.h"
+#include "../qos.h"
+#include "en/htb.h"
+
+struct qos_sq_callback_params {
+ struct mlx5e_priv *priv;
+ struct mlx5e_channels *chs;
+};
+
+int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes)
+{
+ if (nbytes < BYTES_IN_MBIT) {
+ qos_warn(mdev, "Input rate (%llu Bytes/sec) below minimum supported (%u Bytes/sec)\n",
+ nbytes, BYTES_IN_MBIT);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static u32 mlx5e_qos_bytes2mbits(struct mlx5_core_dev *mdev, u64 nbytes)
+{
+ return div_u64(nbytes, BYTES_IN_MBIT);
+}
+
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+ return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev));
+}
+
+/* TX datapath API */
+
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
+{
+ /* These channel params are safe to access from the datapath, because:
+ * 1. This function is called only after checking selq->htb_maj_id != 0,
+ * and the number of queues can't change while HTB offload is active.
+ * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for
+ * mlx5e_select_queue to finish while holding priv->state_lock,
+ * preventing other code from changing the number of queues.
+ */
+ bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS);
+
+ return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid;
+}
+
+/* SQ lifecycle */
+
+static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_channel *c;
+ int ix;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ return mlx5e_state_dereference(priv, qos_sqs[qid]);
+}
+
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_create_cq_param ccp = {};
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_sq_param param_sq;
+ struct mlx5e_cq_param param_cq;
+ int txq_ix, ix, qid, err = 0;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+
+ params = &chs->params;
+
+ txq_ix = mlx5e_qid_from_qos(chs, node_qid);
+
+ WARN_ON(node_qid > priv->htb_max_qos_sqs);
+ if (node_qid == priv->htb_max_qos_sqs) {
+ struct mlx5e_sq_stats *stats, **stats_list = NULL;
+
+ if (priv->htb_max_qos_sqs == 0) {
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list),
+ GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+ }
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats) {
+ kvfree(stats_list);
+ return -ENOMEM;
+ }
+ if (stats_list)
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
+ /* Order htb_max_qos_sqs increment after writing the array pointer.
+ * Pairs with smp_load_acquire in en_stats.c.
+ */
+ smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1);
+ }
+
+ ix = node_qid % params->num_channels;
+ qid = node_qid / params->num_channels;
+ c = chs->c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = kzalloc(sizeof(*sq), GFP_KERNEL);
+
+ if (!sq)
+ return -ENOMEM;
+
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ memset(&param_sq, 0, sizeof(param_sq));
+ memset(&param_cq, 0, sizeof(param_cq));
+ mlx5e_build_sq_param(priv->mdev, params, &param_sq);
+ mlx5e_build_tx_cq_param(priv->mdev, params, &param_cq);
+ err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
+ if (err)
+ goto err_free_sq;
+ err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params,
+ &param_sq, sq, 0, hw_id,
+ priv->htb_qos_sq_stats[node_qid]);
+ if (err)
+ goto err_close_cq;
+
+ rcu_assign_pointer(qos_sqs[qid], sq);
+
+ return 0;
+
+err_close_cq:
+ mlx5e_close_cq(&sq->cq);
+err_free_sq:
+ kfree(sq);
+ return err;
+}
+
+static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id)
+{
+ struct qos_sq_callback_params *cb_params = data;
+
+ return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id);
+}
+
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_priv *priv = data;
+ struct mlx5e_txqsq *sq;
+ u16 qid;
+
+ sq = mlx5e_get_qos_sq(priv, node_qid);
+
+ qid = mlx5e_qid_from_qos(&priv->channels, node_qid);
+
+ /* If it's a new queue, it will be marked as started at this point.
+ * Stop it before updating txq2sq.
+ */
+ mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
+
+ priv->txq2sq[qid] = sq;
+
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+
+ qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid);
+ mlx5e_activate_txqsq(sq);
+
+ return 0;
+}
+
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_get_qos_sq(priv, qid);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+
+ /* Make the change to txq2sq visible before the queue is started again.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+}
+
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+ int ix;
+
+ params = &priv->channels.params;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock));
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+}
+
+void mlx5e_qos_close_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock));
+ if (!qos_sqs)
+ return;
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+ }
+
+ kvfree(qos_sqs);
+}
+
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_close_queues(chs->c[i]);
+}
+
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ u16 qos_sqs_size;
+ int i;
+
+ qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num);
+
+ for (i = 0; i < chs->num; i++) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL);
+ if (!sqs)
+ goto err_free;
+
+ WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size);
+ smp_wmb(); /* Pairs with mlx5e_napi_poll. */
+ rcu_assign_pointer(chs->c[i]->qos_sqs, sqs);
+ }
+
+ return 0;
+
+err_free:
+ while (--i >= 0) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL,
+ lockdep_is_held(&priv->state_lock));
+
+ synchronize_rcu(); /* Sync with NAPI. */
+ kvfree(sqs);
+ }
+ return -ENOMEM;
+}
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ struct qos_sq_callback_params callback_params;
+ int err;
+
+ err = mlx5e_qos_alloc_queues(priv, chs);
+ if (err)
+ return err;
+
+ callback_params.priv = priv;
+ callback_params.chs = chs;
+
+ err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params);
+ if (err) {
+ mlx5e_qos_close_all_queues(chs);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv)
+{
+ mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv);
+}
+
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_params *params = &c->priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs);
+ if (!qos_sqs)
+ return;
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ u16 qid = params->num_channels * i + c->ix;
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ /* The queue is disabled, no synchronization with datapath is needed. */
+ c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL;
+ }
+}
+
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_deactivate_queues(chs->c[i]);
+}
+
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
+{
+ qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid);
+ netdev_tx_reset_queue(txq);
+ netif_tx_start_queue(txq);
+}
+
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
+{
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid);
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+ if (!qdisc)
+ return;
+
+ spin_lock_bh(qdisc_lock(qdisc));
+ qdisc_reset(qdisc);
+ spin_unlock_bh(qdisc_lock(qdisc));
+}
+
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt)
+{
+ struct mlx5e_htb *htb = priv->htb;
+ int res;
+
+ if (!htb && htb_qopt->command != TC_HTB_CREATE)
+ return -EINVAL;
+
+ switch (htb_qopt->command) {
+ case TC_HTB_CREATE:
+ if (!mlx5_qos_is_supported(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(htb_qopt->extack,
+ "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
+ }
+ priv->htb = mlx5e_htb_alloc();
+ htb = priv->htb;
+ if (!htb)
+ return -ENOMEM;
+ res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv);
+ if (res) {
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ }
+ return res;
+ case TC_HTB_DESTROY:
+ mlx5e_htb_cleanup(htb);
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ return 0;
+ case TC_HTB_LEAF_ALLOC_QUEUE:
+ res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ case TC_HTB_LEAF_TO_INNER:
+ return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL:
+ return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL_LAST:
+ case TC_HTB_LEAF_DEL_LAST_FORCE:
+ return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid,
+ htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE,
+ htb_qopt->extack);
+ case TC_HTB_NODE_MODIFY:
+ return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil,
+ htb_qopt->extack);
+ case TC_HTB_LEAF_QUERY_QUEUE:
+ res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+struct mlx5e_mqprio_rl {
+ struct mlx5_core_dev *mdev;
+ u32 root_id;
+ u32 *leaves_id;
+ u8 num_tc;
+};
+
+struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_mqprio_rl), GFP_KERNEL);
+}
+
+void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl)
+{
+ kvfree(rl);
+}
+
+int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc,
+ u64 max_rate[])
+{
+ int err;
+ int tc;
+
+ if (!mlx5_qos_is_supported(mdev)) {
+ qos_warn(mdev, "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
+ }
+ if (num_tc > mlx5e_qos_max_leaf_nodes(mdev))
+ return -EINVAL;
+
+ rl->mdev = mdev;
+ rl->num_tc = num_tc;
+ rl->leaves_id = kvcalloc(num_tc, sizeof(*rl->leaves_id), GFP_KERNEL);
+ if (!rl->leaves_id)
+ return -ENOMEM;
+
+ err = mlx5_qos_create_root_node(mdev, &rl->root_id);
+ if (err)
+ goto err_free_leaves;
+
+ qos_dbg(mdev, "Root created, id %#x\n", rl->root_id);
+
+ for (tc = 0; tc < num_tc; tc++) {
+ u32 max_average_bw;
+
+ max_average_bw = mlx5e_qos_bytes2mbits(mdev, max_rate[tc]);
+ err = mlx5_qos_create_leaf_node(mdev, rl->root_id, 0, max_average_bw,
+ &rl->leaves_id[tc]);
+ if (err)
+ goto err_destroy_leaves;
+
+ qos_dbg(mdev, "Leaf[%d] created, id %#x, max average bw %u Mbits/sec\n",
+ tc, rl->leaves_id[tc], max_average_bw);
+ }
+ return 0;
+
+err_destroy_leaves:
+ while (--tc >= 0)
+ mlx5_qos_destroy_node(mdev, rl->leaves_id[tc]);
+ mlx5_qos_destroy_node(mdev, rl->root_id);
+err_free_leaves:
+ kvfree(rl->leaves_id);
+ return err;
+}
+
+void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
+
+ for (tc = 0; tc < rl->num_tc; tc++)
+ mlx5_qos_destroy_node(rl->mdev, rl->leaves_id[tc]);
+ mlx5_qos_destroy_node(rl->mdev, rl->root_id);
+ kvfree(rl->leaves_id);
+}
+
+int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id)
+{
+ if (tc >= rl->num_tc)
+ return -EINVAL;
+
+ *hw_id = rl->leaves_id[tc];
+ return 0;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
new file mode 100644
index 000000000000..4947afa23b73
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_QOS_H
+#define __MLX5E_EN_QOS_H
+
+#include <linux/mlx5/driver.h>
+
+#define BYTES_IN_MBIT 125000
+
+struct mlx5e_priv;
+struct mlx5e_htb;
+struct mlx5e_channels;
+struct mlx5e_channel;
+struct tc_htb_qopt_offload;
+
+int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes);
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+
+/* SQ lifecycle */
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id);
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id);
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq);
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid);
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs);
+void mlx5e_qos_close_queues(struct mlx5e_channel *c);
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs);
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+/* TX datapath API */
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid);
+
+/* HTB API */
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb);
+
+/* MQPRIO TX rate limit */
+struct mlx5e_mqprio_rl;
+struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void);
+void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl);
+int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc,
+ u64 max_rate[]);
+void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl);
+int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000000..b6f5c1bcdbcd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+ struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct list_head metadata_list;
+};
+
+struct mlx5e_rep_bond_slave_entry {
+ struct list_head list;
+ struct net_device *netdev;
+};
+
+struct mlx5e_rep_bond_metadata {
+ struct list_head list; /* link to global list of rep_bond_metadata */
+ struct mlx5_eswitch *esw;
+ /* private of uplink holding rep bond metadata list */
+ struct net_device *lag_dev;
+ u32 metadata_reg_c_0;
+
+ struct list_head slaves_list; /* slaves list */
+ int slaves;
+};
+
+static struct mlx5e_rep_bond_metadata *
+mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_metadata *found = NULL;
+ struct mlx5e_rep_bond_metadata *cur;
+
+ list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
+ if (cur->lag_dev == lag_dev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct mlx5e_rep_bond_slave_entry *
+mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
+ const struct net_device *netdev)
+{
+ struct mlx5e_rep_bond_slave_entry *found = NULL;
+ struct mlx5e_rep_bond_slave_entry *cur;
+
+ list_for_each_entry(cur, &mdata->slaves_list, list) {
+ if (cur->netdev == netdev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
+{
+ netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ list_del(&mdata->list);
+ mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
+ WARN_ON(!list_empty(&mdata->slaves_list));
+ kfree(mdata);
+}
+
+/* This must be called under rtnl_lock */
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+ int err;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata) {
+ /* First netdev becomes slave, no metadata presents the lag_dev. Create one */
+ mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+ if (!mdata)
+ return -ENOMEM;
+
+ mdata->lag_dev = lag_dev;
+ mdata->esw = esw;
+ INIT_LIST_HEAD(&mdata->slaves_list);
+ mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
+ if (!mdata->metadata_reg_c_0) {
+ kfree(mdata);
+ return -ENOSPC;
+ }
+ list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
+
+ netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ }
+
+ s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
+ if (!s_entry) {
+ err = -ENOMEM;
+ goto entry_alloc_err;
+ }
+
+ s_entry->netdev = netdev;
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
+ mdata->metadata_reg_c_0);
+ if (err)
+ goto ingress_err;
+
+ mdata->slaves++;
+ list_add_tail(&s_entry->list, &mdata->slaves_list);
+ netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ return 0;
+
+ingress_err:
+ kfree(s_entry);
+entry_alloc_err:
+ if (!mdata->slaves)
+ mlx5e_rep_bond_metadata_release(mdata);
+ return err;
+}
+
+/* This must be called under rtnl_lock */
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata)
+ return;
+
+ s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
+ if (!s_entry)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ /* Reset bond_metadata to zero first then reset all ingress/egress
+ * acls and rx rules of unslave representor's vport
+ */
+ mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+ mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
+ mlx5e_rep_bond_update(priv, false);
+
+ list_del(&s_entry->list);
+
+ netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ if (--mdata->slaves == 0)
+ mlx5e_rep_bond_metadata_release(mdata);
+ kfree(s_entry);
+}
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct netdev_lag_lower_state_info *lag_info;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+ struct list_head *iter;
+ struct net_device *dev;
+ u16 acl_vport_num;
+ u16 fwd_vport_num;
+ int err;
+
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+ if (!lag_info->tx_enabled)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ fwd_vport_num = rpriv->rep->vport;
+ lag_dev = netdev_master_upper_dev_get(netdev);
+ if (!lag_dev)
+ return;
+
+ netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+ lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+ /* Point everyone's egress acl to the vport of the active representor */
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ acl_vport_num = rpriv->rep->vport;
+ if (acl_vport_num != fwd_vport_num) {
+ /* Only single rx_rule for unique bond_metadata should be
+ * present, delete it if it's saved as passive vport's
+ * rx_rule with destination as passive vport's root_ft
+ */
+ mlx5e_rep_bond_update(priv, true);
+ err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+ fwd_vport_num,
+ acl_vport_num);
+ if (err)
+ netdev_warn(dev,
+ "configure slave vport(%d) egress fwd, err(%d)",
+ acl_vport_num, err);
+ }
+ }
+
+ /* Insert new rx_rule for unique bond_metadata, save it as active vport's
+ * rx_rule with new destination as active vport's root_ft
+ */
+ err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
+ if (err)
+ netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
+ fwd_vport_num, err);
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+
+ netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
+ info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
+
+ if (info->linking)
+ mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+ else
+ mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+}
+
+/* Bond device of representors and netdev events are used here in specific way
+ * to support eswitch vports bonding and to perform failover of eswitch vport
+ * by modifying the vport's egress acl of lower dev representors. Thus this
+ * also change the traditional behavior of lower dev under bond device.
+ * All non-representor netdevs or representors of other vendors as lower dev
+ * of bond device are not supported.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_bond *bond;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return NOTIFY_DONE;
+
+ bond = container_of(nb, struct mlx5e_rep_bond, nb);
+ priv = netdev_priv(netdev);
+ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
+ /* Verify VF representor is on the same device of the bond handling the netevent. */
+ if (rpriv->uplink_priv.bond != bond)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ mlx5e_rep_changelowerstate_event(netdev, ptr);
+ break;
+ case NETDEV_CHANGEUPPER:
+ mlx5e_rep_changeupper_event(netdev, ptr);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/* If HW support eswitch vports bonding, register a specific notifier to
+ * handle it when two or more representors are bonded
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv;
+ int ret = 0;
+
+ priv = netdev_priv(netdev);
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+ goto out;
+
+ uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+ if (!uplink_priv->bond) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
+ uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+ ret = register_netdevice_notifier_dev_net(netdev,
+ &uplink_priv->bond->nb,
+ &uplink_priv->bond->nn);
+ if (ret) {
+ netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+ kvfree(uplink_priv->bond);
+ uplink_priv->bond = NULL;
+ }
+
+out:
+ return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+ !rpriv->uplink_priv.bond)
+ return;
+
+ unregister_netdevice_notifier_dev_net(rpriv->netdev,
+ &rpriv->uplink_priv.bond->nb,
+ &rpriv->uplink_priv.bond->nn);
+ kvfree(rpriv->uplink_priv.bond);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
new file mode 100644
index 000000000000..8099a21e674c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -0,0 +1,573 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <net/netevent.h>
+#include <net/switchdev.h>
+#include "bridge.h"
+#include "esw/bridge.h"
+#include "en_rep.h"
+
+#define MLX5_ESW_BRIDGE_UPDATE_INTERVAL 1000
+
+struct mlx5_bridge_switchdev_fdb_work {
+ struct work_struct work;
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct net_device *dev;
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ bool add;
+};
+
+static bool mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return esw == priv->mdev->priv.eswitch;
+}
+
+static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev, *esw_mdev;
+ u64 system_guid, esw_system_guid;
+
+ mdev = priv->mdev;
+ esw_mdev = esw->dev;
+
+ system_guid = mlx5_query_nic_system_image_guid(mdev);
+ esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev);
+
+ return system_guid == esw_system_guid;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw))
+ return lower;
+ }
+
+ return NULL;
+}
+
+static struct net_device *
+mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ if (netif_is_lag_master(dev))
+ dev = mlx5_esw_bridge_lag_rep_get(dev, esw);
+
+ if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw))
+ return NULL;
+
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ *vport_num = rpriv->rep->vport;
+ *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id);
+ return dev;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev))
+ return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num,
+ esw_owner_vhca_id);
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ struct net_device *rep;
+
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num,
+ esw_owner_vhca_id);
+ if (rep)
+ return rep;
+ }
+
+ return NULL;
+}
+
+static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5_esw_bridge_dev_same_esw(rep, esw))
+ return false;
+
+ priv = netdev_priv(rep);
+ mdev = priv->mdev;
+ if (netif_is_lag_master(dev))
+ return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev);
+ return true;
+}
+
+static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ netdev_nb);
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev, *rep;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ u16 vport_num, esw_owner_vhca_id;
+ struct netlink_ext_ack *extack;
+ int ifindex = upper->ifindex;
+ int err = 0;
+
+ if (!netif_is_bridge_master(upper))
+ return 0;
+
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
+ return 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+ else if (mlx5_esw_bridge_dev_same_hw(rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+
+ return err;
+}
+
+static int
+mlx5_esw_bridge_changeupper_validate_netdev(void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev))
+ return 0;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (!mlx5_lag_is_active(mdev))
+ return -EAGAIN;
+ if (!mlx5_lag_is_shared_fdb(mdev))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ int err = 0;
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ err = mlx5_esw_bridge_changeupper_validate_netdev(ptr);
+ break;
+
+ case NETDEV_CHANGEUPPER:
+ err = mlx5_esw_bridge_port_changeupper(nb, ptr);
+ break;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static int
+mlx5_esw_bridge_port_obj_add(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+ const struct switchdev_obj *obj = port_obj_info->obj;
+ const struct switchdev_obj_port_vlan *vlan;
+ u16 vport_num, esw_owner_vhca_id;
+ int err;
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid,
+ vlan->flags, br_offloads, extack);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static int
+mlx5_esw_bridge_port_obj_del(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ const struct switchdev_obj *obj = port_obj_info->obj;
+ const struct switchdev_obj_port_vlan *vlan;
+ u16 vport_num, esw_owner_vhca_id;
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
+ struct switchdev_notifier_port_attr_info *port_attr_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+ const struct switchdev_attr *attr = port_attr_info->attr;
+ u16 vport_num, esw_owner_vhca_id;
+ int err = 0;
+
+ if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_attr_info->handled = true;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+ if (attr->u.brport_flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flag is not supported");
+ err = -EINVAL;
+ }
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id,
+ attr->u.ageing_time, br_offloads);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+ err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id,
+ attr->u.vlan_filtering, br_offloads);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL:
+ err = mlx5_esw_bridge_vlan_proto_set(vport_num,
+ esw_owner_vhca_id,
+ attr->u.vlan_protocol,
+ br_offloads);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ nb_blk);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD:
+ err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads);
+ break;
+ case SWITCHDEV_PORT_OBJ_DEL:
+ err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads);
+ break;
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+ break;
+ default:
+ err = 0;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static void
+mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work)
+{
+ dev_put(fdb_work->dev);
+ kfree(fdb_work->fdb_info.addr);
+ kfree(fdb_work);
+}
+
+static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work)
+{
+ struct mlx5_bridge_switchdev_fdb_work *fdb_work =
+ container_of(work, struct mlx5_bridge_switchdev_fdb_work, work);
+ struct switchdev_notifier_fdb_info *fdb_info =
+ &fdb_work->fdb_info;
+ struct mlx5_esw_bridge_offloads *br_offloads =
+ fdb_work->br_offloads;
+ struct net_device *dev = fdb_work->dev;
+ u16 vport_num, esw_owner_vhca_id;
+
+ rtnl_lock();
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ goto out;
+
+ if (fdb_work->add)
+ mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+ else
+ mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+
+out:
+ rtnl_unlock();
+ mlx5_esw_bridge_cleanup_switchdev_fdb_work(fdb_work);
+}
+
+static struct mlx5_bridge_switchdev_fdb_work *
+mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
+ struct switchdev_notifier_fdb_info *fdb_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_bridge_switchdev_fdb_work *work;
+ u8 *addr;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&work->work, mlx5_esw_bridge_switchdev_fdb_event_work);
+ memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info));
+
+ addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+ if (!addr) {
+ kfree(work);
+ return ERR_PTR(-ENOMEM);
+ }
+ ether_addr_copy(addr, fdb_info->addr);
+ work->fdb_info.addr = addr;
+
+ dev_hold(dev);
+ work->dev = dev;
+ work->br_offloads = br_offloads;
+ work->add = add;
+ return work;
+}
+
+static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ nb);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct mlx5_bridge_switchdev_fdb_work *work;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct switchdev_notifier_info *info = ptr;
+ u16 vport_num, esw_owner_vhca_id;
+ struct net_device *upper, *rep;
+
+ if (event == SWITCHDEV_PORT_ATTR_SET) {
+ int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+
+ return notifier_from_errno(err);
+ }
+
+ upper = netdev_master_upper_dev_get_rcu(dev);
+ if (!upper)
+ return NOTIFY_DONE;
+ if (!netif_is_bridge_master(upper))
+ return NOTIFY_DONE;
+
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+ /* only handle the event on native eswtich of representor */
+ if (!mlx5_esw_bridge_is_local(dev, rep, esw))
+ break;
+
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+ mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ /* only handle the event on peers */
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ break;
+ fallthrough;
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+
+ work = mlx5_esw_bridge_init_switchdev_fdb_work(dev,
+ event == SWITCHDEV_FDB_ADD_TO_DEVICE,
+ fdb_info,
+ br_offloads);
+ if (IS_ERR(work)) {
+ WARN_ONCE(1, "Failed to init switchdev work, err=%ld",
+ PTR_ERR(work));
+ return notifier_from_errno(PTR_ERR(work));
+ }
+
+ queue_work(br_offloads->wq, &work->work);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static void mlx5_esw_bridge_update_work(struct work_struct *work)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(work,
+ struct mlx5_esw_bridge_offloads,
+ update_work.work);
+
+ rtnl_lock();
+ mlx5_esw_bridge_update(br_offloads);
+ rtnl_unlock();
+
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
+}
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw =
+ mdev->priv.eswitch;
+ int err;
+
+ rtnl_lock();
+ br_offloads = mlx5_esw_bridge_init(esw);
+ rtnl_unlock();
+ if (IS_ERR(br_offloads)) {
+ esw_warn(mdev, "Failed to init esw bridge (err=%ld)\n", PTR_ERR(br_offloads));
+ return;
+ }
+
+ br_offloads->wq = alloc_ordered_workqueue("mlx5_bridge_wq", 0);
+ if (!br_offloads->wq) {
+ esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n");
+ goto err_alloc_wq;
+ }
+
+ br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event;
+ err = register_switchdev_notifier(&br_offloads->nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register switchdev notifier (err=%d)\n", err);
+ goto err_register_swdev;
+ }
+
+ br_offloads->nb_blk.notifier_call = mlx5_esw_bridge_event_blocking;
+ err = register_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ if (err) {
+ esw_warn(mdev, "Failed to register blocking switchdev notifier (err=%d)\n", err);
+ goto err_register_swdev_blk;
+ }
+
+ br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+ err);
+ goto err_register_netdev;
+ }
+ INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work);
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
+ return;
+
+err_register_netdev:
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+err_register_swdev_blk:
+ unregister_switchdev_notifier(&br_offloads->nb);
+err_register_swdev:
+ destroy_workqueue(br_offloads->wq);
+err_alloc_wq:
+ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
+}
+
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw =
+ mdev->priv.eswitch;
+
+ br_offloads = esw->br_offloads;
+ if (!br_offloads)
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ unregister_switchdev_notifier(&br_offloads->nb);
+ destroy_workqueue(br_offloads->wq);
+ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
new file mode 100644
index 000000000000..fbeb64242831
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_BRIDGE__
+#define __MLX5_EN_REP_BRIDGE__
+
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_MLX5_BRIDGE)
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv);
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_BRIDGE */
+
+static inline void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_BRIDGE */
+
+#endif /* __MLX5_EN_REP_BRIDGE__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
new file mode 100644
index 000000000000..2e9bee4e5209
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+#include "neigh.h"
+#include "tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+#include "diag/en_rep_tracepoint.h"
+
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+ if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+ return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
+ return ~0UL;
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+ unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
+}
+
+static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ return refcount_inc_not_zero(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt)) {
+ mlx5e_rep_neigh_entry_remove(nhe);
+ kfree_rcu(nhe, rcu);
+ }
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh_hash_entry *next = NULL;
+
+ rcu_read_lock();
+
+ for (next = nhe ?
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &nhe->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list) :
+ list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list);
+ next;
+ next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &next->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list))
+ if (mlx5e_rep_neigh_entry_hold(next))
+ break;
+
+ rcu_read_unlock();
+
+ if (nhe)
+ mlx5e_rep_neigh_entry_release(nhe);
+
+ return next;
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+struct neigh_update_work {
+ struct work_struct work;
+ struct neighbour *n;
+ struct mlx5e_neigh_hash_entry *nhe;
+};
+
+static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
+{
+ neigh_release(update_work->n);
+ mlx5e_rep_neigh_entry_release(update_work->nhe);
+ kfree(update_work);
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
+ work);
+ struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+ struct neighbour *n = update_work->n;
+ struct mlx5e_encap_entry *e = NULL;
+ bool neigh_connected, same_dev;
+ unsigned char ha[ETH_ALEN];
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and it's hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+
+ if (!same_dev)
+ goto out;
+
+ /* mlx5e_get_next_init_encap() releases previous encap before returning
+ * the next one.
+ */
+ while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
+ mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
+
+out:
+ rtnl_unlock();
+ mlx5e_release_neigh_update_work(update_work);
+}
+
+static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
+ struct neighbour *n)
+{
+ struct neigh_update_work *update_work;
+ struct mlx5e_neigh_hash_entry *nhe;
+ struct mlx5e_neigh m_neigh = {};
+
+ update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
+ if (WARN_ON(!update_work))
+ return NULL;
+
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ /* Obtain reference to nhe as last step in order not to release it in
+ * atomic context.
+ */
+ rcu_read_lock();
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ rcu_read_unlock();
+ if (!nhe) {
+ kfree(update_work);
+ return NULL;
+ }
+
+ INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
+ neigh_hold(n);
+ update_work->n = n;
+ update_work->nhe = nhe;
+
+ return update_work;
+}
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct neigh_update_work *update_work;
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ update_work = mlx5e_alloc_neigh_update_work(priv, n);
+ if (!update_work)
+ return NOTIFY_DONE;
+
+ queue_work(priv->wq, &update_work->work);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check the device is present since we don't care about
+ * changes in the default table, we only care about changes
+ * done per device delay prob time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
+ neigh_list) {
+ if (p->dev == READ_ONCE(nhe->neigh_dev)) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ goto out_err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ mutex_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&neigh_update->netevent_nb);
+ if (err)
+ goto out_notifier;
+ return 0;
+
+out_notifier:
+ neigh_update->netevent_nb.notifier_call = NULL;
+ rhashtable_destroy(&neigh_update->neigh_ht);
+out_err:
+ netdev_warn(rpriv->netdev,
+ "Failed to initialize neighbours handling for vport %d\n",
+ rpriv->rep->vport);
+ return err;
+}
+
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!rpriv->neigh_update.netevent_nb.notifier_call)
+ return;
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ mutex_destroy(&neigh_update->encap_lock);
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+
+ list_del_rcu(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under the representor's encap_lock or
+ * inside rcu read lock section.
+ */
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+ return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
+}
+
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ (*nhe)->priv = priv;
+ memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
+ spin_lock_init(&(*nhe)->encap_list_lock);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+ WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
new file mode 100644
index 000000000000..6fe0ab970943
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_NEIGH__
+#define __MLX5_EN_REP_NEIGH__
+
+#include "en.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv);
+
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
+ struct mlx5e_neigh_hash_entry **nhe);
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline int
+mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_NEIGH__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
new file mode 100644
index 000000000000..fac7e3ff2674
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -0,0 +1,900 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <net/dst_metadata.h>
+#include <linux/netdevice.h>
+#include <linux/if_macvlan.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include "tc.h"
+#include "neigh.h"
+#include "en_rep.h"
+#include "eswitch.h"
+#include "lib/fs_chains.h"
+#include "en/tc_ct.h"
+#include "en/mapping.h"
+#include "en/tc_tun.h"
+#include "lib/port_tun.h"
+#include "en/tc/sample.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en/tc/int_port.h"
+#include "en/tc/act/act.h"
+
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+ enum flow_block_binder_type binder_type;
+
+ struct list_head list;
+};
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+ if (err)
+ return err;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
+ if (err) {
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+ mlx5_tun_entropy_refcount_dec(tun_entropy,
+ e->reformat_type);
+ return err;
+ }
+ }
+
+ e->nhe = nhe;
+ spin_lock(&nhe->encap_list_lock);
+ list_add_rcu(&e->encap_list, &nhe->encap_list);
+ spin_unlock(&nhe->encap_list_lock);
+
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+
+ if (!e->nhe)
+ return;
+
+ spin_lock(&e->nhe->encap_list_lock);
+ list_del_rcu(&e->encap_list);
+ spin_unlock(&e->nhe->encap_list_lock);
+
+ mlx5e_rep_neigh_entry_release(e->nhe);
+ e->nhe = NULL;
+ mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
+}
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool encap_connected;
+ LIST_HEAD(flow_list);
+
+ ASSERT_RTNL();
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
+ goto unlock;
+
+ mlx5e_take_all_encap_flows(e, &flow_list);
+
+ if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+ (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
+ mlx5e_tc_encap_flows_del(priv, e, &flow_list);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ struct net_device *route_dev;
+
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+ /* Update the encap source mac, in case that we delete
+ * the flows when encap source mac changed.
+ */
+ route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
+ if (route_dev)
+ ether_addr_copy(eth->h_source, route_dev->dev_addr);
+
+ mlx5e_tc_encap_flows_add(priv, e, &flow_list);
+ }
+unlock:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ mlx5e_put_flow_list(priv, &flow_list);
+}
+
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower, int flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ switch (ma->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlx5e_tc_configure_matchall(priv, ma);
+ case TC_CLSMATCHALL_DESTROY:
+ return mlx5e_tc_delete_matchall(priv, ma);
+ case TC_CLSMATCHALL_STATS:
+ mlx5e_tc_stats_matchall(priv, ma);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!priv->netdev || !netif_device_present(priv->netdev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+ case TC_SETUP_CLSMATCHALL:
+ return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload tmp, *f = type_data;
+ struct mlx5e_priv *priv = cb_priv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ flags = MLX5_TC_FLAG(INGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+ esw = priv->mdev->priv.eswitch;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ if (!mlx5_chains_prios_supported(esw_chains(esw)))
+ return -EOPNOTSUPP;
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
+ return -EOPNOTSUPP;
+ if (tmp.common.chain_index != 0)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
+ tmp.common.prio++;
+ err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
+static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct flow_block_offload *f = type_data;
+
+ f->unlocked_driver_cb = true;
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_tc_cb_list,
+ mlx5e_rep_setup_tc_cb,
+ priv, priv, true);
+ case TC_SETUP_FT:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_ft_cb_list,
+ mlx5e_rep_setup_ft_cb,
+ priv, priv, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ int err;
+
+ mutex_init(&uplink_priv->unready_flows_lock);
+ INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(uplink_priv);
+ return err;
+}
+
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
+ mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+}
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+ mlx5e_tc_reoffload_flows_work);
+}
+
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+}
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+
+ return NOTIFY_OK;
+}
+
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev,
+ enum flow_block_binder_type binder_type)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev &&
+ cb_priv->binder_type == binder_type)
+ return cb_priv;
+
+ return NULL;
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct flow_cls_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv,
+ unsigned long flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int err = 0;
+
+ if (!netif_device_present(indr_priv->rpriv->netdev))
+ return -EOPNOTSUPP;
+
+ switch (flower->command) {
+ case FLOW_CLS_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ?
+ MLX5_TC_FLAG(EGRESS) :
+ MLX5_TC_FLAG(INGRESS);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+ struct flow_cls_offload *f = type_data;
+ struct flow_cls_offload tmp;
+ struct mlx5e_priv *mpriv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ mpriv = netdev_priv(priv->rpriv->netdev);
+ esw = mpriv->mdev->priv.eswitch;
+
+ flags = MLX5_TC_FLAG(EGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
+ tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
+ tmp.common.chain_index)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
+ tmp.common.prio++;
+ err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void mlx5e_rep_indr_block_unbind(void *cb_priv)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->mode == MACVLAN_MODE_PASSTHRU;
+}
+
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
+ struct mlx5e_rep_priv *rpriv,
+ struct flow_block_offload *f,
+ flow_setup_cb_t *setup_cb,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool is_ovs_int_port = netif_is_ovs_master(netdev);
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+ !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
+ !is_ovs_int_port) {
+ if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
+ return -EOPNOTSUPP;
+ if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
+ netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+ f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ return -EOPNOTSUPP;
+
+ if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
+ return -EOPNOTSUPP;
+
+ if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
+ return -EOPNOTSUPP;
+
+ f->unlocked_driver_cb = true;
+ f->driver_block_list = &mlx5e_block_cb_list;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ indr_priv->binder_type = f->binder_type;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
+ mlx5e_rep_indr_block_unbind,
+ f, netdev, sch, data, rpriv,
+ cleanup);
+ if (IS_ERR(block_cb)) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ return PTR_ERR(block_cb);
+ }
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
+ if (!indr_priv)
+ return -ENOENT;
+
+ block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
+ if (!block_cb)
+ return -ENOENT;
+
+ flow_indr_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct flow_action_entry *action;
+ struct mlx5e_tc_act *act;
+ bool add = false;
+ int i;
+
+ /* There is no use case currently for more than one action (e.g. pedit).
+ * when there will be, need to handle cleaning multiple actions on err.
+ */
+ if (!flow_offload_has_one_action(&fl_act->action))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ flow_action_for_each(i, action, &fl_act->action) {
+ act = mlx5e_tc_act_get(action->id, ns_type);
+ if (!act)
+ continue;
+
+ if (!act->offload_action)
+ continue;
+
+ if (!act->offload_action(priv, fl_act, action))
+ add = true;
+ }
+
+ return add ? 0 : -EOPNOTSUPP;
+}
+
+static int
+mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->destroy_action)
+ return -EOPNOTSUPP;
+
+ return act->destroy_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->stats_action)
+ return -EOPNOTSUPP;
+
+ return act->stats_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ switch (fl_act->command) {
+ case FLOW_ACT_REPLACE:
+ return mlx5e_rep_indr_replace_act(rpriv, fl_act);
+ case FLOW_ACT_DESTROY:
+ return mlx5e_rep_indr_destroy_act(rpriv, fl_act);
+ case FLOW_ACT_STATS:
+ return mlx5e_rep_indr_stats_act(rpriv, fl_act);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv,
+ enum tc_setup_type type,
+ void *data)
+{
+ if (!data)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_ACT:
+ return mlx5e_rep_indr_setup_act(rpriv, data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static
+int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
+ enum tc_setup_type type, void *type_data,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ if (!netdev)
+ return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data);
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
+ mlx5e_rep_indr_setup_tc_cb,
+ data, cleanup);
+ case TC_SETUP_FT:
+ return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
+ mlx5e_rep_indr_setup_ft_cb,
+ data, cleanup);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+
+ return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
+}
+
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
+{
+ flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
+ mlx5e_rep_indr_block_unbind);
+}
+
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv,
+ u32 tunnel_id)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct tunnel_match_enc_opts enc_opts = {};
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct metadata_dst *tun_dst;
+ struct tunnel_match_key key;
+ u32 tun_id, enc_opts_id;
+ struct net_device *dev;
+ int err;
+
+ enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+ tun_id = tunnel_id >> ENC_OPTS_BITS;
+
+ if (!tun_id)
+ return true;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+ if (err) {
+ WARN_ON_ONCE(true);
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel for tun_id: %d, err: %d\n",
+ tun_id, err);
+ return false;
+ }
+
+ if (enc_opts_id) {
+ err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id, &enc_opts);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
+ enc_opts_id, err);
+ return false;
+ }
+ }
+
+ if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ key.enc_tp.dst, TUNNEL_KEY,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ enc_opts.key.len);
+ } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ key.enc_tp.dst, 0, TUNNEL_KEY,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ enc_opts.key.len);
+ } else {
+ netdev_dbg(priv->netdev,
+ "Couldn't restore tunnel, unsupported addr_type: %d\n",
+ key.enc_control.addr_type);
+ return false;
+ }
+
+ if (!tun_dst) {
+ netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
+ return false;
+ }
+
+ tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
+
+ if (enc_opts.key.len)
+ ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
+ enc_opts.key.data,
+ enc_opts.key.len,
+ enc_opts.key.dst_opt_type);
+
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+ dev = dev_get_by_index(&init_net, key.filter_ifindex);
+ if (!dev) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel device with ifindex: %d\n",
+ key.filter_ifindex);
+ return false;
+ }
+
+ /* Set fwd_dev so we do dev_put() after datapath */
+ tc_priv->fwd_dev = dev;
+
+ skb->dev = dev;
+
+ return true;
+}
+
+static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
+ u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (chain) {
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct tc_skb_ext *tc_skb_ext;
+ struct mlx5_eswitch *esw;
+ u32 zone_restore_id;
+
+ tc_skb_ext = tc_skb_ext_alloc(skb);
+ if (!tc_skb_ext) {
+ WARN_ON(1);
+ return false;
+ }
+ tc_skb_ext->chain = chain;
+ zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
+ esw = priv->mdev->priv.eswitch;
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
+ zone_restore_id))
+ return false;
+ }
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+}
+
+static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (tc_priv->fwd_dev)
+ dev_put(tc_priv->fwd_dev);
+}
+
+static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
+ netdev_dbg(priv->netdev,
+ "Failed to restore tunnel info for sampled packet\n");
+ return;
+ }
+ mlx5e_tc_sample_skb(skb, mapped_obj);
+ mlx5_rep_tc_post_napi_receive(tc_priv);
+}
+
+static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv,
+ bool *forward_tx,
+ u32 reg_c1)
+{
+ u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ /* Tunnel restore takes precedence over int port restore */
+ if (tunnel_id)
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
+ mapped_obj->int_port_metadata, forward_tx)) {
+ /* Set fwd_dev for future dev_put */
+ tc_priv->fwd_dev = skb->dev;
+
+ return true;
+ }
+
+ return false;
+}
+
+void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb)
+{
+ u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_tc_update_priv tc_priv = {};
+ struct mlx5_mapped_obj mapped_obj;
+ struct mlx5_eswitch *esw;
+ bool forward_tx = false;
+ struct mlx5e_priv *priv;
+ u32 reg_c0;
+ int err;
+
+ reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
+ if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
+ goto forward;
+
+ /* If reg_c0 is not equal to the default flow tag then skb->mark
+ * is not supported and must be reset back to 0.
+ */
+ skb->mark = 0;
+
+ priv = netdev_priv(skb->dev);
+ esw = priv->mdev->priv.eswitch;
+ err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find mapped object for reg_c0: %d, err: %d\n",
+ reg_c0, err);
+ goto free_skb;
+ }
+
+ if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+ if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
+ !mlx5_ipsec_is_rx_flow(cqe))
+ goto free_skb;
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
+ mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
+ goto free_skb;
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
+ if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
+ &forward_tx, reg_c1))
+ goto free_skb;
+ } else {
+ netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
+ goto free_skb;
+ }
+
+forward:
+ if (forward_tx)
+ dev_queue_xmit(skb);
+ else
+ napi_gro_receive(rq->cq.napi, skb);
+
+ mlx5_rep_tc_post_napi_receive(&tc_priv);
+
+ return;
+
+free_skb:
+ dev_kfree_skb_any(skb);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
new file mode 100644
index 000000000000..7c9dd3a75f8a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_TC_H__
+#define __MLX5_EN_REP_TC_H__
+
+#include <linux/skbuff.h>
+#include "en_tc.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv);
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv);
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv);
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv);
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv);
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN]);
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
+
+void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5e_rep_priv;
+static inline int
+mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+static inline int
+mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {}
+
+static inline void
+mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {}
+static inline void
+mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {}
+
+static inline int
+mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; }
+
+static inline int
+mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data) { return -EOPNOTSUPP; }
+
+static inline void
+mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); }
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 6c72b592315b..5f6f95ad6888 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -3,6 +3,10 @@
#include "health.h"
#include "params.h"
+#include "txrx.h"
+#include "devlink.h"
+#include "ptp.h"
+#include "lib/tout.h"
static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
{
@@ -29,7 +33,10 @@ out:
static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
{
- unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+ struct mlx5_core_dev *dev = icosq->channel->mdev;
+ unsigned long exp_time;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
while (time_before(jiffies, exp_time)) {
if (icosq->cc == icosq->pc)
@@ -55,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
{
+ struct mlx5e_rq *xskrq = NULL;
struct mlx5_core_dev *mdev;
struct mlx5e_icosq *icosq;
struct net_device *dev;
@@ -63,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
int err;
icosq = ctx;
+
+ mutex_lock(&icosq->channel->icosq_recovery_lock);
+
+ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
rq = &icosq->channel->rq;
+ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
+ xskrq = &icosq->channel->xskrq;
mdev = icosq->channel->mdev;
dev = icosq->channel->netdev;
err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
@@ -77,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_deactivate_rq(rq);
+ if (xskrq)
+ mlx5e_deactivate_rq(xskrq);
+
err = mlx5e_wait_for_icosq_flush(icosq);
if (err)
goto out;
@@ -85,135 +102,72 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
/* At this point, both the rq and the icosq are disabled */
- err = mlx5e_health_sq_to_ready(icosq->channel, icosq->sqn);
+ err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn);
if (err)
goto out;
mlx5e_reset_icosq_cc_pc(icosq);
- mlx5e_free_rx_descs(rq);
+
+ mlx5e_free_rx_in_progress_descs(rq);
+ if (xskrq)
+ mlx5e_free_rx_in_progress_descs(xskrq);
+
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
mlx5e_activate_icosq(icosq);
- mlx5e_activate_rq(rq);
+ mlx5e_activate_rq(rq);
rq->stats->recover++;
- return 0;
-out:
- clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
- return err;
-}
-void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
-{
- struct mlx5e_priv *priv = icosq->channel->priv;
- char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
- struct mlx5e_err_ctx err_ctx = {};
-
- err_ctx.ctx = icosq;
- err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
- sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn);
-
- mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
-}
+ if (xskrq) {
+ mlx5e_activate_rq(xskrq);
+ xskrq->stats->recover++;
+ }
-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
-{
- struct net_device *dev = rq->netdev;
- int err;
+ mlx5e_trigger_napi_icosq(icosq->channel);
- err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
- if (err) {
- netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
- return err;
- }
- err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
- if (err) {
- netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
- return err;
- }
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+ return err;
}
static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
{
- struct mlx5_core_dev *mdev;
- struct net_device *dev;
- struct mlx5e_rq *rq;
- u8 state;
+ struct mlx5e_rq *rq = ctx;
int err;
- rq = ctx;
- mdev = rq->mdev;
- dev = rq->netdev;
- err = mlx5e_query_rq_state(mdev, rq->rqn, &state);
- if (err) {
- netdev_err(dev, "Failed to query RQ 0x%x state. err = %d\n",
- rq->rqn, err);
- goto out;
- }
-
- if (state != MLX5_RQC_STATE_ERR)
- goto out;
-
mlx5e_deactivate_rq(rq);
- mlx5e_free_rx_descs(rq);
-
- err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
+ err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
+ clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
if (err)
- goto out;
+ return err;
- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
mlx5e_activate_rq(rq);
rq->stats->recover++;
+ if (rq->channel)
+ mlx5e_trigger_napi_icosq(rq->channel);
+ else
+ mlx5e_trigger_napi_sched(rq->cq.napi);
return 0;
-out:
- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
- return err;
-}
-
-void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
-{
- struct mlx5e_priv *priv = rq->channel->priv;
- char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
- struct mlx5e_err_ctx err_ctx = {};
-
- err_ctx.ctx = rq;
- err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
- sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn);
-
- mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
static int mlx5e_rx_reporter_timeout_recover(void *ctx)
{
- struct mlx5e_icosq *icosq;
struct mlx5_eq_comp *eq;
struct mlx5e_rq *rq;
int err;
rq = ctx;
- icosq = &rq->channel->icosq;
eq = rq->cq.mcq.eq;
- err = mlx5e_health_channel_eq_recover(eq, rq->channel);
- if (err)
- clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
-
- return err;
-}
-
-void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
-{
- struct mlx5e_icosq *icosq = &rq->channel->icosq;
- struct mlx5e_priv *priv = rq->channel->priv;
- char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
- struct mlx5e_err_ctx err_ctx = {};
- err_ctx.ctx = rq;
- err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
- sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
- icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
+ err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
+ if (err && rq->icosq)
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);
- mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+ return err;
}
static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
@@ -232,40 +186,77 @@ static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
mlx5e_health_recover_channels(priv);
}
-static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
- struct devlink_fmsg *fmsg)
+static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
+ struct devlink_fmsg *fmsg)
{
- struct mlx5e_priv *priv = rq->channel->priv;
- struct mlx5e_params *params;
- struct mlx5e_icosq *icosq;
- u8 icosq_hw_state;
- int wqes_sz;
- u8 hw_state;
- u16 wq_head;
int err;
- params = &priv->channels.params;
- icosq = &rq->channel->icosq;
- err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state);
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
if (err)
return err;
- err = mlx5_core_query_sq_state(priv->mdev, icosq->sqn, &icosq_hw_state);
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn);
if (err)
return err;
- wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
- wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
- rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq);
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
+ if (err)
+ return err;
- err = devlink_fmsg_obj_nest_start(fmsg);
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "WQE size",
+ mlx5_wq_cyc_get_size(&icosq->wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
if (err)
return err;
- err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->channel->ix);
+ err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn);
if (err)
return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ u16 wqe_counter;
+ int wqes_sz;
+ u8 hw_state;
+ u16 wq_head;
+ int err;
+
+ err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state);
+ if (err)
+ return err;
+
+ wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
+ wq_head = mlx5e_rqwq_get_head(rq);
+ wqe_counter = mlx5e_rqwq_get_wqe_counter(rq);
+
err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
if (err)
return err;
@@ -278,6 +269,10 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
if (err)
return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter);
+ if (err)
+ return err;
+
err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
if (err)
return err;
@@ -286,11 +281,152 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
if (err)
return err;
- err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state);
+ err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg);
+ if (err)
+ return err;
+
+ if (rq->icosq) {
+ struct mlx5e_icosq *icosq = rq->icosq;
+ u8 icosq_hw_state;
+
+ err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state);
+ if (err)
+ return err;
+
+ err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix);
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_params *params;
+ u32 rq_stride, rq_sz;
+ bool real_time;
+ int err;
+
+ params = &priv->channels.params;
+ rq_sz = mlx5e_rqwq_get_size(rq);
+ real_time = mlx5_is_real_time_rq(priv->mdev);
+ rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
if (err)
return err;
- err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg);
+ err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter);
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg);
+ if (err)
+ return err;
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg);
+ if (err)
+ return err;
+ }
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
if (err)
return err;
@@ -306,9 +442,7 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
- struct mlx5e_params *params = &priv->channels.params;
- struct mlx5e_rq *generic_rq;
- u32 rq_stride, rq_sz;
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
int i, err = 0;
mutex_lock(&priv->state_lock);
@@ -316,85 +450,299 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
- generic_rq = &priv->channels.c[0]->rq;
- rq_sz = mlx5e_rqwq_get_size(generic_rq);
- rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
-
- err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common config");
+ err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg);
if (err)
goto unlock;
- err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ");
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
if (err)
goto unlock;
- err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
+
+ err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
+ if (err)
+ goto unlock;
+ }
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg);
+ if (err)
+ goto unlock;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_txqsq *icosq = ctx;
+ struct mlx5_rsc_key key = {};
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
if (err)
- goto unlock;
+ return err;
- err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
- goto unlock;
+ return err;
- err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
if (err)
- goto unlock;
+ return err;
- err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
if (err)
- goto unlock;
+ return err;
- err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg);
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
if (err)
- goto unlock;
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = icosq->sqn;
+ key.num_of_obj1 = 1;
- err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
- goto unlock;
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5_rsc_key key = {};
+ struct mlx5e_rq *rq = ctx;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = rq->rqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_RCV_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5_rsc_key key = {};
+ int i, err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
if (err)
- goto unlock;
+ return err;
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
- err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
+ err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ");
if (err)
- goto unlock;
+ return err;
}
- err = devlink_fmsg_arr_pair_nest_end(fmsg);
- if (err)
- goto unlock;
-unlock:
- mutex_unlock(&priv->state_lock);
- return err;
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ");
+ if (err)
+ return err;
+ }
+
+ return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
+ struct mlx5e_err_ctx *err_ctx,
+ struct devlink_fmsg *fmsg)
+{
+ return err_ctx->dump(priv, fmsg, err_ctx->ctx);
+}
+
+static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
+ mlx5e_rx_reporter_dump_all_rqs(priv, fmsg);
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+ char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {};
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = rq;
+ err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_rq;
+
+ if (icosq)
+ snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
+ snprintf(err_str, sizeof(err_str),
+ "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x",
+ rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = rq;
+ err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_rq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
+{
+ struct mlx5e_priv *priv = icosq->channel->priv;
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = icosq;
+ err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_icosq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
+{
+ mutex_lock(&c->icosq_recovery_lock);
+}
+
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
+{
+ mutex_unlock(&c->icosq_recovery_lock);
}
static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
.name = "rx",
.recover = mlx5e_rx_reporter_recover,
.diagnose = mlx5e_rx_reporter_diagnose,
+ .dump = mlx5e_rx_reporter_dump,
};
#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
-int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
+void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
{
- struct devlink *devlink = priv_to_devlink(priv->mdev);
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
struct devlink_health_reporter *reporter;
- reporter = devlink_health_reporter_create(devlink,
- &mlx5_rx_reporter_ops,
- MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
- true, priv);
+ reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops,
+ MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
if (IS_ERR(reporter)) {
netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
PTR_ERR(reporter));
- return PTR_ERR(reporter);
+ return;
}
priv->rx_reporter = reporter;
- return 0;
}
void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
@@ -402,5 +750,6 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
if (!priv->rx_reporter)
return;
- devlink_health_reporter_destroy(priv->rx_reporter);
+ devlink_port_health_reporter_destroy(priv->rx_reporter);
+ priv->rx_reporter = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index b468549e96ff..60bc5b577ab9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -2,10 +2,16 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include "health.h"
+#include "en/ptp.h"
+#include "en/devlink.h"
+#include "lib/tout.h"
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
{
- unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+ struct mlx5_core_dev *dev = sq->mdev;
+ unsigned long exp_time;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
while (time_before(jiffies, exp_time)) {
if (sq->cc == sq->pc)
@@ -14,7 +20,7 @@ static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
msleep(20);
}
- netdev_err(sq->channel->netdev,
+ netdev_err(sq->netdev,
"Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
sq->sqn, sq->cc, sq->pc);
@@ -40,8 +46,8 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
int err;
sq = ctx;
- mdev = sq->channel->mdev;
- dev = sq->channel->netdev;
+ mdev = sq->mdev;
+ dev = sq->netdev;
if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
return 0;
@@ -67,7 +73,7 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
* pending WQEs. SQ can safely reset the SQ.
*/
- err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn);
+ err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
if (err)
goto out;
@@ -82,48 +88,42 @@ out:
return err;
}
-void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
-{
- struct mlx5e_priv *priv = sq->channel->priv;
- char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
- struct mlx5e_err_ctx err_ctx = {0};
-
- err_ctx.ctx = sq;
- err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
- sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
-
- mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
-}
+struct mlx5e_tx_timeout_ctx {
+ struct mlx5e_txqsq *sq;
+ signed int status;
+};
static int mlx5e_tx_reporter_timeout_recover(void *ctx)
{
+ struct mlx5e_tx_timeout_ctx *to_ctx;
+ struct mlx5e_priv *priv;
struct mlx5_eq_comp *eq;
struct mlx5e_txqsq *sq;
int err;
- sq = ctx;
+ to_ctx = ctx;
+ sq = to_ctx->sq;
eq = sq->cq.mcq.eq;
- err = mlx5e_health_channel_eq_recover(eq, sq->channel);
- if (err)
- clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-
- return err;
-}
+ priv = sq->priv;
+ err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
+ if (!err) {
+ to_ctx->status = 0; /* this sq recovered */
+ return err;
+ }
-int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
-{
- struct mlx5e_priv *priv = sq->channel->priv;
- char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
- struct mlx5e_err_ctx err_ctx;
+ err = mlx5e_safe_reopen_channels(priv);
+ if (!err) {
+ to_ctx->status = 1; /* all channels recovered */
+ return err;
+ }
- err_ctx.ctx = sq;
- err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
- sprintf(err_str,
- "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
- sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
- jiffies_to_usecs(jiffies - sq->txq->trans_start));
+ to_ctx->status = err;
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ netdev_err(priv->netdev,
+ "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
+ err);
- return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+ return err;
}
/* state lock cannot be grabbed within this function.
@@ -146,11 +146,11 @@ static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
}
static int
-mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
- struct mlx5e_txqsq *sq, int tc)
+mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *sq, int tc)
{
- struct mlx5e_priv *priv = sq->channel->priv;
bool stopped = netif_xmit_stopped(sq->txq);
+ struct mlx5e_priv *priv = sq->priv;
u8 state;
int err;
@@ -158,14 +158,6 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
if (err)
return err;
- err = devlink_fmsg_obj_nest_start(fmsg);
- if (err)
- return err;
-
- err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
- if (err)
- return err;
-
err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
if (err)
return err;
@@ -194,7 +186,28 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
if (err)
return err;
- err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg);
+ err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *sq, int tc)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
if (err)
return err;
@@ -205,49 +218,153 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
return 0;
}
-static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
- struct devlink_fmsg *fmsg,
- struct netlink_ext_ack *extack)
+static int
+mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
+ struct mlx5e_ptpsq *ptpsq, int tc)
{
- struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
- struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
- u32 sq_stride, sq_sz;
+ int err;
- int i, tc, err = 0;
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
- mutex_lock(&priv->state_lock);
+ err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
+ if (err)
+ return err;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
- goto unlock;
+ err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
+ if (err)
+ return err;
- sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq);
- sq_stride = MLX5_SEND_WQE_BB;
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
+ if (err)
+ return err;
- err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config");
+ err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
if (err)
- goto unlock;
+ return err;
- err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
if (err)
- goto unlock;
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int
+mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *txqsq)
+{
+ u32 sq_stride, sq_sz;
+ bool real_time;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
+ if (err)
+ return err;
+
+ real_time = mlx5_is_real_time_sq(txqsq->mdev);
+ sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
+ sq_stride = MLX5_SEND_WQE_BB;
err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
if (err)
- goto unlock;
+ return err;
err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
if (err)
- goto unlock;
+ return err;
- err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg);
+ err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
if (err)
- goto unlock;
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
+ struct mlx5e_ptpsq *ptpsq)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5e_ptpsq *generic_ptpsq;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
+ if (err)
+ return err;
+
+ if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
+ goto out;
+
+ generic_ptpsq = &ptp_ch->ptpsq[0];
- err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+out:
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+
+ int i, tc, err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
- err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
if (err)
goto unlock;
@@ -258,7 +375,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
- for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
struct mlx5e_txqsq *sq = &c->sq[tc];
err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
@@ -266,6 +383,19 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
goto unlock;
}
}
+
+ if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
+ goto close_sqs_nest;
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
+ &ptp_ch->ptpsq[tc],
+ tc);
+ if (err)
+ goto unlock;
+ }
+
+close_sqs_nest:
err = devlink_fmsg_arr_pair_nest_end(fmsg);
if (err)
goto unlock;
@@ -275,33 +405,203 @@ unlock:
return err;
}
+static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5_rsc_key key = {};
+ struct mlx5e_txqsq *sq = ctx;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = sq->sqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
+
+ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
+}
+
+static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5_rsc_key key = {};
+ int i, tc, err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+ if (err)
+ return err;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &c->sq[tc];
+
+ err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
+ if (err)
+ return err;
+ }
+ }
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
+
+ err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
+ if (err)
+ return err;
+ }
+ }
+
+ return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
+ struct mlx5e_err_ctx *err_ctx,
+ struct devlink_fmsg *fmsg)
+{
+ return err_ctx->dump(priv, fmsg, err_ctx->ctx);
+}
+
+static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
+ mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
+}
+
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = sq;
+ err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+ err_ctx.dump = mlx5e_tx_reporter_dump_sq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
+
+ mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+}
+
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_tx_timeout_ctx to_ctx = {};
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ to_ctx.sq = sq;
+ err_ctx.ctx = &to_ctx;
+ err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
+ snprintf(err_str, sizeof(err_str),
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
+ sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+ jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
+
+ mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+ return to_ctx.status;
+}
+
static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
.name = "tx",
.recover = mlx5e_tx_reporter_recover,
.diagnose = mlx5e_tx_reporter_diagnose,
+ .dump = mlx5e_tx_reporter_dump,
};
#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
-int mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
+void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
struct devlink_health_reporter *reporter;
- struct mlx5_core_dev *mdev = priv->mdev;
- struct devlink *devlink;
-
- devlink = priv_to_devlink(mdev);
- reporter =
- devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
- MLX5_REPORTER_TX_GRACEFUL_PERIOD,
- true, priv);
+
+ reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops,
+ MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
if (IS_ERR(reporter)) {
netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n",
PTR_ERR(reporter));
- return PTR_ERR(reporter);
+ return;
}
priv->tx_reporter = reporter;
- return 0;
}
void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
@@ -309,5 +609,6 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
if (!priv->tx_reporter)
return;
- devlink_health_reporter_destroy(priv->tx_reporter);
+ devlink_port_health_reporter_destroy(priv->tx_reporter);
+ priv->tx_reporter = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
new file mode 100644
index 000000000000..b915fb29dd2c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rqt.h"
+#include <linux/mlx5/transobj.h>
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir->table[i] = i % num_channels;
+}
+
+static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u16 max_size, u32 *init_rqns, u16 init_size)
+{
+ void *rqtc;
+ int inlen;
+ int err;
+ u32 *in;
+ int i;
+
+ rqt->mdev = mdev;
+ rqt->size = max_size;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size);
+ for (i = 0; i < init_size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]);
+
+ err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn)
+{
+ u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1;
+
+ return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1);
+}
+
+static int mlx5e_bits_invert(unsigned long a, int size)
+{
+ int inv = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+ return inv;
+}
+
+static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) {
+ unsigned int ix = i;
+
+ if (hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE));
+
+ ix = indir->table[ix];
+
+ if (WARN_ON(ix >= num_rqns))
+ /* Could be a bug in the driver or in the kernel part of
+ * ethtool: indir table refers to non-existent RQs.
+ */
+ return -EINVAL;
+ rss_rqns[i] = rqns[ix];
+ }
+
+ return 0;
+}
+
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
+
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
+{
+ mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
+}
+
+static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size)
+{
+ unsigned int i;
+ void *rqtc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+
+ MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, size);
+ for (i = 0; i < size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]);
+
+ err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn)
+{
+ return mlx5e_rqt_redirect(rqt, &rqn, 1);
+}
+
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE))
+ return -EINVAL;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
new file mode 100644
index 000000000000..60c985a12f24
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RQT_H__
+#define __MLX5_EN_RQT_H__
+
+#include <linux/kernel.h>
+
+#define MLX5E_INDIR_RQT_SIZE (1 << 8)
+
+struct mlx5_core_dev;
+
+struct mlx5e_rss_params_indir {
+ u32 table[MLX5E_INDIR_RQT_SIZE];
+};
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels);
+
+struct mlx5e_rqt {
+ struct mlx5_core_dev *mdev;
+ u32 rqtn;
+ u16 size;
+};
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn);
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt);
+
+static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
+{
+ return rqt->rqtn;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn);
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+
+#endif /* __MLX5_EN_RQT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
new file mode 100644
index 000000000000..7f93426b88b3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.
+
+#include "rss.h"
+
+#define mlx5e_rss_warn(__dev, format, ...) \
+ dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+ [MLX5_TT_IPV6] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+};
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
+{
+ return rss_default_config[tt];
+}
+
+struct mlx5e_rss {
+ struct mlx5e_rss_params_hash hash;
+ struct mlx5e_rss_params_indir indir;
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_rqt rqt;
+ struct mlx5_core_dev *mdev;
+ u32 drop_rqn;
+ bool inner_ft_support;
+ bool enabled;
+ refcount_t refcnt;
+};
+
+struct mlx5e_rss *mlx5e_rss_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_rss), GFP_KERNEL);
+}
+
+void mlx5e_rss_free(struct mlx5e_rss *rss)
+{
+ kvfree(rss);
+}
+
+static void mlx5e_rss_params_init(struct mlx5e_rss *rss)
+{
+ enum mlx5_traffic_types tt;
+
+ rss->hash.hfunc = ETH_RSS_HASH_TOP;
+ netdev_rss_key_fill(rss->hash.toeplitz_hash_key,
+ sizeof(rss->hash.toeplitz_hash_key));
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ rss->rx_hash_fields[tt] =
+ mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
+}
+
+static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ return inner ? &rss->inner_tir[tt] : &rss->tir[tt];
+}
+
+static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ return *rss_get_tirp(rss, tt, inner);
+}
+
+static struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+
+ rss_tt = mlx5e_rss_get_default_tt_config(tt);
+ rss_tt.rx_hash_fields = rss->rx_hash_fields[tt];
+ return rss_tt;
+}
+
+static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+ struct mlx5e_tir_builder *builder;
+ struct mlx5e_tir **tir_p;
+ struct mlx5e_tir *tir;
+ u32 rqtn;
+ int err;
+
+ if (inner && !rss->inner_ft_support) {
+ mlx5e_rss_warn(rss->mdev,
+ "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n",
+ tt);
+ return -EINVAL;
+ }
+
+ tir_p = rss_get_tirp(rss, tt, inner);
+ if (*tir_p)
+ return -EINVAL;
+
+ tir = kvzalloc(sizeof(*tir), GFP_KERNEL);
+ if (!tir)
+ return -ENOMEM;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder) {
+ err = -ENOMEM;
+ goto free_tir;
+ }
+
+ rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
+ mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
+ rqtn, rss->inner_ft_support);
+ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+ mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+
+ err = mlx5e_tir_init(tir, builder, rss->mdev, true);
+ mlx5e_tir_builder_free(builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n",
+ inner ? "inner " : "", err, tt);
+ goto free_tir;
+ }
+
+ *tir_p = tir;
+ return 0;
+
+free_tir:
+ kvfree(tir);
+ return err;
+}
+
+static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_tir **tir_p;
+ struct mlx5e_tir *tir;
+
+ tir_p = rss_get_tirp(rss, tt, inner);
+ if (!*tir_p)
+ return;
+
+ tir = *tir_p;
+ mlx5e_tir_destroy(tir);
+ kvfree(tir);
+ *tir_p = NULL;
+}
+
+static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+{
+ enum mlx5_traffic_types tt, max_tt;
+ int err;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ goto err_destroy_tirs;
+ }
+
+ return 0;
+
+err_destroy_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_rss_destroy_tir(rss, tt, inner);
+ return err;
+}
+
+static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner)
+{
+ enum mlx5_traffic_types tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5e_rss_destroy_tir(rss, tt, inner);
+}
+
+static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+ struct mlx5e_tir_builder *builder;
+ struct mlx5e_tir *tir;
+ int err;
+
+ tir = rss_get_tir(rss, tt, inner);
+ if (!tir)
+ return 0;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+
+ mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+ err = mlx5e_tir_modify(tir, builder);
+
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss)
+{
+ enum mlx5_traffic_types tt;
+ int err, retval;
+
+ retval = 0;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_rss_update_tir(rss, tt, false);
+ if (err) {
+ retval = retval ? : err;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ }
+
+ if (!rss->inner_ft_support)
+ continue;
+
+ err = mlx5e_rss_update_tir(rss, tt, true);
+ if (err) {
+ retval = retval ? : err;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ }
+ }
+ return retval;
+}
+
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn)
+{
+ rss->mdev = mdev;
+ rss->inner_ft_support = inner_ft_support;
+ rss->drop_rqn = drop_rqn;
+
+ mlx5e_rss_params_init(rss);
+ refcount_set(&rss->refcnt, 1);
+
+ return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn);
+}
+
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+{
+ int err;
+
+ err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn);
+ if (err)
+ goto err_out;
+
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
+ if (err)
+ goto err_destroy_rqt;
+
+ if (inner_ft_support) {
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
+ if (err)
+ goto err_destroy_tirs;
+ }
+
+ return 0;
+
+err_destroy_tirs:
+ mlx5e_rss_destroy_tirs(rss, false);
+err_destroy_rqt:
+ mlx5e_rqt_destroy(&rss->rqt);
+err_out:
+ return err;
+}
+
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss)
+{
+ if (!refcount_dec_if_one(&rss->refcnt))
+ return -EBUSY;
+
+ mlx5e_rss_destroy_tirs(rss, false);
+
+ if (rss->inner_ft_support)
+ mlx5e_rss_destroy_tirs(rss, true);
+
+ mlx5e_rqt_destroy(&rss->rqt);
+
+ return 0;
+}
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss)
+{
+ refcount_inc(&rss->refcnt);
+}
+
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss)
+{
+ refcount_dec(&rss->refcnt);
+}
+
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss)
+{
+ return refcount_read(&rss->refcnt);
+}
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_tir *tir;
+
+ WARN_ON(inner && !rss->inner_ft_support);
+ tir = rss_get_tir(rss, tt, inner);
+ WARN_ON(!tir);
+
+ return mlx5e_tir_get_tirn(tir);
+}
+
+/* Fill the "tirn" output parameter.
+ * Create the requested TIR if it's its first usage.
+ */
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn)
+{
+ struct mlx5e_tir *tir;
+
+ tir = rss_get_tir(rss, tt, inner);
+ if (!tir) { /* TIR doesn't exist, create one */
+ int err;
+
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ return err;
+ tir = rss_get_tir(rss, tt, inner);
+ }
+
+ *tirn = mlx5e_tir_get_tirn(tir);
+ return 0;
+}
+
+static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+ int err;
+
+ err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir);
+ if (err)
+ mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n",
+ mlx5e_rqt_get_rqtn(&rss->rqt), err);
+ return err;
+}
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+ rss->enabled = true;
+ mlx5e_rss_apply(rss, rqns, num_rqns);
+}
+
+void mlx5e_rss_disable(struct mlx5e_rss *rss)
+{
+ int err;
+
+ rss->enabled = false;
+ err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn);
+ if (err)
+ mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n",
+ mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
+}
+
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ struct mlx5e_tir_builder *builder;
+ enum mlx5_traffic_types tt;
+ int err, final_err;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_tir *tir;
+
+ tir = rss_get_tir(rss, tt, false);
+ if (!tir)
+ goto inner_tir;
+ err = mlx5e_tir_modify(tir, builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+
+inner_tir:
+ if (!rss->inner_ft_support)
+ continue;
+
+ tir = rss_get_tir(rss, tt, true);
+ if (!tir)
+ continue;
+ err = mlx5e_tir_modify(tir, builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+}
+
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
+{
+ unsigned int i;
+
+ if (indir)
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir[i] = rss->indir.table[i];
+
+ if (key)
+ memcpy(key, rss->hash.toeplitz_hash_key,
+ sizeof(rss->hash.toeplitz_hash_key));
+
+ if (hfunc)
+ *hfunc = rss->hash.hfunc;
+
+ return 0;
+}
+
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+ u32 *rqns, unsigned int num_rqns)
+{
+ bool changed_indir = false;
+ bool changed_hash = false;
+ struct mlx5e_rss *old_rss;
+ int err = 0;
+
+ old_rss = mlx5e_rss_alloc();
+ if (!old_rss)
+ return -ENOMEM;
+
+ *old_rss = *rss;
+
+ if (hfunc && *hfunc != rss->hash.hfunc) {
+ switch (*hfunc) {
+ case ETH_RSS_HASH_XOR:
+ case ETH_RSS_HASH_TOP:
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ changed_hash = true;
+ changed_indir = true;
+ rss->hash.hfunc = *hfunc;
+ }
+
+ if (key) {
+ if (rss->hash.hfunc == ETH_RSS_HASH_TOP)
+ changed_hash = true;
+ memcpy(rss->hash.toeplitz_hash_key, key,
+ sizeof(rss->hash.toeplitz_hash_key));
+ }
+
+ if (indir) {
+ unsigned int i;
+
+ changed_indir = true;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ rss->indir.table[i] = indir[i];
+ }
+
+ if (changed_indir && rss->enabled) {
+ err = mlx5e_rss_apply(rss, rqns, num_rqns);
+ if (err) {
+ *rss = *old_rss;
+ goto out;
+ }
+ }
+
+ if (changed_hash)
+ mlx5e_rss_update_tirs(rss);
+
+out:
+ mlx5e_rss_free(old_rss);
+ return err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss)
+{
+ return rss->hash;
+}
+
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+ return rss->rx_hash_fields[tt];
+}
+
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields)
+{
+ u8 old_rx_hash_fields;
+ int err;
+
+ old_rx_hash_fields = rss->rx_hash_fields[tt];
+
+ if (old_rx_hash_fields == rx_hash_fields)
+ return 0;
+
+ rss->rx_hash_fields[tt] = rx_hash_fields;
+
+ err = mlx5e_rss_update_tir(rss, tt, false);
+ if (err) {
+ rss->rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ return err;
+ }
+
+ if (!(rss->inner_ft_support))
+ return 0;
+
+ err = mlx5e_rss_update_tir(rss, tt, true);
+ if (err) {
+ /* Partial update happened. Try to revert - it may fail too, but
+ * there is nothing more we can do.
+ */
+ rss->rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ if (mlx5e_rss_update_tir(rss, tt, false))
+ mlx5e_rss_warn(rss->mdev,
+ "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+ tt);
+ }
+
+ return err;
+}
+
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch)
+{
+ mlx5e_rss_params_indir_init_uniform(&rss->indir, nch);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
new file mode 100644
index 000000000000..c6b216416344
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_RSS_H__
+#define __MLX5_EN_RSS_H__
+
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+struct mlx5e_rss;
+
+struct mlx5e_rss *mlx5e_rss_alloc(void);
+void mlx5e_rss_free(struct mlx5e_rss *rss);
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param);
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn);
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss);
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss);
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss);
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner);
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn);
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+void mlx5e_rss_disable(struct mlx5e_rss *rss);
+
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+ u32 *rqns, unsigned int num_rqns);
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss);
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch);
+#endif /* __MLX5_EN_RSS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
new file mode 100644
index 000000000000..e1095bc36543
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rx_res.h"
+#include "channels.h"
+#include "params.h"
+
+#define MLX5E_MAX_NUM_RSS 16
+
+struct mlx5e_rx_res {
+ struct mlx5_core_dev *mdev;
+ enum mlx5e_rx_res_features features;
+ unsigned int max_nch;
+ u32 drop_rqn;
+
+ struct mlx5e_packet_merge_param pkt_merge_param;
+ struct rw_semaphore pkt_merge_param_sem;
+
+ struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
+ bool rss_active;
+ u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+ unsigned int rss_nch;
+
+ struct {
+ struct mlx5e_rqt direct_rqt;
+ struct mlx5e_tir direct_tir;
+ } *channels;
+
+ struct {
+ struct mlx5e_rqt rqt;
+ struct mlx5e_tir tir;
+ } ptp;
+};
+
+/* API for rx_res_rss_* */
+
+static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+ unsigned int init_nch)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_rss *rss;
+ int err;
+
+ if (WARN_ON(res->rss[0]))
+ return -EINVAL;
+
+ rss = mlx5e_rss_alloc();
+ if (!rss)
+ return -ENOMEM;
+
+ err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
+ &res->pkt_merge_param);
+ if (err)
+ goto err_rss_free;
+
+ mlx5e_rss_set_indir_uniform(rss, init_nch);
+
+ res->rss[0] = rss;
+
+ return 0;
+
+err_rss_free:
+ mlx5e_rss_free(rss);
+ return err;
+}
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_rss *rss;
+ int err, i;
+
+ for (i = 1; i < MLX5E_MAX_NUM_RSS; i++)
+ if (!res->rss[i])
+ break;
+
+ if (i == MLX5E_MAX_NUM_RSS)
+ return -ENOSPC;
+
+ rss = mlx5e_rss_alloc();
+ if (!rss)
+ return -ENOMEM;
+
+ err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn);
+ if (err)
+ goto err_rss_free;
+
+ mlx5e_rss_set_indir_uniform(rss, init_nch);
+ if (res->rss_active)
+ mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+
+ res->rss[i] = rss;
+ *rss_idx = i;
+
+ return 0;
+
+err_rss_free:
+ mlx5e_rss_free(rss);
+ return err;
+}
+
+static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ struct mlx5e_rss *rss = res->rss[rss_idx];
+ int err;
+
+ err = mlx5e_rss_cleanup(rss);
+ if (err)
+ return err;
+
+ mlx5e_rss_free(rss);
+ res->rss[rss_idx] = NULL;
+
+ return 0;
+}
+
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -EINVAL;
+
+ return __mlx5e_rx_res_rss_destroy(res, rss_idx);
+}
+
+static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+ int err;
+
+ if (!rss)
+ continue;
+
+ err = __mlx5e_rx_res_rss_destroy(res, i);
+ if (err) {
+ unsigned int refcount;
+
+ refcount = mlx5e_rss_refcnt_read(rss);
+ mlx5_core_warn(res->mdev,
+ "Failed to destroy RSS context %d, refcount = %u, err = %d\n",
+ i, refcount, err);
+ }
+ }
+}
+
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ res->rss_active = true;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+
+ if (!rss)
+ continue;
+ mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+ }
+}
+
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ res->rss_active = false;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+
+ if (!rss)
+ continue;
+ mlx5e_rss_disable(rss);
+ }
+}
+
+/* Updates the indirection table SW shadow, does not update the HW resources yet */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+ WARN_ON_ONCE(res->rss_active);
+ mlx5e_rss_set_indir_uniform(res->rss[0], nch);
+}
+
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u32 *indir, u8 *key, u8 *hfunc)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -ENOENT;
+
+ return mlx5e_rss_get_rxfh(rss, indir, key, hfunc);
+}
+
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ const u32 *indir, const u8 *key, const u8 *hfunc)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -ENOENT;
+
+ return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch);
+}
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_hash_fields(rss, tt);
+}
+
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields);
+}
+
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res)
+{
+ int i, cnt;
+
+ cnt = 0;
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+ if (res->rss[i])
+ cnt++;
+
+ return cnt;
+}
+
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss)
+{
+ int i;
+
+ if (!rss)
+ return -EINVAL;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+ if (rss == res->rss[i])
+ return i;
+
+ return -ENOENT;
+}
+
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return NULL;
+
+ return res->rss[rss_idx];
+}
+
+/* End of API rx_res_rss_* */
+
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
+}
+
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+ int ix;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+ if (!res->channels) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+ res->mdev, false, res->drop_rqn);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_rqts;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ goto out;
+
+err_destroy_direct_tirs:
+ while (--ix >= 0)
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+ ix = res->max_nch;
+err_destroy_direct_rqts:
+ while (--ix >= 0)
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+ kvfree(res->channels);
+
+out:
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+ if (err)
+ goto out;
+
+ /* Separated from the channels RQs, does not share pkt_merge state with them */
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+ if (err)
+ goto err_destroy_ptp_rqt;
+
+ goto out;
+
+err_destroy_ptp_rqt:
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+ }
+
+ kvfree(res->channels);
+}
+
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_tir_destroy(&res->ptp.tir);
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch)
+{
+ int err;
+
+ res->mdev = mdev;
+ res->features = features;
+ res->max_nch = max_nch;
+ res->drop_rqn = drop_rqn;
+
+ res->pkt_merge_param = *init_pkt_merge_param;
+ init_rwsem(&res->pkt_merge_param_sem);
+
+ err = mlx5e_rx_res_rss_init_def(res, init_nch);
+ if (err)
+ goto err_out;
+
+ err = mlx5e_rx_res_channels_init(res);
+ if (err)
+ goto err_rss_destroy;
+
+ err = mlx5e_rx_res_ptp_init(res);
+ if (err)
+ goto err_channels_destroy;
+
+ return 0;
+
+err_channels_destroy:
+ mlx5e_rx_res_channels_destroy(res);
+err_rss_destroy:
+ __mlx5e_rx_res_rss_destroy(res, 0);
+err_out:
+ return err;
+}
+
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_rx_res_ptp_destroy(res);
+ mlx5e_rx_res_channels_destroy(res);
+ mlx5e_rx_res_rss_destroy_all(res);
+}
+
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+ kvfree(res);
+}
+
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_tirn(rss, tt, false);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_tirn(rss, tt, true);
+}
+
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+ WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+ return mlx5e_tir_get_tirn(&res->ptp.tir);
+}
+
+static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+}
+
+static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
+ struct mlx5e_channels *chs,
+ unsigned int ix)
+{
+ u32 rqn = res->rss_rqns[ix];
+ int err;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ rqn, ix, err);
+}
+
+static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
+ unsigned int ix)
+{
+ int err;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ res->drop_rqn, ix, err);
+}
+
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+ unsigned int nch, ix;
+ int err;
+
+ nch = mlx5e_channels_get_num(chs);
+
+ for (ix = 0; ix < chs->num; ix++) {
+ if (mlx5e_channels_is_xsk(chs, ix))
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+ }
+ res->rss_nch = chs->num;
+
+ mlx5e_rx_res_rss_enable(res);
+
+ for (ix = 0; ix < nch; ix++)
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+ for (ix = nch; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ u32 rqn;
+
+ if (!mlx5e_channels_get_ptp_rqn(chs, &rqn))
+ rqn = res->drop_rqn;
+
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ rqn, err);
+ }
+}
+
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+ int err;
+
+ mlx5e_rx_res_rss_disable(res);
+
+ for (ix = 0; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ res->drop_rqn, err);
+ }
+}
+
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk)
+{
+ if (xsk)
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+
+ mlx5e_rx_res_rss_enable(res);
+
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+}
+
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ struct mlx5e_tir_builder *builder;
+ int err, final_err;
+ unsigned int ix;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ down_write(&res->pkt_merge_param_sem);
+ res->pkt_merge_param = *pkt_merge_param;
+
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+ for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) {
+ struct mlx5e_rss *rss = res->rss[ix];
+
+ if (!rss)
+ continue;
+
+ err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
+ if (err)
+ final_err = final_err ? : err;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
+ mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ up_write(&res->pkt_merge_param_sem);
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+ return mlx5e_rss_get_hash(res->rss[0]);
+}
+
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+ struct mlx5e_tir *tir)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ u32 rqtn;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq);
+
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn,
+ inner_ft_support);
+ mlx5e_tir_builder_build_direct(builder);
+ mlx5e_tir_builder_build_tls(builder);
+ down_read(&res->pkt_merge_param_sem);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
+ err = mlx5e_tir_init(tir, builder, res->mdev, false);
+ up_read(&res->pkt_merge_param_sem);
+
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
new file mode 100644
index 000000000000..5d5f64fab60f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RX_RES_H__
+#define __MLX5_EN_RX_RES_H__
+
+#include <linux/kernel.h>
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+#include "rss.h"
+
+struct mlx5e_rx_res;
+
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
+
+enum mlx5e_rx_res_features {
+ MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
+ MLX5E_RX_RES_FEATURE_PTP = BIT(1),
+};
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk);
+
+/* Configuration API */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ const u32 *indir, const u8 *key, const u8 *hfunc);
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss);
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
+
+/* Accel TIRs */
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+ struct mlx5e_tir *tir);
+#endif /* __MLX5_EN_RX_RES_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
new file mode 100644
index 000000000000..f675b1926340
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "selq.h"
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include "en.h"
+#include "en/ptp.h"
+#include "en/htb.h"
+
+struct mlx5e_selq_params {
+ unsigned int num_regular_queues;
+ unsigned int num_channels;
+ unsigned int num_tcs;
+ union {
+ u8 is_special_queues;
+ struct {
+ bool is_htb : 1;
+ bool is_ptp : 1;
+ };
+ };
+ u16 htb_maj_id;
+ u16 htb_defcls;
+};
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
+{
+ struct mlx5e_selq_params *init_params;
+
+ selq->state_lock = state_lock;
+
+ selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL);
+ if (!selq->standby)
+ return -ENOMEM;
+
+ init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL);
+ if (!init_params) {
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ return -ENOMEM;
+ }
+ /* Assign dummy values, so that mlx5e_select_queue won't crash. */
+ *init_params = (struct mlx5e_selq_params) {
+ .num_regular_queues = 1,
+ .num_channels = 1,
+ .num_tcs = 1,
+ .is_htb = false,
+ .is_ptp = false,
+ .htb_maj_id = 0,
+ .htb_defcls = 0,
+ };
+ rcu_assign_pointer(selq->active, init_params);
+
+ return 0;
+}
+
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+{
+ WARN_ON_ONCE(selq->is_prepared);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ selq->is_prepared = true;
+
+ mlx5e_selq_apply(selq);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+}
+
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->num_channels = params->num_channels;
+ selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params);
+ selq->standby->num_regular_queues =
+ selq->standby->num_channels * selq->standby->num_tcs;
+ selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS);
+}
+
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *selq_active =
+ rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock));
+
+ return selq_active->htb_maj_id;
+}
+
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->is_htb = htb_maj_id;
+ selq->standby->htb_maj_id = htb_maj_id;
+ selq->standby->htb_defcls = htb_defcls;
+}
+
+void mlx5e_selq_apply(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *old_params;
+
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+
+ old_params = rcu_replace_pointer(selq->active, selq->standby,
+ lockdep_is_held(selq->state_lock));
+ synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */
+ selq->standby = old_params;
+}
+
+void mlx5e_selq_cancel(struct mlx5e_selq *selq)
+{
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+}
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP)
+ return mlx5e_get_dscp_up(priv, skb);
+#endif
+ if (skb_vlan_tag_present(skb))
+ return skb_vlan_tag_get_prio(skb);
+ return 0;
+}
+
+static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int up;
+
+ up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0;
+
+ return selq->num_regular_queues + up;
+}
+
+static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ u16 classid;
+
+ /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
+ if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id)
+ classid = TC_H_MIN(skb->priority);
+ else
+ classid = selq->htb_defcls;
+
+ if (!classid)
+ return 0;
+
+ return mlx5e_htb_get_txq_by_classid(priv->htb, classid);
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_selq_params *selq;
+ int txq_ix, up;
+
+ selq = rcu_dereference_bh(priv->selq.active);
+
+ /* This is a workaround needed only for the mlx5e_netdev_change_profile
+ * flow that zeroes out the whole priv without unregistering the netdev
+ * and without preventing ndo_select_queue from being called.
+ */
+ if (unlikely(!selq))
+ return 0;
+
+ if (likely(!selq->is_special_queues)) {
+ /* No special queues, netdev_pick_tx returns one of the regular ones. */
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ /* Normalize any picked txq_ix to [0, num_channels),
+ * So we can return a txq_ix that matches the channel and
+ * packet UP.
+ */
+ return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
+ up * selq->num_channels;
+ }
+
+ if (unlikely(selq->htb_maj_id)) {
+ /* num_tcs == 1, shortcut for PTP */
+
+ txq_ix = mlx5e_select_htb_queue(priv, skb, selq);
+ if (txq_ix > 0)
+ return txq_ix;
+
+ if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb)))
+ return selq->num_channels;
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
+ * If they are selected, switch to regular queues.
+ * Driver to select these queues only at mlx5e_select_ptpsq()
+ * and mlx5e_select_htb_queue().
+ */
+ return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
+ }
+
+ /* PTP is enabled */
+
+ if (mlx5e_use_ptpsq(skb))
+ return mlx5e_select_ptpsq(dev, skb, selq);
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Normalize any picked txq_ix to [0, num_channels). Queues in range
+ * [0, num_regular_queues) will be mapped to the corresponding channel
+ * index, so that we can apply the packet's UP (if num_tcs > 1).
+ * If netdev_pick_tx() picks ptp_channel, switch to a regular queue,
+ * because driver should select the PTP only at mlx5e_select_ptpsq().
+ */
+ txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ return txq_ix + up * selq->num_channels;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
new file mode 100644
index 000000000000..fd590f80e4d1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_SELQ_H__
+#define __MLX5_EN_SELQ_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_selq_params;
+
+struct mlx5e_selq {
+ struct mlx5e_selq_params __rcu *active;
+ struct mlx5e_selq_params *standby;
+ struct mutex *state_lock; /* points to priv->state_lock */
+ bool is_prepared;
+};
+
+struct mlx5e_params;
+struct net_device;
+struct sk_buff;
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock);
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq);
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params);
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls);
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq);
+void mlx5e_selq_apply(struct mlx5e_selq *selq);
+void mlx5e_selq_cancel(struct mlx5e_selq *selq);
+
+static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
+{
+ while (unlikely(txq >= num_channels))
+ txq -= num_channels;
+ return txq;
+}
+
+static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
+{
+ if (unlikely(txq >= num_channels)) {
+ if (unlikely(txq >= num_channels << 3))
+ txq %= num_channels;
+ else
+ do
+ txq -= num_channels;
+ while (txq >= num_channels);
+ }
+ return txq;
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+
+#endif /* __MLX5_EN_SELQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
new file mode 100644
index 000000000000..21aab96357b5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_ACCEPT;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_accept = {
+ .can_offload = tc_act_can_offload_accept,
+ .parse_action = tc_act_parse_accept,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
new file mode 100644
index 000000000000..3337241cfd84
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc/post_act.h"
+#include "en/tc_priv.h"
+#include "mlx5_core.h"
+
+static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress,
+ [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle,
+ [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap,
+ [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype,
+ [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample,
+ [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+ [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push,
+ [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop,
+ [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan,
+};
+
+static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+};
+
+/**
+ * mlx5e_tc_act_get() - Get an action parser for an action id.
+ * @act_id: Flow action id.
+ * @ns_type: flow namespace type.
+ */
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5e_tc_act **tc_acts;
+
+ tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic;
+
+ return tc_acts[act_id];
+}
+
+/**
+ * mlx5e_tc_act_init_parse_state() - Init a new parse_state.
+ * @parse_state: Parsing state.
+ * @flow: mlx5e tc flow being handled.
+ * @flow_action: flow action to parse.
+ * @extack: to set an error msg.
+ *
+ * The same parse_state should be passed to action parsers
+ * for tracking the current parsing state.
+ */
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ memset(parse_state, 0, sizeof(*parse_state));
+ parse_state->flow = flow;
+ parse_state->extack = extack;
+ parse_state->flow_action = flow_action;
+}
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder)
+{
+ struct flow_action_entry *act;
+ int i, j = 0;
+
+ flow_action_for_each(i, act, flow_action) {
+ /* Add CT action to be first. */
+ if (act->id == FLOW_ACTION_CT)
+ flow_action_reorder->entries[j++] = act;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT)
+ continue;
+ flow_action_reorder->entries[j++] = act;
+ }
+}
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct flow_action_entry *act;
+ struct mlx5e_tc_act *tc_act;
+ struct mlx5e_priv *priv;
+ int err = 0, i;
+
+ priv = parse_state->flow->priv;
+
+ flow_action_for_each(i, act, flow_action) {
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act || !tc_act->post_parse)
+ continue;
+
+ err = tc_act->post_parse(parse_state, priv, attr);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr)
+{
+ struct mlx5_core_dev *mdev = flow->priv->mdev;
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ int err;
+
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+
+ /* Set handle on current post act rule to next post act rule. */
+ err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts);
+ if (err) {
+ mlx5_core_warn(mdev, "Failed setting post action handle");
+ return err;
+ }
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
new file mode 100644
index 000000000000..e1570ff056ae
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_H__
+#define __MLX5_EN_TC_ACT_H__
+
+#include <net/tc_act/tc_pedit.h>
+#include <net/flow_offload.h>
+#include <linux/netlink.h>
+#include "eswitch.h"
+#include "pedit.h"
+
+struct mlx5_flow_attr;
+
+struct mlx5e_tc_act_parse_state {
+ struct flow_action *flow_action;
+ struct mlx5e_tc_flow *flow;
+ struct netlink_ext_ack *extack;
+ u32 actions;
+ bool ct;
+ bool ct_clear;
+ bool encap;
+ bool decap;
+ bool mpls_push;
+ bool eth_push;
+ bool eth_pop;
+ bool ptype_host;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
+ int if_count;
+ struct mlx5_tc_ct_priv *ct_priv;
+};
+
+struct mlx5e_tc_act {
+ bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr);
+
+ int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ bool (*is_multi_table_act)(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr);
+
+ int (*offload_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act);
+
+ int (*destroy_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+
+ int (*stats_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+};
+
+struct mlx5e_tc_flow_action {
+ unsigned int num_entries;
+ struct flow_action_entry **entries;
+};
+
+extern struct mlx5e_tc_act mlx5e_tc_act_drop;
+extern struct mlx5e_tc_act mlx5e_tc_act_trap;
+extern struct mlx5e_tc_act mlx5e_tc_act_accept;
+extern struct mlx5e_tc_act mlx5e_tc_act_mark;
+extern struct mlx5e_tc_act mlx5e_tc_act_goto;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap;
+extern struct mlx5e_tc_act mlx5e_tc_act_csum;
+extern struct mlx5e_tc_act mlx5e_tc_act_pedit;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic;
+extern struct mlx5e_tc_act mlx5e_tc_act_ct;
+extern struct mlx5e_tc_act mlx5e_tc_act_sample;
+extern struct mlx5e_tc_act mlx5e_tc_act_ptype;
+extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress;
+extern struct mlx5e_tc_act mlx5e_tc_act_police;
+
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack);
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder);
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type);
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr);
+
+#endif /* __MLX5_EN_TC_ACT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
new file mode 100644
index 000000000000..c0f08ae6a57f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/tc_act/tc_csum.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+csum_offload_supported(struct mlx5e_priv *priv,
+ u32 action,
+ u32 update_flags,
+ struct netlink_ext_ack *extack)
+{
+ u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+ TCA_CSUM_UPDATE_FLAG_UDP;
+
+ /* The HW recalcs checksums only if re-writing headers */
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "TC csum action is only offloaded with pedit");
+ netdev_warn(priv->netdev,
+ "TC csum action is only offloaded with pedit\n");
+ return false;
+ }
+
+ if (update_flags & ~prot_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload TC csum action for some header/s");
+ netdev_warn(priv->netdev,
+ "can't offload TC csum action for some header/s - flags %#x\n",
+ update_flags);
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ return csum_offload_supported(flow->priv, attr->action,
+ act->csum_flags, parse_state->extack);
+}
+
+static int
+tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_csum = {
+ .can_offload = tc_act_can_offload_csum,
+ .parse_action = tc_act_parse_csum,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
new file mode 100644
index 000000000000..a829c94289c1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+
+static bool
+tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->ct && !clear_action) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ int err;
+
+ /* It's redundant to do ct clear more than once. */
+ if (clear_action && parse_state->ct_clear)
+ return 0;
+
+ err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
+ &attr->parse_attr->mod_hdr_acts,
+ act, parse_state->extack);
+ if (err)
+ return err;
+
+
+ if (mlx5e_is_eswitch_flow(parse_state->flow))
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ if (clear_action) {
+ parse_state->ct_clear = true;
+ } else {
+ attr->flags |= MLX5_ATTR_FLAG_CT;
+ flow_flag_set(parse_state->flow, CT);
+ parse_state->ct = true;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
+ int err;
+
+ /* If ct action exist, we can ignore previous ct_clear actions */
+ if (parse_state->ct)
+ return 0;
+
+ if (parse_state->ct_clear) {
+ err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Failed to set registers for ct clear");
+ return err;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Prevent handling of additional, redundant clear actions */
+ parse_state->ct_clear = false;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->ct.action & TCA_CT_ACT_CLEAR)
+ return false;
+
+ return true;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ct = {
+ .can_offload = tc_act_can_offload_ct,
+ .parse_action = tc_act_parse_ct,
+ .is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .post_parse = tc_act_post_parse_ct,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
new file mode 100644
index 000000000000..dd025a95c439
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_drop = {
+ .can_offload = tc_act_can_offload_drop,
+ .parse_action = tc_act_parse_drop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
new file mode 100644
index 000000000000..25174f68613e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "eswitch.h"
+
+static int
+validate_goto_chain(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
+ bool ft_flow = mlx5e_is_ft_flow(flow);
+ u32 dest_chain = act->chain_index;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_eswitch *esw;
+ u32 reformat_and_fwd;
+ u32 max_chain;
+
+ esw = priv->mdev->priv.eswitch;
+ chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc);
+ max_chain = mlx5_chains_get_chain_range(chains);
+ reformat_and_fwd = is_esw ?
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
+
+ if (ft_flow) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_chains_backwards_supported(chains) &&
+ dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+
+ if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ !reformat_and_fwd) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Goto chain is not allowed if action has reformat or decap");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (validate_goto_chain(flow->priv, flow, attr, act, extack))
+ return false;
+
+ return true;
+}
+
+static int
+tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->dest_chain = act->chain_index;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (!attr->dest_chain)
+ return 0;
+
+ if (parse_state->decap) {
+ /* It can be supported if we'll create a mapping for
+ * the tunnel device only (without tunnel), and set
+ * this tunnel id with this decap flow.
+ *
+ * On restore (miss), we'll just set this saved tunnel
+ * device.
+ */
+
+ NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported");
+ netdev_warn(priv->netdev, "Decap with goto isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_goto = {
+ .can_offload = tc_act_can_offload_goto,
+ .parse_action = tc_act_parse_goto,
+ .post_parse = tc_act_post_parse_goto,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
new file mode 100644
index 000000000000..e8d227595b3e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en_tc.h"
+
+static bool
+tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->nic_attr->flow_tag = act->mark;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mark = {
+ .can_offload = tc_act_can_offload_mark,
+ .parse_action = tc_act_parse_mark,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
new file mode 100644
index 000000000000..4ac7de3f6afa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_macvlan.h>
+#include <linux/if_vlan.h>
+#include <net/bareudp.h>
+#include <net/bonding.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+#include "en_rep.h"
+#include "lag/lag.h"
+
+static bool
+same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ return mlx5e_eswitch_vf_rep(priv->netdev) &&
+ priv->netdev == out_dev;
+}
+
+static int
+verify_uplink_forwarding(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rep_priv;
+
+ /* Forwarding non encapsulated traffic between
+ * uplink ports is allowed only if
+ * termination_table_raw_traffic cap is set.
+ *
+ * Input vport was stored attr->in_rep.
+ * In LAG case, *priv* is the private data of
+ * uplink which may be not the input vport.
+ */
+ rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep);
+
+ if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+ mlx5e_eswitch_uplink_rep(out_dev)))
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+ termination_table_raw_traffic)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are both uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ } else if (out_dev != rep_priv->netdev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not the same uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static bool
+is_duplicated_output_device(struct net_device *dev,
+ struct net_device *out_dev,
+ int *ifindexes, int if_count,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < if_count; i++) {
+ if (ifindexes[i] == out_dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device");
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
+ out_dev->name);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct net_device *
+get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
+{
+ struct net_device *fdb_out_dev = out_dev;
+ struct net_device *uplink_upper;
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev) {
+ fdb_out_dev = uplink_dev;
+ } else if (netif_is_lag_master(out_dev)) {
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
+ if (fdb_out_dev &&
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
+ fdb_out_dev = NULL;
+ }
+ rcu_read_unlock();
+ return fdb_out_dev;
+}
+
+static bool
+tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev) {
+ /* out_dev is NULL when filters with
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return false;
+ }
+
+ if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device");
+ return false;
+ }
+
+ if (parse_state->eth_pop && !parse_state->mpls_push) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push");
+ return false;
+ }
+
+ if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push");
+ return false;
+ }
+
+ if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) {
+ /* Ignore forward to self rules generated
+ * by adding both mlx5 devs to the flow table
+ * block on a normal nft offload setup.
+ */
+ return false;
+ }
+
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't support more output ports, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "can't support more than %d output ports, can't offload forwarding\n",
+ esw_attr->out_count);
+ return false;
+ }
+
+ if (parse_state->encap ||
+ netdev_port_same_parent_id(priv->netdev, out_dev) ||
+ netif_is_ovs_master(out_dev))
+ return true;
+
+ if (parse_attr->filter_dev != priv->netdev) {
+ /* All mlx5 devices are called to configure
+ * high level device filters. Therefore, the
+ * *attempt* to install a filter on invalid
+ * eswitch should not trigger an explicit error
+ */
+ return false;
+ }
+
+ NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
+
+ return false;
+}
+
+static int
+parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+
+ parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex;
+ parse_attr->tun_info[esw_attr->out_count] =
+ mlx5e_dup_tun_info(parse_state->tun_info);
+
+ if (!parse_attr->tun_info[esw_attr->out_count])
+ return -ENOMEM;
+
+ parse_state->encap = false;
+
+ if (parse_state->mpls_push) {
+ memcpy(&parse_attr->mpls_info[esw_attr->out_count],
+ &parse_state->mpls_info, sizeof(parse_state->mpls_info));
+ parse_state->mpls_push = false;
+ }
+ esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP;
+ esw_attr->out_count++;
+ /* attr->dests[].rep is resolved when we handle encap */
+
+ return 0;
+}
+
+static int
+parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct net_device *out_dev = act->dev;
+ struct net_device *uplink_dev;
+ struct mlx5e_priv *out_priv;
+ struct mlx5_eswitch *esw;
+ bool is_uplink_rep;
+ int *ifindexes;
+ int if_count;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ ifindexes = parse_state->ifindexes;
+ if_count = parse_state->if_count;
+
+ if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack))
+ return -EOPNOTSUPP;
+
+ parse_state->ifindexes[if_count] = out_dev->ifindex;
+ parse_state->if_count++;
+ is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
+ err = mlx5_lag_do_mirred(priv->mdev, out_dev);
+ if (err)
+ return err;
+
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
+ if (!out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(out_dev)) {
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack);
+ if (err)
+ return err;
+ }
+
+ if (is_vlan_dev(parse_attr->filter_dev)) {
+ err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack);
+ if (err)
+ return err;
+ }
+
+ if (netif_is_macvlan(out_dev))
+ out_dev = macvlan_dev_real_dev(out_dev);
+
+ err = verify_uplink_forwarding(priv, attr, out_dev, extack);
+ if (err)
+ return err;
+
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+
+ if (same_vf_reps(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself");
+ return -EOPNOTSUPP;
+ }
+
+ out_priv = netdev_priv(out_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
+
+ /* If output device is bond master then rules are not explicit
+ * so we don't attempt to count them.
+ */
+ if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ attr->lag.count = true;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+static int
+parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+ return 0;
+}
+
+static int
+tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct net_device *out_dev = act->dev;
+ int err = -EOPNOTSUPP;
+
+ if (parse_state->encap)
+ err = parse_mirred_encap(parse_state, act, attr);
+ else if (netdev_port_same_parent_id(priv->netdev, out_dev))
+ err = parse_mirred(parse_state, act, priv, attr);
+ else if (netif_is_ovs_master(out_dev))
+ err = parse_mirred_ovs_master(parse_state, act, priv, attr);
+
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred = {
+ .can_offload = tc_act_can_offload_mirred,
+ .parse_action = tc_act_parse_mirred,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
new file mode 100644
index 000000000000..90b4c1b34776
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+
+ if (act->id != FLOW_ACTION_REDIRECT)
+ return false;
+
+ if (priv->netdev->netdev_ops != out_dev->netdev_ops ||
+ !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "devices %s %s not on same switch HW, can't offload forwarding\n",
+ netdev_name(priv->netdev),
+ out_dev->name);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex;
+ flow_flag_set(parse_state->flow, HAIRPIN);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = {
+ .can_offload = tc_act_can_offload_mirred_nic,
+ .parse_action = tc_act_parse_mirred_nic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
new file mode 100644
index 000000000000..f106190bf37c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/bareudp.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_priv *priv = parse_state->flow->priv;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) ||
+ act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol");
+ return false;
+ }
+
+ return true;
+}
+
+static void
+copy_mpls_info(struct mlx5e_mpls_info *mpls_info,
+ const struct flow_action_entry *act)
+{
+ mpls_info->label = act->mpls_push.label;
+ mpls_info->tc = act->mpls_push.tc;
+ mpls_info->bos = act->mpls_push.bos;
+ mpls_info->ttl = act->mpls_push.ttl;
+}
+
+static int
+tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->mpls_push = true;
+ copy_mpls_info(&parse_state->mpls_info, act);
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct net_device *filter_dev;
+
+ filter_dev = attr->parse_attr->filter_dev;
+
+ /* we only support mpls pop if it is the first action
+ * or it is second action after tunnel key unset
+ * and the filter net device is bareudp. Subsequent
+ * actions can be pedit and the last can be mirred
+ * egress redirect.
+ */
+ if ((act_index == 1 && !parse_state->decap) || act_index > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap");
+ return false;
+ }
+
+ if (!netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->esw_attr->eth.h_proto = act->mpls_pop.proto;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_flag_set(parse_state->flow, L3_TO_L2_DECAP);
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_push = {
+ .can_offload = tc_act_can_offload_mpls_push,
+ .parse_action = tc_act_parse_mpls_push,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = {
+ .can_offload = tc_act_can_offload_mpls_pop,
+ .parse_action = tc_act_parse_mpls_pop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
new file mode 100644
index 000000000000..47597c524e59
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "pedit.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+
+static int pedit_header_offsets[] = {
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int
+set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u32 *curr_pmask, *curr_pval;
+
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
+
+ if (*curr_pmask & mask) { /* disallow acting twice on the same location */
+ NL_SET_ERR_MSG_MOD(extack,
+ "curr_pmask and new mask same. Acting twice on same location");
+ goto out_err;
+ }
+
+ *curr_pmask |= mask;
+ *curr_pval |= (val & mask);
+
+ return 0;
+
+out_err:
+ return -EOPNOTSUPP;
+}
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
+ u8 htype = act->mangle.htype;
+ int err = -EOPNOTSUPP;
+ u32 mask, val, offset;
+
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
+ goto out_err;
+ }
+
+ if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) {
+ NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported");
+ goto out_err;
+ }
+
+ mask = act->mangle.mask;
+ val = act->mangle.val;
+ offset = act->mangle.offset;
+
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack);
+ if (err)
+ goto out_err;
+
+ hdrs[cmd].pedits++;
+
+ return 0;
+out_err:
+ return err;
+}
+
+static bool
+tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(flow);
+
+ err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs,
+ parse_state->extack);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_pedit = {
+ .can_offload = tc_act_can_offload_pedit,
+ .parse_action = tc_act_parse_pedit,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
new file mode 100644
index 000000000000..434c8bd710a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_PEDIT_H__
+#define __MLX5_EN_TC_ACT_PEDIT_H__
+
+#include "en_tc.h"
+
+struct pedit_headers {
+ struct ethhdr eth;
+ struct vlan_hdr vlan;
+ struct iphdr ip4;
+ struct ipv6hdr ip6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+};
+
+struct pedit_headers_action {
+ struct pedit_headers vals;
+ struct pedit_headers masks;
+ u32 pedits;
+};
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
new file mode 100644
index 000000000000..c8e5ca65bb6e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Offload not supported when conform action is not pipe or ok");
+ return false;
+ }
+ if (mlx5e_policer_validate(parse_state->flow_action, act,
+ parse_state->extack))
+ return false;
+
+ return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev);
+}
+
+static int
+fill_meter_params_from_act(const struct flow_action_entry *act,
+ struct mlx5e_flow_meter_params *params)
+{
+ params->index = act->hw_index;
+ if (act->police.rate_bytes_ps) {
+ params->mode = MLX5_RATE_LIMIT_BPS;
+ /* change rate to bits per second */
+ params->rate = act->police.rate_bytes_ps << 3;
+ params->burst = act->police.burst;
+ } else if (act->police.rate_pkt_ps) {
+ params->mode = MLX5_RATE_LIMIT_PPS;
+ params->rate = act->police.rate_pkt_ps;
+ params->burst = act->police.burst_pkt;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ int err;
+
+ err = fill_meter_params_from_act(act, &attr->meter_attr.params);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
+ attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_police(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_police_offload(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ int err = 0;
+
+ err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ if (err)
+ return err;
+
+ err = fill_meter_params_from_act(act, &params);
+ if (err)
+ return err;
+
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) {
+ meter = mlx5e_tc_meter_replace(priv->mdev, &params);
+ } else if (!IS_ERR(meter)) {
+ err = mlx5e_tc_meter_update(meter, &params);
+ mlx5e_tc_meter_put(meter);
+ }
+
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ err = PTR_ERR(meter);
+ }
+
+ return err;
+}
+
+static int
+tc_act_police_destroy(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+ /* first put for the get and second for cleanup */
+ mlx5e_tc_meter_put(meter);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+static int
+tc_act_police_stats(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ u64 bytes, packets, drops, lastuse;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+
+ mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse);
+ flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_police = {
+ .can_offload = tc_act_can_offload_police,
+ .parse_action = tc_act_parse_police,
+ .is_multi_table_act = tc_act_is_multi_table_act_police,
+ .offload_action = tc_act_police_offload,
+ .destroy_action = tc_act_police_destroy,
+ .stats_action = tc_act_police_stats,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
new file mode 100644
index 000000000000..6454b031ff7a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (act->ptype != PACKET_HOST) {
+ NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host");
+ return -EOPNOTSUPP;
+ }
+
+ parse_state->ptype_host = true;
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ptype = {
+ .can_offload = tc_act_can_offload_ptype,
+ .parse_action = tc_act_parse_ptype,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
new file mode 100644
index 000000000000..ad09a8a5f36e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev)
+ return false;
+
+ if (!netif_is_ovs_master(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is supported only for OVS internal ports");
+ return false;
+ }
+
+ if (netif_is_ovs_master(parse_attr->filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is not supported from internal port");
+ return false;
+ }
+
+ if (!parse_state->ptype_host) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress requires ptype=host action");
+ return false;
+ }
+
+ if (esw_attr->out_count) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress is supported only as single destination");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = {
+ .can_offload = tc_act_can_offload_redirect_ingress,
+ .parse_action = tc_act_parse_redirect_ingress,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
new file mode 100644
index 000000000000..2c0196431302
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/psample.h>
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc/act/sample.h"
+
+static bool
+tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ bool ct_nat;
+
+ ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+
+ if (flow_flag_test(parse_state->flow, CT) && ct_nat) {
+ NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_attr *sample_attr = &attr->sample_attr;
+
+ sample_attr->rate = act->sample.rate;
+ sample_attr->group_num = act->sample.psample_group->group_num;
+
+ if (act->sample.truncate)
+ sample_attr->trunc_size = act->sample.trunc_size;
+
+ attr->flags |= MLX5_ATTR_FLAG_SAMPLE;
+ flow_flag_set(parse_state->flow, SAMPLE);
+
+ return 0;
+}
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr)
+{
+ if (MLX5_CAP_GEN(mdev, reg_c_preserve) ||
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return true;
+
+ return false;
+}
+
+static bool
+tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr);
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_sample = {
+ .can_offload = tc_act_can_offload_sample,
+ .parse_action = tc_act_parse_sample,
+ .is_multi_table_act = tc_act_is_multi_table_act_sample,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
new file mode 100644
index 000000000000..3efb3a15c5d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__
+#define __MLX5_EN_TC_ACT_SAMPLE_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr);
+
+#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
new file mode 100644
index 000000000000..53b270f652b9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->flow_action->num_entries != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_trap = {
+ .can_offload = tc_act_can_offload_trap,
+ .parse_action = tc_act_parse_trap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
new file mode 100644
index 000000000000..b4fa2de9711d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (!act->tunnel) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Zero tunnel attributes is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->tun_info = act->tunnel;
+ parse_state->encap = true;
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->decap = true;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_encap = {
+ .can_offload = tc_act_can_offload_tun_encap,
+ .parse_action = tc_act_parse_tun_encap,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_decap = {
+ .can_offload = tc_act_can_offload_tun_decap,
+ .parse_action = tc_act_parse_tun_decap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
new file mode 100644
index 000000000000..b86ac604d0c2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+static int
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ const struct flow_action_entry prio_tag_act = {
+ .vlan.vid = 0,
+ .vlan.prio =
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_value(*action,
+ &parse_attr->spec),
+ first_prio) &
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_criteria(*action,
+ &parse_attr->spec),
+ first_prio),
+ };
+
+ return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+ &prio_tag_act, parse_attr, action,
+ extack);
+}
+
+static int
+parse_tc_vlan_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action,
+ struct netlink_ext_ack *extack,
+ struct mlx5e_tc_act_parse_state *parse_state)
+{
+ u8 vlan_idx = attr->total_vlan;
+
+ if (vlan_idx >= MLX5_FS_VLAN_DEPTH) {
+ NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported");
+ return -EOPNOTSUPP;
+ }
+
+ switch (act->id) {
+ case FLOW_ACTION_VLAN_POP:
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+ } else {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ }
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
+ if (!attr->vlan_proto[vlan_idx])
+ attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vlan push action is not supported for vlan depth > 1");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ } else {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
+ act->vlan.prio)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ }
+ break;
+ case FLOW_ACTION_VLAN_POP_ETH:
+ parse_state->eth_pop = true;
+ break;
+ case FLOW_ACTION_VLAN_PUSH_ETH:
+ if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP))
+ return -EOPNOTSUPP;
+ parse_state->eth_push = true;
+ memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN);
+ memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN");
+ return -EINVAL;
+ }
+
+ attr->total_vlan = vlan_idx + 1;
+
+ return 0;
+}
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *vlan_dev = *out_dev;
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_PUSH,
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+ .vlan.prio = 0,
+ };
+ int err;
+
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL);
+ if (err)
+ return err;
+
+ rcu_read_lock();
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
+ rcu_read_unlock();
+ if (!*out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(*out_dev))
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack);
+
+ return err;
+}
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_POP,
+ };
+ int nest_level, err = 0;
+
+ nest_level = attr->parse_attr->filter_dev->lower_level -
+ priv->netdev->lower_level;
+ while (nest_level--) {
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action,
+ extack, NULL);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int err;
+
+ if (act->id == FLOW_ACTION_VLAN_PUSH &&
+ (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
+ /* Replace vlan pop+push with vlan modify */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act,
+ attr->parse_attr, &attr->action,
+ parse_state->extack);
+ } else {
+ err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action,
+ parse_state->extack, parse_state);
+ }
+
+ if (err)
+ return err;
+
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+ /* For prio tag mode, replace vlan pop with rewrite vlan prio
+ * tag rewrite.
+ */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr,
+ &attr->action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan = {
+ .can_offload = tc_act_can_offload_vlan,
+ .parse_action = tc_act_parse_vlan,
+ .post_parse = tc_act_post_parse_vlan,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
new file mode 100644
index 000000000000..2fa58c6f44eb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_VLAN_H__
+#define __MLX5_EN_TC_ACT_VLAN_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
new file mode 100644
index 000000000000..9a8a1a6bd99e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ u16 mask16 = VLAN_VID_MASK;
+ u16 val16 = act->vlan.vid & VLAN_VID_MASK;
+ const struct flow_action_entry pedit_act = {
+ .id = FLOW_ACTION_MANGLE,
+ .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
+ .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
+ .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
+ .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
+ };
+ u8 match_prio_mask, match_prio_val;
+ void *headers_c, *headers_v;
+ int err;
+
+ headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec);
+
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match");
+ return -EOPNOTSUPP;
+ }
+
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs,
+ extack);
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(parse_state->flow);
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr,
+ &attr->action, parse_state->extack);
+ if (err)
+ return err;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = {
+ .can_offload = tc_act_can_offload_vlan_mangle,
+ .parse_action = tc_act_parse_vlan_mangle,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
new file mode 100644
index 000000000000..bb6b1a979ba1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_TC_CT_FS_H__
+#define __MLX5_EN_TC_CT_FS_H__
+
+struct mlx5_ct_fs {
+ const struct net_device *netdev;
+ struct mlx5_core_dev *dev;
+
+ /* private data */
+ void *priv_data[];
+};
+
+struct mlx5_ct_fs_rule {
+};
+
+struct mlx5_ct_fs_ops {
+ int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct);
+ void (*destroy)(struct mlx5_ct_fs *fs);
+
+ struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule);
+ void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule);
+
+ size_t priv_size;
+};
+
+static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs)
+{
+ return &fs->priv_data;
+}
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void);
+
+#if IS_ENABLED(CONFIG_MLX5_SW_STEERING)
+struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void);
+#else
+static inline struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return NULL;
+}
+#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */
+
+#endif /* __MLX5_EN_TC_CT_FS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
new file mode 100644
index 000000000000..ae4f55be48ce
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "en_tc.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args)
+
+struct mlx5_ct_fs_dmfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+};
+
+static int
+mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ return 0;
+}
+
+static void
+mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs)
+{
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5e_priv *priv = netdev_priv(fs->netdev);
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule;
+ int err;
+
+ dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL);
+ if (!dmfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
+ if (IS_ERR(dmfs_rule->rule)) {
+ err = PTR_ERR(dmfs_rule->rule);
+ ct_dbg("Failed to add ct entry fs rule");
+ goto err_insert;
+ }
+
+ dmfs_rule->attr = attr;
+
+ return &dmfs_rule->fs_rule;
+
+err_insert:
+ kfree(dmfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_dmfs_rule,
+ fs_rule);
+
+ mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr);
+ kfree(dmfs_rule);
+}
+
+static struct mlx5_ct_fs_ops dmfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_dmfs_init,
+ .destroy = mlx5_ct_fs_dmfs_destroy,
+};
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void)
+{
+ return &dmfs_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
new file mode 100644
index 000000000000..2b80fe73549d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/refcount.h>
+
+#include "en_tc.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#include "lib/smfs.h"
+
+#define INIT_ERR_PREFIX "ct_fs_smfs init failed"
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args)
+#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16)
+
+struct mlx5_ct_fs_smfs_matcher {
+ struct mlx5dr_matcher *dr_matcher;
+ struct list_head list;
+ int prio;
+ refcount_t ref;
+};
+
+struct mlx5_ct_fs_smfs_matchers {
+ struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
+ struct list_head used;
+};
+
+struct mlx5_ct_fs_smfs {
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl;
+ struct mlx5_ct_fs_smfs_matchers matchers;
+ struct mlx5_ct_fs_smfs_matchers matchers_nat;
+ struct mlx5dr_action *fwd_action;
+ struct mlx5_flow_table *ct_nat;
+ struct mutex lock; /* Guards matchers */
+};
+
+struct mlx5_ct_fs_smfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5dr_rule *rule;
+ struct mlx5dr_action *count_action;
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+};
+
+static inline void
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+ bool gre)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+
+ if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version)))
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ if (likely(ipv4)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6));
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6));
+ }
+
+ if (likely(tcp)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(MLX5_CT_TCP_FLAGS_MASK));
+ } else if (!gre) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
+ }
+
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK);
+}
+
+static struct mlx5dr_matcher *
+mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
+ bool tcp, bool gre, u32 priority)
+{
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
+
+ dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
+ kvfree(spec);
+ if (!dr_matcher)
+ return ERR_PTR(-EINVAL);
+
+ return dr_matcher;
+}
+
+static struct mlx5_ct_fs_smfs_matcher *
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
+ struct mlx5_ct_fs_smfs_matchers *matchers;
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5dr_table *tbl;
+ struct list_head *prev;
+ int prio;
+
+ matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
+ smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
+
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ return smfs_matcher;
+
+ mutex_lock(&fs_smfs->lock);
+
+ /* Retry with lock, as another thread might have already created the relevant matcher
+ * till we acquired the lock
+ */
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ goto out_unlock;
+
+ // Find next available priority in sorted used list
+ prio = 0;
+ prev = &matchers->used;
+ list_for_each_entry(m, &matchers->used, list) {
+ prev = &m->list;
+
+ if (m->prio == prio)
+ prio = m->prio + 1;
+ else
+ break;
+ }
+
+ tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
+ dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
+ if (IS_ERR(dr_matcher)) {
+ netdev_warn(fs->netdev,
+ "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+ nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
+
+ smfs_matcher = ERR_CAST(dr_matcher);
+ goto out_unlock;
+ }
+
+ smfs_matcher->dr_matcher = dr_matcher;
+ smfs_matcher->prio = prio;
+ list_add(&smfs_matcher->list, prev);
+ refcount_set(&smfs_matcher->ref, 1);
+
+out_unlock:
+ mutex_unlock(&fs_smfs->lock);
+ return smfs_matcher;
+}
+
+static void
+mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock))
+ return;
+
+ mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher);
+ list_del(&smfs_matcher->list);
+ mutex_unlock(&fs_smfs->lock);
+}
+
+static int
+mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl;
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct);
+ ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat);
+ ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct);
+ fs_smfs->ct_nat = ct_nat;
+
+ if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) {
+ netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables");
+ return -EOPNOTSUPP;
+ }
+
+ ct_dbg("using smfs steering");
+
+ fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl);
+ if (!fs_smfs->fwd_action) {
+ return -EINVAL;
+ }
+
+ fs_smfs->ct_tbl = ct_tbl;
+ fs_smfs->ct_nat_tbl = ct_nat_tbl;
+ mutex_init(&fs_smfs->lock);
+ INIT_LIST_HEAD(&fs_smfs->matchers.used);
+ INIT_LIST_HEAD(&fs_smfs->matchers_nat.used);
+
+ return 0;
+}
+
+static void
+mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ mlx5_smfs_action_destroy(fs_smfs->fwd_action);
+}
+
+static inline bool
+mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
+{
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+ const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+ const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+ const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
+
+ return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
+ used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
+}
+
+static bool
+mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule)
+{
+ struct flow_match_ipv4_addrs ipv4_addrs;
+ struct flow_match_ipv6_addrs ipv6_addrs;
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+ struct flow_match_ports ports;
+ struct flow_match_tcp tcp;
+
+ if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) {
+ ct_dbg("rule uses unexpected dissectors (0x%08x)",
+ flow_rule->match.dissector->used_keys);
+ return false;
+ }
+
+ flow_rule_match_basic(flow_rule, &basic);
+ flow_rule_match_control(flow_rule, &control);
+ flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
+ flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
+ if (basic.key->ip_proto != IPPROTO_GRE)
+ flow_rule_match_ports(flow_rule, &ports);
+ if (basic.key->ip_proto == IPPROTO_TCP)
+ flow_rule_match_tcp(flow_rule, &tcp);
+
+ if (basic.mask->n_proto != htons(0xFFFF) ||
+ (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
+ basic.mask->ip_proto != 0xFF ||
+ (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+ basic.key->ip_proto != IPPROTO_GRE)) {
+ ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
+ ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
+ basic.key->ip_proto, basic.mask->ip_proto);
+ return false;
+ }
+
+ if (basic.key->ip_proto != IPPROTO_GRE &&
+ (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
+ ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
+ ports.mask->src, ports.mask->dst);
+ return false;
+ }
+
+ if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) {
+ ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags);
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+ struct mlx5_ct_fs_smfs_rule *smfs_rule;
+ struct mlx5dr_action *actions[5];
+ struct mlx5dr_rule *rule;
+ int num_actions = 0, err;
+ bool nat, tcp, ipv4, gre;
+
+ if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL);
+ if (!smfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter));
+ if (!smfs_rule->count_action) {
+ err = -EINVAL;
+ goto err_count;
+ }
+
+ actions[num_actions++] = smfs_rule->count_action;
+ actions[num_actions++] = attr->modify_hdr->action.dr_action;
+ actions[num_actions++] = fs_smfs->fwd_action;
+
+ nat = (attr->ft == fs_smfs->ct_nat);
+ ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
+ tcp = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_TCP;
+ gre = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_GRE;
+
+ smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
+ if (IS_ERR(smfs_matcher)) {
+ err = PTR_ERR(smfs_matcher);
+ goto err_matcher;
+ }
+
+ rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
+ spec->flow_context.flow_source);
+ if (!rule) {
+ err = -EINVAL;
+ goto err_create;
+ }
+
+ smfs_rule->rule = rule;
+ smfs_rule->smfs_matcher = smfs_matcher;
+
+ return &smfs_rule->fs_rule;
+
+err_create:
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher);
+err_matcher:
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+err_count:
+ kfree(smfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_smfs_rule,
+ fs_rule);
+
+ mlx5_smfs_rule_destroy(smfs_rule->rule);
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher);
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+ kfree(smfs_rule);
+}
+
+static struct mlx5_ct_fs_ops fs_smfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_smfs_init,
+ .destroy = mlx5_ct_fs_smfs_destroy,
+
+ .priv_size = sizeof(struct mlx5_ct_fs_smfs),
+};
+
+struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return &fs_smfs_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
new file mode 100644
index 000000000000..ca834bbcb44f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/fs.h>
+#include "en/mapping.h"
+#include "en/tc/int_port.h"
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_int_port {
+ enum mlx5e_tc_int_port_type type;
+ int ifindex;
+ u32 match_metadata;
+ u32 mapping;
+ struct list_head list;
+ struct mlx5_flow_handle *rx_rule;
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+};
+
+struct mlx5e_tc_int_port_priv {
+ struct mlx5_core_dev *dev;
+ struct mutex int_ports_lock; /* Protects int ports list */
+ struct list_head int_ports; /* Uses int_ports_lock */
+ u16 num_ports;
+ bool ul_rep_rx_ready; /* Set when uplink is performing teardown */
+ struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */
+};
+
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_GEN(esw->dev, reg_c_preserve);
+}
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port)
+{
+ return int_port->match_metadata;
+}
+
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+ /* For egress forwarding we can have the case
+ * where the packet came from a vport and redirected
+ * to int port or it came from the uplink, going
+ * via internal port and hairpinned back to uplink
+ * so we set the source to any port in this case.
+ */
+ return int_port->type == MLX5E_TC_INT_PORT_EGRESS ?
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT :
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
+
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+ return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
+}
+
+static struct mlx5_flow_handle *
+mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_int_port *int_port,
+ struct mlx5_flow_destination *dest)
+
+{
+ struct mlx5_flow_context *flow_context;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5e_tc_int_port_get_metadata_for_match(int_port));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ /* Overwrite flow tag with the int port metadata mapping
+ * instead of the chain mapping.
+ */
+ flow_context = &spec->flow_context;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = int_port->mapping;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+ &flow_act, dest, 1);
+ if (IS_ERR(flow_rule))
+ mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n",
+ PTR_ERR(flow_rule));
+
+ kvfree(spec);
+
+ return flow_rule;
+}
+
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ if (!priv->ul_rep_rx_ready)
+ goto not_found;
+
+ list_for_each_entry(int_port, &priv->int_ports, list)
+ if (int_port->ifindex == ifindex && int_port->type == type) {
+ refcount_inc(&int_port->refcnt);
+ return int_port;
+ }
+
+not_found:
+ return NULL;
+}
+
+static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex, enum mlx5e_tc_int_port_type type,
+ u32 *id)
+{
+ u32 mapped_key[2] = {type, ifindex};
+ int err;
+
+ err = mapping_add(priv->metadata_mapping, mapped_key, id);
+ if (err)
+ return err;
+
+ /* Fill upper 4 bits of PFNUM with reserved value */
+ *id |= 0xf << ESW_VPORT_BITS;
+
+ return 0;
+}
+
+static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv,
+ u32 id)
+{
+ id &= (1 << ESW_VPORT_BITS) - 1;
+ mapping_remove(priv->metadata_mapping, id);
+}
+
+/* Must be called with priv->int_ports_lock held */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+ struct mlx5_mapped_obj mapped_obj = {};
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_int_port *int_port;
+ struct mlx5_flow_destination dest;
+ struct mapping_ctx *ctx;
+ u32 match_metadata;
+ u32 mapping;
+ int err;
+
+ if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) {
+ mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d",
+ MLX5E_TC_MAX_INT_PORT_NUM);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ int_port = kzalloc(sizeof(*int_port), GFP_KERNEL);
+ if (!int_port)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata);
+ if (err) {
+ mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d",
+ ifindex);
+ goto err_metadata;
+ }
+
+ /* map metadata to reg_c0 object for miss handling */
+ ctx = esw->offloads.reg_c0_obj_pool;
+ mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA;
+ mapped_obj.int_port_metadata = match_metadata;
+ err = mapping_add(ctx, &mapped_obj, &mapping);
+ if (err)
+ goto err_map;
+
+ int_port->type = type;
+ int_port->ifindex = ifindex;
+ int_port->match_metadata = match_metadata;
+ int_port->mapping = mapping;
+
+ /* Create a match on internal vport metadata in vport table */
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = uplink_rpriv->root_ft;
+
+ int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest);
+ if (IS_ERR(int_port->rx_rule)) {
+ err = PTR_ERR(int_port->rx_rule);
+ mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err);
+ goto err_rx_rule;
+ }
+
+ refcount_set(&int_port->refcnt, 1);
+ list_add_rcu(&int_port->list, &priv->int_ports);
+ priv->num_ports++;
+
+ return int_port;
+
+err_rx_rule:
+ mapping_remove(ctx, int_port->mapping);
+
+err_map:
+ mlx5e_int_port_metadata_free(priv, match_metadata);
+
+err_metadata:
+ kfree(int_port);
+
+ return ERR_PTR(err);
+}
+
+/* Must be called with priv->int_ports_lock held */
+static void
+mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port)
+{
+ struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+ struct mapping_ctx *ctx;
+
+ ctx = esw->offloads.reg_c0_obj_pool;
+
+ list_del_rcu(&int_port->list);
+
+ /* The following parameters are not used by the
+ * rcu readers of this int_port object so it is
+ * safe to release them.
+ */
+ if (int_port->rx_rule)
+ mlx5_del_flow_rules(int_port->rx_rule);
+ mapping_remove(ctx, int_port->mapping);
+ mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
+ kfree_rcu(int_port);
+ priv->num_ports--;
+}
+
+/* Must be called with rcu_read_lock held */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv,
+ u32 metadata)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ list_for_each_entry_rcu(int_port, &priv->int_ports, list)
+ if (int_port->match_metadata == metadata)
+ return int_port;
+
+ return NULL;
+}
+
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ if (!priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&priv->int_ports_lock);
+
+ /* Reject request if ul rep not ready */
+ if (!priv->ul_rep_rx_ready) {
+ int_port = ERR_PTR(-EOPNOTSUPP);
+ goto done;
+ }
+
+ int_port = mlx5e_int_port_lookup(priv, ifindex, type);
+ if (int_port)
+ goto done;
+
+ /* Alloc and add new int port to list */
+ int_port = mlx5e_int_port_add(priv, ifindex, type);
+
+done:
+ mutex_unlock(&priv->int_ports_lock);
+
+ return int_port;
+}
+
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port)
+{
+ if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock))
+ return;
+
+ mlx5e_int_port_remove(priv, int_port);
+ mutex_unlock(&priv->int_ports_lock);
+}
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_int_port_priv *int_port_priv;
+ u64 mapping_id;
+
+ if (!mlx5e_tc_int_port_supported(esw))
+ return NULL;
+
+ int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL);
+ if (!int_port_priv)
+ return NULL;
+
+ mapping_id = mlx5_query_nic_system_image_guid(priv->mdev);
+
+ int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT,
+ sizeof(u32) * 2,
+ (1 << ESW_VPORT_BITS) - 1, true);
+ if (IS_ERR(int_port_priv->metadata_mapping)) {
+ mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n",
+ PTR_ERR(int_port_priv->metadata_mapping));
+ goto err_mapping;
+ }
+
+ int_port_priv->dev = priv->mdev;
+ mutex_init(&int_port_priv->int_ports_lock);
+ INIT_LIST_HEAD(&int_port_priv->int_ports);
+
+ return int_port_priv;
+
+err_mapping:
+ kfree(int_port_priv);
+
+ return NULL;
+}
+
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv)
+{
+ if (!priv)
+ return;
+
+ mutex_destroy(&priv->int_ports_lock);
+ mapping_destroy(priv->metadata_mapping);
+ kfree(priv);
+}
+
+/* Int port rx rules reside in ul rep rx tables.
+ * It is possible the ul rep will go down while there are
+ * still int port rules in its rx table so proper cleanup
+ * is required to free resources.
+ */
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_tc_int_port_priv *ppriv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ ppriv = uplink_priv->int_port_priv;
+
+ if (!ppriv)
+ return;
+
+ mutex_lock(&ppriv->int_ports_lock);
+ ppriv->ul_rep_rx_ready = true;
+ mutex_unlock(&ppriv->int_ports_lock);
+}
+
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_tc_int_port_priv *ppriv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_int_port *int_port;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ ppriv = uplink_priv->int_port_priv;
+
+ if (!ppriv)
+ return;
+
+ mutex_lock(&ppriv->int_ports_lock);
+
+ ppriv->ul_rep_rx_ready = false;
+
+ list_for_each_entry(int_port, &ppriv->int_ports, list) {
+ if (!IS_ERR_OR_NULL(int_port->rx_rule))
+ mlx5_del_flow_rules(int_port->rx_rule);
+
+ int_port->rx_rule = NULL;
+ }
+
+ mutex_unlock(&ppriv->int_ports_lock);
+}
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+ struct sk_buff *skb, u32 int_vport_metadata,
+ bool *forward_tx)
+{
+ enum mlx5e_tc_int_port_type fwd_type;
+ struct mlx5e_tc_int_port *int_port;
+ struct net_device *dev;
+ int ifindex;
+
+ if (!priv)
+ return false;
+
+ rcu_read_lock();
+ int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata);
+ if (!int_port) {
+ rcu_read_unlock();
+ mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n",
+ int_vport_metadata);
+ return false;
+ }
+
+ ifindex = int_port->ifindex;
+ fwd_type = int_port->type;
+ rcu_read_unlock();
+
+ dev = dev_get_by_index(&init_net, ifindex);
+ if (!dev) {
+ mlx5_core_dbg(priv->dev,
+ "Couldn't find internal port device with ifindex: %d\n",
+ ifindex);
+ return false;
+ }
+
+ skb->skb_iif = dev->ifindex;
+ skb->dev = dev;
+
+ if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) {
+ skb->pkt_type = PACKET_HOST;
+ skb_set_redirected(skb, true);
+ *forward_tx = false;
+ } else {
+ skb_reset_network_header(skb);
+ skb_push_rcsum(skb, skb->mac_len);
+ skb_set_redirected(skb, false);
+ *forward_tx = true;
+ }
+
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
new file mode 100644
index 000000000000..e72c79d308d7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_INT_PORT_H__
+#define __MLX5_EN_TC_INT_PORT_H__
+
+#include "en.h"
+
+struct mlx5e_tc_int_port;
+struct mlx5e_tc_int_port_priv;
+
+enum mlx5e_tc_int_port_type {
+ MLX5E_TC_INT_PORT_INGRESS,
+ MLX5E_TC_INT_PORT_EGRESS,
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw);
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv);
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv);
+
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv);
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv);
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+ struct sk_buff *skb, u32 int_vport_metadata,
+ bool *forward_tx);
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type);
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port);
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port);
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port);
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline u32
+mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+ return 0;
+}
+
+static inline int
+mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+ return 0;
+}
+
+static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+ return false;
+}
+
+static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {}
+static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+#endif /* __MLX5_EN_TC_INT_PORT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
new file mode 100644
index 000000000000..be74e1403328
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/math64.h>
+#include "lib/aso.h"
+#include "en/tc/post_act.h"
+#include "meter.h"
+#include "en/tc_priv.h"
+
+#define MLX5_START_COLOR_SHIFT 28
+#define MLX5_METER_MODE_SHIFT 24
+#define MLX5_CBS_EXP_SHIFT 24
+#define MLX5_CBS_MAN_SHIFT 16
+#define MLX5_CIR_EXP_SHIFT 8
+
+/* cir = 8*(10^9)*cir_mantissa/(2^cir_exponent)) bits/s */
+#define MLX5_CONST_CIR 8000000000ULL
+#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e))
+#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1)
+
+/* cbs = cbs_mantissa*2^cbs_exponent */
+#define MLX5_CALC_CBS(m, e) ((m) << (e))
+#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1)
+#define MLX5_MAX_HW_CBS 0x7FFFFFFF
+
+struct mlx5e_flow_meter_aso_obj {
+ struct list_head entry;
+ int base_id;
+ int total_meters;
+
+ unsigned long meters_map[0]; /* must be at the end of this struct */
+};
+
+struct mlx5e_flow_meters {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_aso *aso;
+ struct mutex aso_lock; /* Protects aso operations */
+ int log_granularity;
+ u32 pdn;
+
+ DECLARE_HASHTABLE(hashtbl, 8);
+
+ struct mutex sync_lock; /* protect flow meter operations */
+ struct list_head partial_list;
+ struct list_head full_list;
+
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_post_act *post_act;
+};
+
+static void
+mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp)
+{
+ s64 _cir, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cir << e;
+ if ((s64)m < 0) /* overflow */
+ break;
+ m = div64_u64(m, MLX5_CONST_CIR);
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cir = MLX5_CALC_CIR(m, e);
+ _delta = cir - _cir;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+static void
+mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp)
+{
+ s64 _cbs, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cbs >> e;
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cbs = MLX5_CALC_CBS(m, e);
+ _delta = cbs - _cbs;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params)
+{
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl;
+ struct mlx5_wqe_aso_data_seg *aso_data;
+ struct mlx5e_flow_meters *flow_meters;
+ u8 cir_man, cir_exp, cbs_man, cbs_exp;
+ struct mlx5_aso_wqe *aso_wqe;
+ unsigned long expires;
+ struct mlx5_aso *aso;
+ u64 rate, burst;
+ u8 ds_cnt;
+ int err;
+
+ rate = meter_params->rate;
+ burst = meter_params->burst;
+
+ /* HW treats each packet as 128 bytes in PPS mode */
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS) {
+ rate <<= 10;
+ burst <<= 7;
+ }
+
+ if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS)
+ return -EINVAL;
+
+ /* HW has limitation of total 31 bits for cbs */
+ if (burst > MLX5_MAX_HW_CBS) {
+ mlx5_core_warn(mdev,
+ "burst(%lld) is too large, use HW allowed value(%d)\n",
+ burst, MLX5_MAX_HW_CBS);
+ burst = MLX5_MAX_HW_CBS;
+ }
+
+ mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode);
+ mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp);
+ mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n",
+ rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man);
+ mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp);
+ mlx5_core_dbg(mdev, "burst=%lld, cbs=%lld, exp=%d, man=%d\n",
+ burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man);
+
+ if (!cir_man || !cbs_man)
+ return -EINVAL;
+
+ flow_meters = meter->flow_meters;
+ aso = flow_meters->aso;
+
+ mutex_lock(&flow_meters->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(aso);
+ ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS);
+ mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER);
+
+ aso_ctrl = &aso_wqe->aso_ctrl;
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6;
+ aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE |
+ MLX5_ASO_ALWAYS_TRUE << 4;
+ aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6;
+ aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32));
+
+ aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1);
+ memset(aso_data, 0, sizeof(*aso_data));
+ aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */
+ (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT));
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS)
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT);
+ else
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT);
+
+ aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) |
+ (cbs_man << MLX5_CBS_MAN_SHIFT) |
+ (cir_exp << MLX5_CIR_EXP_SHIFT) |
+ cir_man);
+
+ mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl);
+
+ /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ err = mlx5_aso_poll_cq(aso, true);
+ if (err)
+ usleep_range(2, 10);
+ } while (err && time_is_after_jiffies(expires));
+ mutex_unlock(&flow_meters->aso_lock);
+
+ return err;
+}
+
+static int
+mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ void *obj;
+ int err;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity);
+
+ obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj);
+ MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err) {
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id);
+ }
+
+ return err;
+}
+
+static void
+mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
+{
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5_fc *counter;
+ int err, pos, total;
+ u32 id;
+
+ meter = kzalloc(sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_red_counter;
+ }
+ meter->red_counter = counter;
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_green_counter;
+ }
+ meter->green_counter = counter;
+
+ meters_obj = list_first_entry_or_null(&flow_meters->partial_list,
+ struct mlx5e_flow_meter_aso_obj,
+ entry);
+ /* 2 meters in one object */
+ total = 1 << (flow_meters->log_granularity + 1);
+ if (!meters_obj) {
+ err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create flow meter ASO object\n");
+ goto err_create;
+ }
+
+ meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total),
+ GFP_KERNEL);
+ if (!meters_obj) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ meters_obj->base_id = id;
+ meters_obj->total_meters = total;
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ pos = 0;
+ } else {
+ pos = find_first_zero_bit(meters_obj->meters_map, total);
+ if (bitmap_weight(meters_obj->meters_map, total) == total - 1) {
+ list_del(&meters_obj->entry);
+ list_add(&meters_obj->entry, &flow_meters->full_list);
+ }
+ }
+
+ bitmap_set(meters_obj->meters_map, pos, 1);
+ meter->flow_meters = flow_meters;
+ meter->meters_obj = meters_obj;
+ meter->obj_id = meters_obj->base_id + pos / 2;
+ meter->idx = pos % 2;
+
+ mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+
+ return meter;
+
+err_mem:
+ mlx5e_flow_meter_destroy_aso_obj(mdev, id);
+err_create:
+ mlx5_fc_destroy(mdev, meter->green_counter);
+err_green_counter:
+ mlx5_fc_destroy(mdev, meter->red_counter);
+err_red_counter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static void
+__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ int n, pos;
+
+ mlx5_fc_destroy(mdev, meter->green_counter);
+ mlx5_fc_destroy(mdev, meter->red_counter);
+
+ meters_obj = meter->meters_obj;
+ pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx;
+ bitmap_clear(meters_obj->meters_map, pos, 1);
+ n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters);
+ if (n == 0) {
+ list_del(&meters_obj->entry);
+ mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id);
+ kfree(meters_obj);
+ } else if (n == meters_obj->total_meters - 1) {
+ list_del(&meters_obj->entry);
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ }
+
+ mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+ kfree(meter);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index)
+ if (meter->params.index == index)
+ goto add_ref;
+
+ return ERR_PTR(-ENOENT);
+
+add_ref:
+ meter->refcnt++;
+
+ return meter;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ mutex_unlock(&flow_meters->sync_lock);
+
+ return meter;
+}
+
+static void
+__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ if (--meter->refcnt == 0) {
+ hash_del(&meter->hlist);
+ __mlx5e_flow_meter_free(meter);
+ }
+}
+
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+
+ mutex_lock(&flow_meters->sync_lock);
+ __mlx5e_tc_meter_put(meter);
+ mutex_unlock(&flow_meters->sync_lock);
+}
+
+static struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = __mlx5e_flow_meter_alloc(flow_meters);
+ if (IS_ERR(meter))
+ return meter;
+
+ hash_add(flow_meters->hashtbl, &meter->hlist, params->index);
+ meter->params.index = params->index;
+ meter->refcnt++;
+
+ return meter;
+}
+
+static int
+__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ int err = 0;
+
+ if (meter->params.mode != params->mode || meter->params.rate != params->rate ||
+ meter->params.burst != params->burst) {
+ err = mlx5e_tc_meter_modify(mdev, meter, params);
+ if (err)
+ goto out;
+
+ meter->params.mode = params->mode;
+ meter->params.rate = params->rate;
+ meter->params.burst = params->burst;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&flow_meters->sync_lock);
+ err = __mlx5e_tc_meter_update(meter, params);
+ mutex_unlock(&flow_meters->sync_lock);
+ return err;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ if (IS_ERR(meter)) {
+ meter = mlx5e_tc_meter_alloc(flow_meters, params);
+ if (IS_ERR(meter)) {
+ err = PTR_ERR(meter);
+ goto err_get;
+ }
+ }
+
+ err = __mlx5e_tc_meter_update(meter, params);
+ if (err)
+ goto err_update;
+
+ mutex_unlock(&flow_meters->sync_lock);
+ return meter;
+
+err_update:
+ __mlx5e_tc_meter_put(meter);
+err_get:
+ mutex_unlock(&flow_meters->sync_lock);
+ return ERR_PTR(err);
+}
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters)
+{
+ return flow_meters->ns_type;
+}
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (IS_ERR_OR_NULL(post_act)) {
+ netdev_dbg(priv->netdev,
+ "flow meter offload is not supported, post action is missing\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL);
+ if (!flow_meters)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err);
+ goto err_out;
+ }
+
+ flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn);
+ if (IS_ERR(flow_meters->aso)) {
+ mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n");
+ err = PTR_ERR(flow_meters->aso);
+ goto err_sq;
+ }
+
+ mutex_init(&flow_meters->sync_lock);
+ INIT_LIST_HEAD(&flow_meters->partial_list);
+ INIT_LIST_HEAD(&flow_meters->full_list);
+
+ flow_meters->ns_type = ns_type;
+ flow_meters->mdev = mdev;
+ flow_meters->post_act = post_act;
+ mutex_init(&flow_meters->aso_lock);
+ flow_meters->log_granularity = min_t(int, 6,
+ MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc));
+
+ return flow_meters;
+
+err_sq:
+ mlx5_core_dealloc_pd(mdev, flow_meters->pdn);
+err_out:
+ kfree(flow_meters);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters)
+{
+ if (IS_ERR_OR_NULL(flow_meters))
+ return;
+
+ mlx5_aso_destroy(flow_meters->aso);
+ mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn);
+ kfree(flow_meters);
+}
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse)
+{
+ u64 bytes1, packets1, lastuse1;
+ u64 bytes2, packets2, lastuse2;
+
+ mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1);
+ mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2);
+
+ *bytes = bytes1 + bytes2;
+ *packets = packets1 + packets2;
+ *drops = packets2;
+ *lastuse = max_t(u64, lastuse1, lastuse2);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
new file mode 100644
index 000000000000..6de6e8a16327
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_FLOW_METER_H__
+#define __MLX5_EN_FLOW_METER_H__
+
+struct mlx5e_post_meter_priv;
+struct mlx5e_flow_meter_aso_obj;
+struct mlx5e_flow_meters;
+struct mlx5_flow_attr;
+
+enum mlx5e_flow_meter_mode {
+ MLX5_RATE_LIMIT_BPS,
+ MLX5_RATE_LIMIT_PPS,
+};
+
+struct mlx5e_flow_meter_params {
+ enum mlx5e_flow_meter_mode mode;
+ /* police action index */
+ u32 index;
+ u64 rate;
+ u64 burst;
+};
+
+struct mlx5e_flow_meter_handle {
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ u32 obj_id;
+ u8 idx;
+
+ int refcnt;
+ struct hlist_node hlist;
+ struct mlx5e_flow_meter_params params;
+
+ struct mlx5_fc *green_counter;
+ struct mlx5_fc *red_counter;
+};
+
+struct mlx5e_meter_attr {
+ struct mlx5e_flow_meter_params params;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5e_post_meter_priv *post_meter;
+};
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params);
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter);
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params);
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters);
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_action);
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters);
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse);
+
+#endif /* __MLX5_EN_FLOW_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
new file mode 100644
index 000000000000..4e48946c4c2a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "en_tc.h"
+#include "post_act.h"
+#include "mlx5_core.h"
+#include "fs_core.h"
+
+struct mlx5e_post_act {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_flow_table *ft;
+ struct mlx5e_priv *priv;
+ struct xarray ids;
+};
+
+struct mlx5e_post_act_handle {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_handle *rule;
+ u32 id;
+};
+
+#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ enum fs_flow_table_type table_type = ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FS_FT_FDB : FS_FT_NIC_RX;
+ struct mlx5e_post_act *post_act;
+ int err;
+
+ if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) {
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
+ mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+ err = -EOPNOTSUPP;
+ goto err_check;
+ }
+
+ post_act = kzalloc(sizeof(*post_act), GFP_KERNEL);
+ if (!post_act) {
+ err = -ENOMEM;
+ goto err_check;
+ }
+ post_act->ft = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(post_act->ft)) {
+ err = PTR_ERR(post_act->ft);
+ mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err);
+ goto err_ft;
+ }
+ post_act->chains = chains;
+ post_act->ns_type = ns_type;
+ post_act->priv = priv;
+ xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1);
+ return post_act;
+
+err_ft:
+ kfree(post_act);
+err_check:
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
+{
+ if (IS_ERR_OR_NULL(post_act))
+ return;
+
+ xa_destroy(&post_act->ids);
+ mlx5_chains_destroy_global_table(post_act->chains, post_act->ft);
+ kfree(post_act);
+}
+
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Post action rule matches on fte_id and executes original rule's tc rule action */
+ mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK);
+
+ handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr);
+ if (IS_ERR(handle->rule)) {
+ err = PTR_ERR(handle->rule);
+ netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
+ goto err_rule;
+ }
+
+ kvfree(spec);
+ return 0;
+
+err_rule:
+ kvfree(spec);
+ return err;
+}
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr)
+{
+ u32 attr_sz = ns_to_attr_sz(post_act->ns_type);
+ struct mlx5e_post_act_handle *handle;
+ struct mlx5_flow_attr *post_attr;
+ int err;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ post_attr = mlx5_alloc_flow_attr(post_act->ns_type);
+ if (!handle || !post_attr) {
+ kfree(post_attr);
+ kfree(handle);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memcpy(post_attr, attr, attr_sz);
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = post_act->ft;
+ post_attr->inner_match_level = MLX5_MATCH_NONE;
+ post_attr->outer_match_level = MLX5_MATCH_NONE;
+ post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+
+ handle->ns_type = post_act->ns_type;
+ /* Splits were handled before post action */
+ if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ post_attr->esw_attr->split_count = 0;
+
+ err = xa_alloc(&post_act->ids, &handle->id, post_attr,
+ XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL);
+ if (err)
+ goto err_xarray;
+
+ handle->attr = post_attr;
+
+ return handle;
+
+err_xarray:
+ kfree(post_attr);
+ kfree(handle);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr);
+ handle->rule = NULL;
+}
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
+{
+ if (!IS_ERR_OR_NULL(handle->rule))
+ mlx5e_tc_post_act_unoffload(post_act, handle);
+ xa_erase(&post_act->ids, handle->id);
+ kfree(handle->attr);
+ kfree(handle);
+}
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act)
+{
+ return post_act->ft;
+}
+
+/* Allocate a header modify action to write the post action handle fte id to a register. */
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts)
+{
+ return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
new file mode 100644
index 000000000000..f476774c0b75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_POST_ACTION_H__
+#define __MLX5_POST_ACTION_H__
+
+#include "en.h"
+#include "lib/fs_chains.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_priv;
+struct mlx5e_tc_mod_hdr_acts;
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act);
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
+
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts);
+
+#endif /* __MLX5_POST_ACTION_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
new file mode 100644
index 000000000000..8b77e822810e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "post_meter.h"
+#include "en/tc/post_act.h"
+
+#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG)
+#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG)
+
+struct mlx5e_post_meter_priv {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_handle *fwd_green_rule;
+ struct mlx5_flow_handle *drop_red_rule;
+};
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return post_meter->ft;
+}
+
+static int
+mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+
+ root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type);
+ if (!root_ns) {
+ mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(post_meter->ft)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n");
+ return PTR_ERR(post_meter->ft);
+ }
+
+ return 0;
+}
+
+static int
+mlx5e_post_meter_fg_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *misc2, *match_criteria;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in);
+ if (IS_ERR(post_meter->fg)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n");
+ err = PTR_ERR(post_meter->fg);
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static int
+mlx5e_post_meter_rules_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[0].counter_id = mlx5_fc_id(red_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n");
+ err = PTR_ERR(rule);
+ goto err_red;
+ }
+ post_meter->drop_red_rule = rule;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[0].ft = mlx5e_tc_post_act_get_ft(post_act);
+ dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[1].counter_id = mlx5_fc_id(green_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n");
+ err = PTR_ERR(rule);
+ goto err_green;
+ }
+ post_meter->fwd_green_rule = rule;
+
+ kvfree(spec);
+ return 0;
+
+err_green:
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+err_red:
+ kvfree(spec);
+ return err;
+}
+
+static void
+mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+ mlx5_del_flow_rules(post_meter->fwd_green_rule);
+}
+
+static void
+mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_group(post_meter->fg);
+}
+
+static void
+mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_table(post_meter->ft);
+}
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5e_post_meter_priv *post_meter;
+ int err;
+
+ post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL);
+ if (!post_meter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_post_meter_table_create(priv, ns_type, post_meter);
+ if (err)
+ goto err_ft;
+
+ err = mlx5e_post_meter_fg_create(priv, post_meter);
+ if (err)
+ goto err_fg;
+
+ err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter,
+ red_counter);
+ if (err)
+ goto err_rules;
+
+ return post_meter;
+
+err_rules:
+ mlx5e_post_meter_fg_destroy(post_meter);
+err_fg:
+ mlx5e_post_meter_table_destroy(post_meter);
+err_ft:
+ kfree(post_meter);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5e_post_meter_rules_destroy(post_meter);
+ mlx5e_post_meter_fg_destroy(post_meter);
+ mlx5e_post_meter_table_destroy(post_meter);
+ kfree(post_meter);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
new file mode 100644
index 000000000000..34d0e4b9fc7a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_POST_METER_H__
+#define __MLX5_EN_POST_METER_H__
+
+#define packet_color_to_reg { \
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \
+ .moffset = 0, \
+ .mlen = 8, \
+ .soffset = MLX5_BYTE_OFF(fte_match_param, \
+ misc_parameters_2.metadata_reg_c_5), \
+}
+
+struct mlx5e_post_meter_priv;
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter);
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter);
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter);
+
+#endif /* __MLX5_EN_POST_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
new file mode 100644
index 000000000000..1cbd2eb9d04f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/skbuff.h>
+#include <net/psample.h>
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "en/tc/act/sample.h"
+#include "en/mod_hdr.h"
+#include "sample.h"
+#include "eswitch.h"
+#include "en_tc.h"
+#include "fs_core.h"
+
+#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
+
+static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
+ .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
+ .max_num_groups = 0, /* default num of groups */
+ .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
+};
+
+struct mlx5e_tc_psample {
+ struct mlx5_eswitch *esw;
+ struct mlx5_flow_table *termtbl;
+ struct mlx5_flow_handle *termtbl_rule;
+ DECLARE_HASHTABLE(hashtbl, 8);
+ struct mutex ht_lock; /* protect hashtbl */
+ DECLARE_HASHTABLE(restore_hashtbl, 8);
+ struct mutex restore_lock; /* protect restore_hashtbl */
+ struct mlx5e_post_act *post_act;
+};
+
+struct mlx5e_sampler {
+ struct hlist_node hlist;
+ u32 sampler_id;
+ u32 sample_ratio;
+ u32 sample_table_id;
+ u32 default_table_id;
+ int count;
+};
+
+struct mlx5e_sample_flow {
+ struct mlx5e_sampler *sampler;
+ struct mlx5e_sample_restore *restore;
+ struct mlx5_flow_attr *pre_attr;
+ struct mlx5_flow_handle *pre_rule;
+ struct mlx5_flow_attr *post_attr;
+ struct mlx5_flow_handle *post_rule;
+};
+
+struct mlx5e_sample_restore {
+ struct hlist_node hlist;
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_flow_handle *rule;
+ u32 obj_id;
+ int count;
+};
+
+static int
+sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_act act = {};
+ int err;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) {
+ mlx5_core_warn(dev, "termination table is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ mlx5_core_warn(dev, "failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 1;
+ ft_attr.level = 1;
+ tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(tc_psample->termtbl)) {
+ err = PTR_ERR(tc_psample->termtbl);
+ mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
+ return err;
+ }
+
+ act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.vport.num = esw->manager_vport;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
+ if (IS_ERR(tc_psample->termtbl_rule)) {
+ err = PTR_ERR(tc_psample->termtbl_rule);
+ mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample)
+{
+ mlx5_del_flow_rules(tc_psample->termtbl_rule);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
+}
+
+static int
+sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler)
+{
+ u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u64 general_obj_types;
+ void *obj;
+ int err;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER))
+ return -EOPNOTSUPP;
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
+ return -EOPNOTSUPP;
+
+ obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object);
+ MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB);
+ MLX5_SET(sampler_obj, obj, ignore_flow_level, 1);
+ MLX5_SET(sampler_obj, obj, level, 1);
+ MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio);
+ MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id);
+ MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id);
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void
+sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static u32
+sampler_hash(u32 sample_ratio, u32 default_table_id)
+{
+ return jhash_2words(sample_ratio, default_table_id, 0);
+}
+
+static int
+sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2)
+{
+ return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
+}
+
+static struct mlx5e_sampler *
+sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id)
+{
+ struct mlx5e_sampler *sampler;
+ u32 hash_key;
+ int err;
+
+ mutex_lock(&tc_psample->ht_lock);
+ hash_key = sampler_hash(sample_ratio, default_table_id);
+ hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key)
+ if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
+ sample_ratio, default_table_id))
+ goto add_ref;
+
+ sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
+ if (!sampler) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ sampler->sample_table_id = tc_psample->termtbl->id;
+ sampler->default_table_id = default_table_id;
+ sampler->sample_ratio = sample_ratio;
+
+ err = sampler_obj_create(tc_psample->esw->dev, sampler);
+ if (err)
+ goto err_create;
+
+ hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key);
+
+add_ref:
+ sampler->count++;
+ mutex_unlock(&tc_psample->ht_lock);
+ return sampler;
+
+err_create:
+ kfree(sampler);
+err_alloc:
+ mutex_unlock(&tc_psample->ht_lock);
+ return ERR_PTR(err);
+}
+
+static void
+sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
+{
+ mutex_lock(&tc_psample->ht_lock);
+ if (--sampler->count == 0) {
+ hash_del(&sampler->hlist);
+ sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id);
+ kfree(sampler);
+ }
+ mutex_unlock(&tc_psample->ht_lock);
+}
+
+/* obj_id is used to restore the sample parameters.
+ * Set fte_id in original flow table, then match it in the default table.
+ * Only set it for NICs can preserve reg_c or decap action. For other cases,
+ * use the same match in the default table.
+ * Use one header rewrite for both obj_id and fte_id.
+ */
+static struct mlx5_modify_hdr *
+sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ CHAIN_TO_REG, obj_id);
+ if (err)
+ goto err_set_regc0;
+
+ modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts->num_actions,
+ mod_acts->actions);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ goto err_modify_hdr;
+ }
+
+ mlx5e_mod_hdr_dealloc(mod_acts);
+ return modify_hdr;
+
+err_modify_hdr:
+ mlx5e_mod_hdr_dealloc(mod_acts);
+err_set_regc0:
+ return ERR_PTR(err);
+}
+
+static struct mlx5e_sample_restore *
+sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_core_dev *mdev = esw->dev;
+ struct mlx5e_sample_restore *restore;
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ mutex_lock(&tc_psample->restore_lock);
+ hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id)
+ if (restore->obj_id == obj_id)
+ goto add_ref;
+
+ restore = kzalloc(sizeof(*restore), GFP_KERNEL);
+ if (!restore) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ restore->obj_id = obj_id;
+
+ modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ goto err_modify_hdr;
+ }
+ restore->modify_hdr = modify_hdr;
+
+ restore->rule = esw_add_restore_rule(esw, obj_id);
+ if (IS_ERR(restore->rule)) {
+ err = PTR_ERR(restore->rule);
+ goto err_restore;
+ }
+
+ hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id);
+add_ref:
+ restore->count++;
+ mutex_unlock(&tc_psample->restore_lock);
+ return restore;
+
+err_restore:
+ mlx5_modify_header_dealloc(mdev, restore->modify_hdr);
+err_modify_hdr:
+ kfree(restore);
+err_alloc:
+ mutex_unlock(&tc_psample->restore_lock);
+ return ERR_PTR(err);
+}
+
+static void
+sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore)
+{
+ mutex_lock(&tc_psample->restore_lock);
+ if (--restore->count == 0)
+ hash_del(&restore->hlist);
+ mutex_unlock(&tc_psample->restore_lock);
+
+ if (!restore->count) {
+ mlx5_del_flow_rules(restore->rule);
+ mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr);
+ kfree(restore);
+ }
+}
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
+{
+ u32 trunc_size = mapped_obj->sample.trunc_size;
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+
+ md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len;
+ md.in_ifindex = skb->dev->ifindex;
+ psample_group.group_num = mapped_obj->sample.group_id;
+ psample_group.net = &init_net;
+ skb_push(skb, skb->mac_len);
+
+ psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
+}
+
+static int
+add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr,
+ u32 *default_tbl_id)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB);
+ struct mlx5_vport_tbl_attr per_vport_tbl_attr;
+ struct mlx5_flow_table *default_tbl;
+ struct mlx5_flow_attr *post_attr;
+ int err;
+
+ /* Allocate default table per vport, chain and prio. Otherwise, there is
+ * only one default table for the same sampler object. Rules with different
+ * prio and chain may overlap. For CT sample action, per vport default
+ * table is needed to resotre the metadata.
+ */
+ per_vport_tbl_attr.chain = attr->chain;
+ per_vport_tbl_attr.prio = attr->prio;
+ per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
+ per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
+ if (IS_ERR(default_tbl)) {
+ err = PTR_ERR(default_tbl);
+ goto err_default_tbl;
+ }
+ *default_tbl_id = default_tbl->id;
+
+ post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!post_attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+ sample_flow->post_attr = post_attr;
+ memcpy(post_attr, attr, attr_sz);
+ /* Perform the original matches on the default table.
+ * Offload all actions except the sample action.
+ */
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = default_tbl;
+ post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT;
+
+ /* When offloading sample and encap action, if there is no valid
+ * neigh data struct, a slow path rule is offloaded first. Source
+ * port metadata match is set at that time. A per vport table is
+ * already allocated. No need to match it again. So clear the source
+ * port metadata match.
+ */
+ mlx5_eswitch_clear_rule_source_port(esw, spec);
+ sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr);
+ if (IS_ERR(sample_flow->post_rule)) {
+ err = PTR_ERR(sample_flow->post_rule);
+ goto err_rule;
+ }
+ return 0;
+
+err_rule:
+ kfree(post_attr);
+err_attr:
+ mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
+err_default_tbl:
+ return err;
+}
+
+static void
+del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_vport_tbl_attr tbl_attr;
+
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr);
+ kfree(sample_flow->post_attr);
+ tbl_attr.chain = attr->chain;
+ tbl_attr.prio = attr->prio;
+ tbl_attr.vport = esw_attr->in_rep->vport;
+ tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ mlx5_esw_vporttbl_put(esw, &tbl_attr);
+}
+
+/* For the following typical flow table:
+ *
+ * +-------------------------------+
+ * + original flow table +
+ * +-------------------------------+
+ * + original match +
+ * +-------------------------------+
+ * + sample action + other actions +
+ * +-------------------------------+
+ *
+ * We translate the tc filter with sample action to the following HW model:
+ *
+ * +---------------------+
+ * + original flow table +
+ * +---------------------+
+ * + original match +
+ * +---------------------+
+ * | set fte_id (if reg_c preserve cap)
+ * | do decap (if required)
+ * v
+ * +------------------------------------------------+
+ * + Flow Sampler Object +
+ * +------------------------------------------------+
+ * + sample ratio +
+ * +------------------------------------------------+
+ * + sample table id | default table id +
+ * +------------------------------------------------+
+ * | |
+ * v v
+ * +-----------------------------+ +-------------------+
+ * + sample table + + default table +
+ * +-----------------------------+ +-------------------+
+ * + forward to management vport + |
+ * +-----------------------------+ |
+ * +-------+------+
+ * | |reg_c preserve cap
+ * | |or decap action
+ * v v
+ * +-----------------+ +-------------+
+ * + per vport table + + post action +
+ * +-----------------+ +-------------+
+ * + original match +
+ * +-----------------+
+ * + other actions +
+ * +-----------------+
+ */
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_esw_flow_attr *pre_esw_attr;
+ struct mlx5_mapped_obj restore_obj = {};
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5e_sample_attr *sample_attr;
+ struct mlx5_flow_attr *pre_attr;
+ u32 tunnel_id = attr->tunnel_id;
+ struct mlx5_eswitch *esw;
+ u32 default_tbl_id;
+ u32 obj_id;
+ int err;
+
+ if (IS_ERR_OR_NULL(tc_psample))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
+ if (!sample_flow)
+ return ERR_PTR(-ENOMEM);
+ sample_attr = &attr->sample_attr;
+ sample_attr->sample_flow = sample_flow;
+
+ /* For NICs with reg_c_preserve support or decap action, use
+ * post action instead of the per vport, chain and prio table.
+ * Only match the fte id instead of the same match in the
+ * original flow table.
+ */
+ esw = tc_psample->esw;
+ if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) {
+ struct mlx5_flow_table *ft;
+
+ ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
+ default_tbl_id = ft->id;
+ } else {
+ err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
+ if (err)
+ goto err_post_rule;
+ }
+
+ /* Create sampler object. */
+ sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id);
+ if (IS_ERR(sample_flow->sampler)) {
+ err = PTR_ERR(sample_flow->sampler);
+ goto err_sampler;
+ }
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+
+ /* Create an id mapping reg_c0 value to sample object. */
+ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
+ restore_obj.sample.group_id = sample_attr->group_num;
+ restore_obj.sample.rate = sample_attr->rate;
+ restore_obj.sample.trunc_size = sample_attr->trunc_size;
+ restore_obj.sample.tunnel_id = tunnel_id;
+ err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
+ if (err)
+ goto err_obj_id;
+ sample_attr->restore_obj_id = obj_id;
+
+ /* Create sample restore context. */
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+ sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts);
+ if (IS_ERR(sample_flow->restore)) {
+ err = PTR_ERR(sample_flow->restore);
+ goto err_sample_restore;
+ }
+
+ /* Perform the original matches on the original table. Offload the
+ * sample action. The destination is the sampler object.
+ */
+ pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!pre_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre_flow_attr;
+ }
+ pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ /* For decap action, do decap in the original flow table instead of the
+ * default flow table.
+ */
+ if (tunnel_id)
+ pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
+ pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
+ pre_attr->inner_match_level = attr->inner_match_level;
+ pre_attr->outer_match_level = attr->outer_match_level;
+ pre_attr->chain = attr->chain;
+ pre_attr->prio = attr->prio;
+ pre_attr->ft = attr->ft;
+ pre_attr->sample_attr = *sample_attr;
+ pre_esw_attr = pre_attr->esw_attr;
+ pre_esw_attr->in_mdev = esw_attr->in_mdev;
+ pre_esw_attr->in_rep = esw_attr->in_rep;
+ sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
+ if (IS_ERR(sample_flow->pre_rule)) {
+ err = PTR_ERR(sample_flow->pre_rule);
+ goto err_pre_offload_rule;
+ }
+ sample_flow->pre_attr = pre_attr;
+
+ return sample_flow->pre_rule;
+
+err_pre_offload_rule:
+ kfree(pre_attr);
+err_alloc_pre_flow_attr:
+ sample_restore_put(tc_psample, sample_flow->restore);
+err_sample_restore:
+ mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
+err_obj_id:
+ sampler_put(tc_psample, sample_flow->sampler);
+err_sampler:
+ if (sample_flow->post_rule)
+ del_post_rule(esw, sample_flow, attr);
+err_post_rule:
+ kfree(sample_flow);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5_eswitch *esw;
+
+ if (IS_ERR_OR_NULL(tc_psample))
+ return;
+
+ /* The following delete order can't be changed, otherwise,
+ * will hit fw syndromes.
+ */
+ esw = tc_psample->esw;
+ sample_flow = attr->sample_attr.sample_flow;
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
+
+ sample_restore_put(tc_psample, sample_flow->restore);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id);
+ sampler_put(tc_psample, sample_flow->sampler);
+ if (sample_flow->post_rule)
+ del_post_rule(esw, sample_flow, attr);
+
+ kfree(sample_flow->pre_attr);
+ kfree(sample_flow);
+}
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{
+ struct mlx5e_tc_psample *tc_psample;
+ int err;
+
+ tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL);
+ if (!tc_psample)
+ return ERR_PTR(-ENOMEM);
+ if (IS_ERR_OR_NULL(post_act)) {
+ err = PTR_ERR(post_act);
+ goto err_post_act;
+ }
+ tc_psample->post_act = post_act;
+ tc_psample->esw = esw;
+ err = sampler_termtbl_create(tc_psample);
+ if (err)
+ goto err_post_act;
+
+ mutex_init(&tc_psample->ht_lock);
+ mutex_init(&tc_psample->restore_lock);
+
+ return tc_psample;
+
+err_post_act:
+ kfree(tc_psample);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample)
+{
+ if (IS_ERR_OR_NULL(tc_psample))
+ return;
+
+ mutex_destroy(&tc_psample->restore_lock);
+ mutex_destroy(&tc_psample->ht_lock);
+ sampler_termtbl_destroy(tc_psample);
+ kfree(tc_psample);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
new file mode 100644
index 000000000000..a569367eae4d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_SAMPLE_H__
+#define __MLX5_EN_TC_SAMPLE_H__
+
+#include "eswitch.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_psample;
+struct mlx5e_post_act;
+
+struct mlx5e_sample_attr {
+ u32 group_num;
+ u32 rate;
+ u32 trunc_size;
+ u32 restore_obj_id;
+ u32 sampler_id;
+ struct mlx5e_sample_flow *sample_flow;
+};
+
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+
+#else /* CONFIG_MLX5_TC_SAMPLE */
+
+static inline struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr) {}
+
+static inline struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {}
+
+static inline void
+mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {}
+
+#endif /* CONFIG_MLX5_TC_SAMPLE */
+#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
new file mode 100644
index 000000000000..864ce0c393e6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -0,0 +1,2277 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <linux/workqueue.h>
+#include <linux/refcount.h>
+#include <linux/xarray.h>
+#include <linux/if_macvlan.h>
+#include <linux/debugfs.h>
+
+#include "lib/fs_chains.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+
+#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
+#define MLX5_CT_STATE_TRK_BIT BIT(2)
+#define MLX5_CT_STATE_NAT_BIT BIT(3)
+#define MLX5_CT_STATE_REPLY_BIT BIT(4)
+#define MLX5_CT_STATE_RELATED_BIT BIT(5)
+#define MLX5_CT_STATE_INVALID_BIT BIT(6)
+
+#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
+#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
+
+/* Statically allocate modify actions for
+ * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
+ * This will be increased dynamically if needed (for the ipv6 snat + dnat).
+ */
+#define MLX5_CT_MIN_MOD_ACTS 10
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
+
+struct mlx5_tc_ct_debugfs {
+ struct {
+ atomic_t offloaded;
+ atomic_t rx_dropped;
+ } stats;
+
+ struct dentry *root;
+};
+
+struct mlx5_tc_ct_priv {
+ struct mlx5_core_dev *dev;
+ const struct net_device *netdev;
+ struct mod_hdr_tbl *mod_hdr_tbl;
+ struct xarray tuple_ids;
+ struct rhashtable zone_ht;
+ struct rhashtable ct_tuples_ht;
+ struct rhashtable ct_tuples_nat_ht;
+ struct mlx5_flow_table *ct;
+ struct mlx5_flow_table *ct_nat;
+ struct mlx5e_post_act *post_act;
+ struct mutex control_lock; /* guards parallel adds/dels */
+ struct mapping_ctx *zone_mapping;
+ struct mapping_ctx *labels_mapping;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_ct_fs *fs;
+ struct mlx5_ct_fs_ops *fs_ops;
+ spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
+
+ struct mlx5_tc_ct_debugfs debugfs;
+};
+
+struct mlx5_ct_flow {
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_flow_handle *pre_ct_rule;
+ struct mlx5_ct_ft *ft;
+ u32 chain_mapping;
+};
+
+struct mlx5_ct_zone_rule {
+ struct mlx5_ct_fs_rule *rule;
+ struct mlx5e_mod_hdr_handle *mh;
+ struct mlx5_flow_attr *attr;
+ bool nat;
+};
+
+struct mlx5_tc_ct_pre {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *flow_grp;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_modify_hdr *modify_hdr;
+};
+
+struct mlx5_ct_ft {
+ struct rhash_head node;
+ u16 zone;
+ u32 zone_restore_id;
+ refcount_t refcount;
+ struct nf_flowtable *nf_ft;
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct rhashtable ct_entries_ht;
+ struct mlx5_tc_ct_pre pre_ct;
+ struct mlx5_tc_ct_pre pre_ct_nat;
+};
+
+struct mlx5_ct_tuple {
+ u16 addr_type;
+ __be16 n_proto;
+ u8 ip_proto;
+ struct {
+ union {
+ __be32 src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ __be32 dst_v4;
+ struct in6_addr dst_v6;
+ };
+ } ip;
+ struct {
+ __be16 src;
+ __be16 dst;
+ } port;
+
+ u16 zone;
+};
+
+struct mlx5_ct_counter {
+ struct mlx5_fc *counter;
+ refcount_t refcount;
+ bool is_shared;
+};
+
+enum {
+ MLX5_CT_ENTRY_FLAG_VALID,
+};
+
+struct mlx5_ct_entry {
+ struct rhash_head node;
+ struct rhash_head tuple_node;
+ struct rhash_head tuple_nat_node;
+ struct mlx5_ct_counter *counter;
+ unsigned long cookie;
+ unsigned long restore_cookie;
+ struct mlx5_ct_tuple tuple;
+ struct mlx5_ct_tuple tuple_nat;
+ struct mlx5_ct_zone_rule zone_rules[2];
+
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct work_struct work;
+
+ refcount_t refcnt;
+ unsigned long flags;
+};
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh);
+
+static const struct rhashtable_params cts_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, node),
+ .key_offset = offsetof(struct mlx5_ct_entry, cookie),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params zone_params = {
+ .head_offset = offsetof(struct mlx5_ct_ft, node),
+ .key_offset = offsetof(struct mlx5_ct_ft, zone),
+ .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params tuples_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params tuples_nat_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static bool
+mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
+{
+ return !!(entry->tuple_nat_node.next);
+}
+
+static int
+mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
+ u32 *labels, u32 *id)
+{
+ if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
+ *id = 0;
+ return 0;
+ }
+
+ if (mapping_add(ct_priv->labels_mapping, labels, id))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void
+mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
+{
+ if (id)
+ mapping_remove(ct_priv->labels_mapping, id);
+}
+
+static int
+mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
+{
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+
+ flow_rule_match_basic(rule, &basic);
+ flow_rule_match_control(rule, &control);
+
+ tuple->n_proto = basic.key->n_proto;
+ tuple->ip_proto = basic.key->ip_proto;
+ tuple->addr_type = control.key->addr_type;
+
+ if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ tuple->ip.src_v4 = match.key->src;
+ tuple->ip.dst_v4 = match.key->dst;
+ } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ tuple->ip.src_v6 = match.key->src;
+ tuple->ip.dst_v6 = match.key->dst;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (tuple->ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ tuple->port.src = match.key->src;
+ tuple->port.dst = match.key->dst;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ } else {
+ if (tuple->ip_proto != IPPROTO_GRE)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
+ struct flow_rule *rule)
+{
+ struct flow_action *flow_action = &rule->action;
+ struct flow_action_entry *act;
+ u32 offset, val, ip6_offset;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE)
+ continue;
+
+ offset = act->mangle.offset;
+ val = act->mangle.val;
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ if (offset == offsetof(struct iphdr, saddr))
+ tuple->ip.src_v4 = cpu_to_be32(val);
+ else if (offset == offsetof(struct iphdr, daddr))
+ tuple->ip.dst_v4 = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
+ ip6_offset /= 4;
+ if (ip6_offset < 4)
+ tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
+ else if (ip6_offset < 8)
+ tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ if (offset == offsetof(struct tcphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct tcphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ if (offset == offsetof(struct udphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct udphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct net_device *ndev)
+{
+ struct mlx5e_priv *other_priv = netdev_priv(ndev);
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ bool vf_rep, uplink_rep;
+
+ vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+ uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+
+ if (vf_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ if (uplink_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ if (is_vlan_dev(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
+ if (netif_is_macvlan(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
+ if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_rule *rule)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+
+ mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ match.mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ match.key->ip_proto);
+
+ ip_proto = match.key->ip_proto;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.key->src, sizeof(match.key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, sizeof(match.key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(match.key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(match.key->dst));
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+
+ flow_rule_match_tcp(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(match.mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(match.key->flags));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+
+ if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
+ if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
+ spec->flow_context.flow_source =
+ mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+ dev_put(dev);
+ }
+ }
+
+ return 0;
+}
+
+static void
+mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+{
+ if (entry->counter->is_shared &&
+ !refcount_dec_and_test(&entry->counter->refcount))
+ return;
+
+ mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
+ kfree(entry->counter);
+}
+
+static void
+mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry,
+ bool nat)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5_flow_attr *attr = zone_rule->attr;
+
+ ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
+
+ ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ kfree(attr);
+}
+
+static void
+mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+ atomic_dec(&ct_priv->debugfs.stats.offloaded);
+}
+
+static struct flow_action_entry *
+mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct flow_action_entry *act;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT_METADATA)
+ return act;
+ }
+
+ return NULL;
+}
+
+static int
+mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ u8 ct_state,
+ u32 mark,
+ u32 labels_id,
+ u8 zone_restore_id)
+{
+ enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ CTSTATE_TO_REG, ct_state);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ MARK_TO_REG, mark);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ LABELS_TO_REG, labels_id);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+
+ /* Make another copy of zone id in reg_b for
+ * NIC rx flows since we don't copy reg_c1 to
+ * reg_b upon miss.
+ */
+ if (ns != MLX5_FLOW_NAMESPACE_FDB) {
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
+static int
+mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
+ char *modact)
+{
+ u32 offset = act->mangle.offset, field;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct iphdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
+ else if (offset == offsetof(struct iphdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct ipv6hdr, saddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct tcphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
+ else if (offset == offsetof(struct tcphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct udphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
+ else if (offset == offsetof(struct udphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, offset, 0);
+ MLX5_SET(set_action_in, modact, field, field);
+ MLX5_SET(set_action_in, modact, data, act->mangle.val);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ struct flow_action_entry *act;
+ char *modact;
+ int err, i;
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE: {
+ modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
+
+ err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
+ if (err)
+ return err;
+
+ mod_acts->num_actions++;
+ }
+ break;
+
+ case FLOW_ACTION_CT_METADATA:
+ /* Handled earlier */
+ continue;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule,
+ struct mlx5e_mod_hdr_handle **mh,
+ u8 zone_restore_id, bool nat_table, bool has_nat)
+{
+ DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
+ DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
+ struct flow_action_entry *meta;
+ u16 ct_state = 0;
+ int err;
+
+ meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta)
+ return -EOPNOTSUPP;
+
+ err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
+ &attr->ct_attr.ct_labels_id);
+ if (err)
+ return -EOPNOTSUPP;
+ if (nat_table) {
+ if (has_nat) {
+ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
+ if (err)
+ goto err_mapping;
+ }
+
+ ct_state |= MLX5_CT_STATE_NAT_BIT;
+ }
+
+ ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
+ ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
+ err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
+ ct_state,
+ meta->ct_metadata.mark,
+ attr->ct_attr.ct_labels_id,
+ zone_restore_id);
+ if (err)
+ goto err_mapping;
+
+ if (nat_table && has_nat) {
+ attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
+ mod_acts.num_actions,
+ mod_acts.actions);
+ if (IS_ERR(attr->modify_hdr)) {
+ err = PTR_ERR(attr->modify_hdr);
+ goto err_mapping;
+ }
+
+ *mh = NULL;
+ } else {
+ *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl,
+ ct_priv->ns_type,
+ &mod_acts);
+ if (IS_ERR(*mh)) {
+ err = PTR_ERR(*mh);
+ goto err_mapping;
+ }
+ attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
+ }
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ return 0;
+
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ return err;
+}
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh)
+{
+ if (mh)
+ mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
+ else
+ mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ bool nat, u8 zone_restore_id)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_flow_attr *attr;
+ int err;
+
+ zone_rule->nat = nat;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+
+ err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
+ &zone_rule->mh,
+ zone_restore_id,
+ nat,
+ mlx5_tc_ct_entry_has_nat(entry));
+ if (err) {
+ ct_dbg("Failed to create ct entry mod hdr");
+ goto err_mod_hdr;
+ }
+
+ attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = 0;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ if (entry->tuple.ip_proto == IPPROTO_TCP ||
+ entry->tuple.ip_proto == IPPROTO_UDP)
+ attr->outer_match_level = MLX5_MATCH_L4;
+ else
+ attr->outer_match_level = MLX5_MATCH_L3;
+ attr->counter = entry->counter->counter;
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->in_mdev = priv->mdev;
+
+ mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
+
+ zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
+ if (IS_ERR(zone_rule->rule)) {
+ err = PTR_ERR(zone_rule->rule);
+ ct_dbg("Failed to add ct entry rule, nat: %d", nat);
+ goto err_rule;
+ }
+
+ zone_rule->attr = attr;
+
+ kvfree(spec);
+ ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
+
+ return 0;
+
+err_rule:
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+err_mod_hdr:
+ kfree(attr);
+err_attr:
+ kvfree(spec);
+ return err;
+}
+
+static bool
+mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
+{
+ return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+}
+
+static struct mlx5_ct_entry *
+mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
+{
+ struct mlx5_ct_entry *entry;
+
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
+ tuples_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt)) {
+ return entry;
+ } else if (!entry) {
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
+ tuple, tuples_nat_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt))
+ return entry;
+ }
+
+ return entry ? ERR_PTR(-EINVAL) : NULL;
+}
+
+static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+ tuples_ht_params);
+}
+
+static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ kfree(entry);
+}
+
+static void
+mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
+{
+ struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void
+__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
+ queue_work(entry->ct_priv->wq, &entry->work);
+}
+
+static struct mlx5_ct_counter *
+mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_ct_counter *counter;
+ int ret;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ counter->is_shared = false;
+ counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
+ if (IS_ERR(counter->counter)) {
+ ct_dbg("Failed to create counter for ct entry");
+ ret = PTR_ERR(counter->counter);
+ kfree(counter);
+ return ERR_PTR(ret);
+ }
+
+ return counter;
+}
+
+static struct mlx5_ct_counter *
+mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ struct mlx5_ct_tuple rev_tuple = entry->tuple;
+ struct mlx5_ct_counter *shared_counter;
+ struct mlx5_ct_entry *rev_entry;
+
+ /* get the reversed tuple */
+ swap(rev_tuple.port.src, rev_tuple.port.dst);
+
+ if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ __be32 tmp_addr = rev_tuple.ip.src_v4;
+
+ rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
+ rev_tuple.ip.dst_v4 = tmp_addr;
+ } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
+
+ rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
+ rev_tuple.ip.dst_v6 = tmp_addr;
+ } else {
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Use the same counter as the reverse direction */
+ spin_lock_bh(&ct_priv->ht_lock);
+ rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
+
+ if (IS_ERR(rev_entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ goto create_counter;
+ }
+
+ if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
+ ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
+ shared_counter = rev_entry->counter;
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(rev_entry);
+ return shared_counter;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+create_counter:
+
+ shared_counter = mlx5_tc_ct_counter_create(ct_priv);
+ if (IS_ERR(shared_counter))
+ return shared_counter;
+
+ shared_counter->is_shared = true;
+ refcount_set(&shared_counter->refcount, 1);
+ return shared_counter;
+}
+
+static int
+mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ u8 zone_restore_id)
+{
+ int err;
+
+ if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
+ entry->counter = mlx5_tc_ct_counter_create(ct_priv);
+ else
+ entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+
+ if (IS_ERR(entry->counter)) {
+ err = PTR_ERR(entry->counter);
+ return err;
+ }
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
+ zone_restore_id);
+ if (err)
+ goto err_orig;
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
+ zone_restore_id);
+ if (err)
+ goto err_nat;
+
+ atomic_inc(&ct_priv->debugfs.stats.offloaded);
+ return 0;
+
+err_nat:
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+err_orig:
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ struct flow_action_entry *meta_action;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+ int err;
+
+ meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta_action)
+ return -EOPNOTSUPP;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (entry && refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_put(entry);
+ return -EEXIST;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->tuple.zone = ft->zone;
+ entry->cookie = flow->cookie;
+ entry->restore_cookie = meta_action->ct_metadata.cookie;
+ refcount_set(&entry->refcnt, 2);
+ entry->ct_priv = ct_priv;
+
+ err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
+ if (err)
+ goto err_set;
+
+ memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
+ err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
+ if (err)
+ goto err_set;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+
+ err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
+ cts_ht_params);
+ if (err)
+ goto err_entries;
+
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+ if (err)
+ goto err_tuple;
+
+ if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ if (err)
+ goto err_tuple_nat;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
+ ft->zone_restore_id);
+ if (err)
+ goto err_rules;
+
+ set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+ mlx5_tc_ct_entry_put(entry); /* this function reference */
+
+ return 0;
+
+err_rules:
+ spin_lock_bh(&ct_priv->ht_lock);
+ if (mlx5_tc_ct_entry_has_nat(entry))
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node, tuples_nat_ht_params);
+err_tuple_nat:
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+err_tuple:
+ rhashtable_remove_fast(&ft->ct_entries_ht,
+ &entry->node,
+ cts_ht_params);
+err_entries:
+ spin_unlock_bh(&ct_priv->ht_lock);
+err_set:
+ kfree(entry);
+ if (err != -EEXIST)
+ netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(entry);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *f)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = f->cookie;
+ struct mlx5_ct_entry *entry;
+ u64 lastuse, packets, bytes;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+
+ mlx5_tc_ct_entry_put(entry);
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload *f = type_data;
+ struct mlx5_ct_ft *ft = cb_priv;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5_tc_ct_block_flow_offload_add(ft, f);
+ case FLOW_CLS_DESTROY:
+ return mlx5_tc_ct_block_flow_offload_del(ft, f);
+ case FLOW_CLS_STATS:
+ return mlx5_tc_ct_block_flow_offload_stats(ft, f);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static bool
+mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
+ u16 zone)
+{
+ struct flow_keys flow_keys;
+
+ skb_reset_network_header(skb);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
+
+ tuple->zone = zone;
+
+ if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
+ flow_keys.basic.ip_proto != IPPROTO_UDP &&
+ flow_keys.basic.ip_proto != IPPROTO_GRE)
+ return false;
+
+ if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
+ flow_keys.basic.ip_proto == IPPROTO_UDP) {
+ tuple->port.src = flow_keys.ports.src;
+ tuple->port.dst = flow_keys.ports.dst;
+ }
+ tuple->n_proto = flow_keys.basic.n_proto;
+ tuple->ip_proto = flow_keys.basic.ip_proto;
+
+ switch (flow_keys.basic.n_proto) {
+ case htons(ETH_P_IP):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
+ tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
+ break;
+
+ case htons(ETH_P_IPV6):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
+ tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
+ break;
+ default:
+ goto out;
+ }
+
+ return true;
+
+out:
+ return false;
+}
+
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+{
+ u32 ctstate = 0, ctstate_mask = 0;
+
+ mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
+ &ctstate, &ctstate_mask);
+
+ if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
+ return -EOPNOTSUPP;
+
+ ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+
+ return 0;
+}
+
+void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
+{
+ if (!priv || !ct_attr->ct_labels_id)
+ return;
+
+ mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
+}
+
+int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
+{
+ bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_dissector_key_ct *mask, *key;
+ u32 ctstate = 0, ctstate_mask = 0;
+ u16 ct_state_on, ct_state_off;
+ u16 ct_state, ct_state_mask;
+ struct flow_match_ct match;
+ u32 ct_labels[4];
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct matching isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_ct(rule, &match);
+
+ key = match.key;
+ mask = match.mask;
+
+ ct_state = key->ct_state;
+ ct_state_mask = mask->ct_state;
+
+ if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+ TCA_FLOWER_KEY_CT_FLAGS_NEW |
+ TCA_FLOWER_KEY_CT_FLAGS_REPLY |
+ TCA_FLOWER_KEY_CT_FLAGS_RELATED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "only ct_state trk, est, new and rpl are supported for offload");
+ return -EOPNOTSUPP;
+ }
+
+ ct_state_on = ct_state & ct_state_mask;
+ ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
+ trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
+ est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+ untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+
+ ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
+ ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
+
+ if (rel) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +rel isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (inv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +inv isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (new) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +new isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (mask->ct_zone)
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ key->ct_zone, MLX5_CT_ZONE_MASK);
+ if (ctstate_mask)
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+ if (mask->ct_mark)
+ mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
+ key->ct_mark, mask->ct_mark);
+ if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
+ mask->ct_labels[3]) {
+ ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
+ ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
+ ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
+ ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
+ if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
+ return -EOPNOTSUPP;
+ mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
+ MLX5_CT_LABELS_MASK);
+ }
+
+ return 0;
+}
+
+int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct action isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ attr->ct_attr.zone = act->ct.zone;
+ attr->ct_attr.ct_action = act->ct.action;
+ attr->ct_attr.nf_ft = act->ct.flow_table;
+
+ return 0;
+}
+
+static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table *ft = pre_ct->ft;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 ctstate;
+ u16 zone;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
+ err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
+ ZONE_TO_REG, zone);
+ if (err) {
+ ct_dbg("Failed to set zone register mapping");
+ goto err_mapping;
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
+ pre_mod_acts.num_actions,
+ pre_mod_acts.actions);
+
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct->modify_hdr = mod_hdr;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.modify_hdr = mod_hdr;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ /* add flow rule */
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ zone, MLX5_CT_ZONE_MASK);
+ ctstate = MLX5_CT_STATE_TRK_BIT;
+ if (nat)
+ ctstate |= MLX5_CT_STATE_NAT_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
+
+ dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct flow rule zone %d", zone);
+ goto err_flow_rule;
+ }
+ pre_ct->flow_rule = rule;
+
+ /* add miss rule */
+ dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct miss rule zone %d", zone);
+ goto err_miss_rule;
+ }
+ pre_ct->miss_rule = rule;
+
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return 0;
+
+err_miss_rule:
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+err_flow_rule:
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return err;
+}
+
+static void
+tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+ mlx5_del_flow_rules(pre_ct->miss_rule);
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *g;
+ u32 metadata_reg_c_2_mask;
+ u32 *flow_group_in;
+ void *misc;
+ int err;
+
+ ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ ct_dbg("Failed to get flow namespace");
+ return err;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to create pre ct table");
+ goto out_free;
+ }
+ pre_ct->ft = ft;
+
+ /* create flow group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria.misc_parameters_2);
+
+ metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
+ if (nat)
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
+ metadata_reg_c_2_mask);
+
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct group");
+ goto err_flow_grp;
+ }
+ pre_ct->flow_grp = g;
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct miss group");
+ goto err_miss_grp;
+ }
+ pre_ct->miss_grp = g;
+
+ err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
+ if (err)
+ goto err_add_rules;
+
+ kvfree(flow_group_in);
+ return 0;
+
+err_add_rules:
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+err_miss_grp:
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+err_flow_grp:
+ mlx5_destroy_flow_table(ft);
+out_free:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+ mlx5_destroy_flow_table(pre_ct->ft);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ int err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
+ if (err)
+ return err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
+ if (err)
+ goto err_pre_ct_nat;
+
+ return 0;
+
+err_pre_ct_nat:
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+}
+
+/* To avoid false lock dependency warning set the ct_entries_ht lock
+ * class different than the lock class of the ht being used when deleting
+ * last flow from a group and then deleting a group, we get into del_sw_flow_group()
+ * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
+ * it's different than the ht->mutex here.
+ */
+static struct lock_class_key ct_entries_ht_lock_key;
+
+static struct mlx5_ct_ft *
+mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
+ struct nf_flowtable *nf_ft)
+{
+ struct mlx5_ct_ft *ft;
+ int err;
+
+ ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
+ if (ft) {
+ refcount_inc(&ft->refcount);
+ return ft;
+ }
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
+ if (err)
+ goto err_mapping;
+
+ ft->zone = zone;
+ ft->nf_ft = nf_ft;
+ ft->ct_priv = ct_priv;
+ refcount_set(&ft->refcount, 1);
+
+ err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
+ if (err)
+ goto err_alloc_pre_ct;
+
+ err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
+ if (err)
+ goto err_init;
+
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
+
+ err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
+ zone_params);
+ if (err)
+ goto err_insert;
+
+ err = nf_flow_table_offload_add_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ if (err)
+ goto err_add_cb;
+
+ return ft;
+
+err_add_cb:
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+err_insert:
+ rhashtable_destroy(&ft->ct_entries_ht);
+err_init:
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+err_alloc_pre_ct:
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+err_mapping:
+ kfree(ft);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
+{
+ struct mlx5_ct_entry *entry = ptr;
+
+ mlx5_tc_ct_entry_put(entry);
+}
+
+static void
+mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+ if (!refcount_dec_and_test(&ft->refcount))
+ return;
+
+ flush_workqueue(ct_priv->wq);
+ nf_flow_table_offload_del_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+ rhashtable_free_and_destroy(&ft->ct_entries_ht,
+ mlx5_tc_ct_flush_ft_entry,
+ ct_priv);
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+ kfree(ft);
+}
+
+/* We translate the tc filter with CT action to the following HW model:
+ *
+ * +---------------------+
+ * + ft prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * | set chain miss mapping
+ * | set fte_id
+ * | set tunnel_id
+ * | do decap
+ * v
+ * +---------------------+
+ * + pre_ct/pre_ct_nat + if matches +-------------------------+
+ * + zone+nat match +---------------->+ post_act (see below) +
+ * +---------------------+ set zone +-------------------------+
+ * | set zone
+ * v
+ * +--------------------+
+ * + CT (nat or no nat) +
+ * + tuple + zone match +
+ * +--------------------+
+ * | set mark
+ * | set labels_id
+ * | set established
+ * | set zone_restore
+ * | do nat (if needed)
+ * v
+ * +--------------+
+ * + post_act + original filter actions
+ * + fte_id match +------------------------>
+ * +--------------+
+ */
+static struct mlx5_flow_handle *
+__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *orig_spec,
+ struct mlx5_flow_attr *attr)
+{
+ bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
+ u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_ct_flow *ct_flow;
+ int chain_mapping = 0, err;
+ struct mlx5_ct_ft *ft;
+
+ ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+ if (!ct_flow) {
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Register for CT established events */
+ ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
+ attr->ct_attr.nf_ft);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to register to ft callback");
+ goto err_ft;
+ }
+ ct_flow->ft = ft;
+
+ /* Base flow attributes of both rules on original rule attribute */
+ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->pre_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre;
+ }
+
+ pre_ct_attr = ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, attr_sz);
+ pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
+
+ /* Modify the original rule's action to fwd and modify, leave decap */
+ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Write chain miss tag for miss in ct table as we
+ * don't go though all prios of this chain as normal tc rules
+ * miss.
+ */
+ err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
+ &chain_mapping);
+ if (err) {
+ ct_dbg("Failed to get chain register mapping for chain");
+ goto err_get_chain;
+ }
+ ct_flow->chain_mapping = chain_mapping;
+
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
+ CHAIN_TO_REG, chain_mapping);
+ if (err) {
+ ct_dbg("Failed to set chain register mapping");
+ goto err_mapping;
+ }
+
+ /* If original flow is decap, we do it before going into ct table
+ * so add a rewrite for the tunnel match_id.
+ */
+ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ attr->chain == 0) {
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
+ ct_priv->ns_type,
+ TUNNEL_TO_REG,
+ attr->tunnel_id);
+ if (err) {
+ ct_dbg("Failed to set tunnel register mapping");
+ goto err_mapping;
+ }
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
+ pre_mod_acts->num_actions,
+ pre_mod_acts->actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct_attr->modify_hdr = mod_hdr;
+
+ /* Change original rule point to ct table */
+ pre_ct_attr->dest_chain = 0;
+ pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
+ ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
+ pre_ct_attr);
+ if (IS_ERR(ct_flow->pre_ct_rule)) {
+ err = PTR_ERR(ct_flow->pre_ct_rule);
+ ct_dbg("Failed to add pre ct rule");
+ goto err_insert_orig;
+ }
+
+ attr->ct_attr.ct_flow = ct_flow;
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+
+ return ct_flow->pre_ct_rule;
+
+err_insert_orig:
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+err_get_chain:
+ kfree(ct_flow->pre_ct_attr);
+err_alloc_pre:
+ mlx5_tc_ct_del_ft_cb(ct_priv, ft);
+err_ft:
+ kfree(ct_flow);
+ netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
+ return ERR_PTR(err);
+}
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ struct mlx5_flow_handle *rule;
+
+ if (!priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&priv->control_lock);
+ rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
+ mutex_unlock(&priv->control_lock);
+
+ return rule;
+}
+
+static void
+__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_flow *ct_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+ mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
+
+ kfree(ct_flow->pre_ct_attr);
+ kfree(ct_flow);
+}
+
+void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
+
+ /* We are called on error to clean up stuff from parsing
+ * but we don't have anything for now
+ */
+ if (!ct_flow)
+ return;
+
+ mutex_lock(&priv->control_lock);
+ __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
+ mutex_unlock(&priv->control_lock);
+}
+
+static int
+mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
+ int err;
+
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
+ ct_dbg("Using SMFS ct flow steering provider");
+ fs_ops = mlx5_ct_fs_smfs_ops_get();
+ }
+
+ ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
+ if (!ct_priv->fs)
+ return -ENOMEM;
+
+ ct_priv->fs->netdev = ct_priv->netdev;
+ ct_priv->fs->dev = ct_priv->dev;
+ ct_priv->fs_ops = fs_ops;
+
+ err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
+ if (err)
+ goto err_init;
+
+ return 0;
+
+err_init:
+ kfree(ct_priv->fs);
+ return err;
+}
+
+static int
+mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
+ const char **err_msg)
+{
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
+ /* vlan workaround should be avoided for multi chain rules.
+ * This is just a sanity check as pop vlan action should
+ * be supported by any FW that supports ignore_flow_level
+ */
+
+ *err_msg = "firmware vlan actions support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
+ fdb_modify_header_fwd_to_table)) {
+ /* CT always writes to registers which are mod header actions.
+ * Therefore, mod header and goto is required
+ */
+
+ *err_msg = "firmware fwd and modify support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *err_msg = "register loopback isn't supported";
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ const char *err_msg = NULL;
+ int err = 0;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ /* cannot restore chain ID on HW miss */
+
+ err_msg = "tc skb extension missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+#endif
+ if (IS_ERR_OR_NULL(post_act)) {
+ /* Ignore_flow_level support isn't supported by default for VFs and so post_act
+ * won't be supported. Skip showing error msg.
+ */
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
+ err_msg = "post action is missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
+
+out_err:
+ if (err && err_msg)
+ netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
+ return err;
+}
+
+static void
+mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
+ struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+ char dirname[16] = {};
+
+ if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0)
+ return;
+
+ ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
+ debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.offloaded);
+ debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.rx_dropped);
+}
+
+static void
+mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ debugfs_remove_recursive(ct_priv->debugfs.root);
+}
+
+#define INIT_ERR_PREFIX "tc ct offload init failed"
+
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct mlx5_core_dev *dev;
+ u64 mapping_id;
+ int err;
+
+ dev = priv->mdev;
+ err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
+ if (err)
+ goto err_support;
+
+ ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
+ if (!ct_priv)
+ goto err_alloc;
+
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+ sizeof(u16), 0, true);
+ if (IS_ERR(ct_priv->zone_mapping)) {
+ err = PTR_ERR(ct_priv->zone_mapping);
+ goto err_mapping_zone;
+ }
+
+ ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+ sizeof(u32) * 4, 0, true);
+ if (IS_ERR(ct_priv->labels_mapping)) {
+ err = PTR_ERR(ct_priv->labels_mapping);
+ goto err_mapping_labels;
+ }
+
+ spin_lock_init(&ct_priv->ht_lock);
+ ct_priv->ns_type = ns_type;
+ ct_priv->chains = chains;
+ ct_priv->netdev = priv->netdev;
+ ct_priv->dev = priv->mdev;
+ ct_priv->mod_hdr_tbl = mod_hdr;
+ ct_priv->ct = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct)) {
+ err = PTR_ERR(ct_priv->ct);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_tbl;
+ }
+
+ ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct_nat)) {
+ err = PTR_ERR(ct_priv->ct_nat);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct nat table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_nat_tbl;
+ }
+
+ ct_priv->post_act = post_act;
+ mutex_init(&ct_priv->control_lock);
+ if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
+ goto err_ct_zone_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
+ goto err_ct_tuples_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
+ goto err_ct_tuples_nat_ht;
+
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ err = mlx5_tc_ct_fs_init(ct_priv);
+ if (err)
+ goto err_init_fs;
+
+ mlx5_ct_tc_create_dbgfs(ct_priv);
+ return ct_priv;
+
+err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+err_ct_tuples_nat_ht:
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+err_ct_tuples_ht:
+ rhashtable_destroy(&ct_priv->zone_ht);
+err_ct_zone_ht:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+err_ct_nat_tbl:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+err_ct_tbl:
+ mapping_destroy(ct_priv->labels_mapping);
+err_mapping_labels:
+ mapping_destroy(ct_priv->zone_mapping);
+err_mapping_zone:
+ kfree(ct_priv);
+err_alloc:
+err_support:
+
+ return NULL;
+}
+
+void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_fs_chains *chains;
+
+ if (!ct_priv)
+ return;
+
+ destroy_workqueue(ct_priv->wq);
+ mlx5_ct_tc_remove_dbgfs(ct_priv);
+ chains = ct_priv->chains;
+
+ ct_priv->fs_ops->destroy(ct_priv->fs);
+ kfree(ct_priv->fs);
+
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+ mapping_destroy(ct_priv->zone_mapping);
+ mapping_destroy(ct_priv->labels_mapping);
+
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+ rhashtable_destroy(&ct_priv->zone_ht);
+ mutex_destroy(&ct_priv->control_lock);
+ kfree(ct_priv);
+}
+
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id)
+{
+ struct mlx5_ct_tuple tuple = {};
+ struct mlx5_ct_entry *entry;
+ u16 zone;
+
+ if (!ct_priv || !zone_restore_id)
+ return true;
+
+ if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
+ goto out_inc_drop;
+
+ if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
+ goto out_inc_drop;
+
+ spin_lock(&ct_priv->ht_lock);
+
+ entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
+ if (!entry) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+
+ if (IS_ERR(entry)) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+ spin_unlock(&ct_priv->ht_lock);
+
+ tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+ __mlx5_tc_ct_entry_put(entry);
+
+ return true;
+
+out_inc_drop:
+ atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
+ return false;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
new file mode 100644
index 000000000000..5bbd6b92840f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_CT_H__
+#define __MLX5_EN_TC_CT_H__
+
+#include <net/pkt_cls.h>
+#include <linux/mlx5/fs.h>
+#include <net/tc_act/tc_ct.h>
+
+#include "en.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_mod_hdr_acts;
+struct mlx5_rep_uplink_priv;
+struct mlx5e_tc_flow;
+struct mlx5e_priv;
+
+struct mlx5_fs_chains;
+struct mlx5_tc_ct_priv;
+struct mlx5_ct_flow;
+
+struct nf_flowtable;
+
+struct mlx5_ct_attr {
+ u16 zone;
+ u16 ct_action;
+ struct mlx5_ct_flow *ct_flow;
+ struct nf_flowtable *nf_ft;
+ u32 ct_labels_id;
+};
+
+#define zone_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
+ .moffset = 0,\
+ .mlen = 16,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_2),\
+}
+
+#define ctstate_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
+ .moffset = 16,\
+ .mlen = 16,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_2),\
+}
+
+#define mark_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\
+ .moffset = 0,\
+ .mlen = 32,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_3),\
+}
+
+#define labels_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\
+ .moffset = 0,\
+ .mlen = 32,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_4),\
+}
+
+/* 8 LSB of metadata C5 are reserved for packet color */
+#define fteid_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\
+ .moffset = 8,\
+ .mlen = 24,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_5),\
+}
+
+#define zone_restore_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\
+ .moffset = 0,\
+ .mlen = ESW_ZONE_ID_BITS,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_1),\
+}
+
+#define nic_zone_restore_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\
+ .moffset = 16,\
+ .mlen = ESW_ZONE_ID_BITS,\
+}
+
+#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG)
+#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG)
+
+#if IS_ENABLED(CONFIG_MLX5_TC_CT)
+
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act);
+void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv);
+
+void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr);
+
+int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack);
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
+int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id);
+
+int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts);
+
+#else /* CONFIG_MLX5_TC_CT */
+
+static inline struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ return NULL;
+}
+
+static inline void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
+{
+}
+
+static inline void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {}
+
+static inline int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+{
+ return 0;
+}
+
+static inline int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+}
+
+static inline bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id)
+{
+ if (!zone_restore_id)
+ return true;
+
+ return false;
+}
+
+#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */
+#endif /* __MLX5_EN_TC_CT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
new file mode 100644
index 000000000000..2e42d7c5451e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_PRIV_H__
+#define __MLX5_EN_TC_PRIV_H__
+
+#include "en_tc.h"
+#include "en/tc/act/act.h"
+
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
+
+#define MLX5E_TC_MAX_SPLITS 1
+
+
+enum {
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
+ MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
+ MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
+ MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11,
+};
+
+struct mlx5e_tc_flow_parse_attr {
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
+ struct mlx5_flow_spec spec;
+ struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
+ struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_act_parse_state parse_state;
+};
+
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
+
+/* Helper struct for accessing a struct containing list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the contining struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+ struct mlx5e_encap_entry *e; /* attached encap instance */
+ struct list_head list;
+ int index;
+};
+
+struct encap_route_flow_item {
+ struct mlx5e_route_entry *r; /* attached route instance */
+ int index;
+};
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ struct mlx5e_priv *priv;
+ u64 cookie;
+ unsigned long flags;
+ struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+ /* flows sharing the same reformat object - currently mpls decap */
+ struct list_head l3_to_l2_reformat;
+ struct mlx5e_decap_entry *decap_reformat;
+
+ /* flows sharing same route entry */
+ struct list_head decap_routes;
+ struct mlx5e_route_entry *decap_route;
+ struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS];
+
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
+ struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
+ struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
+ struct list_head unready; /* flows not ready to be offloaded (e.g
+ * due to missing route)
+ */
+ struct net_device *orig_dev; /* netdev adding flow first */
+ int tmp_entry_index;
+ struct list_head tmp_list; /* temporary flow list used by neigh update */
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+ struct completion init_done;
+ struct completion del_hw_done;
+ struct mlx5_flow_attr *attr;
+ struct list_head attrs;
+ u32 chain_mapping;
+};
+
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
+
+struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow);
+
+void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow);
+int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow);
+
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow);
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow);
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
+
+static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before setting bit. */
+ smp_mb__before_atomic();
+ set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
+ unsigned long flag)
+{
+ /* test_and_set_bit() provides all necessary barriers */
+ return test_and_set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_test_and_set(flow, flag) \
+ __flow_flag_test_and_set(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before clearing bit. */
+ smp_mb__before_atomic();
+ clear_bit(flag, &flow->flags);
+}
+
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ bool ret = test_bit(flag, &flow->flags);
+
+ /* Read fields of flow structure only after checking flags. */
+ smp_mb__after_atomic();
+ return ret;
+}
+
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow);
+struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow);
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow);
+
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow);
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_get_int_port_priv(struct mlx5e_priv *priv);
+
+struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev);
+
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec);
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec);
+
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index af4ebd2951b5..e6f64d890fb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -1,11 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
+#include <net/inet_ecn.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
+#include <net/bareudp.h>
#include "en/tc_tun.h"
+#include "en/tc_priv.h"
#include "en_tc.h"
+#include "rep/tc.h"
+#include "rep/neigh.h"
+#include "lag/lag.h"
+#include "lag/mp.h"
+
+struct mlx5e_tc_tun_route_attr {
+ struct net_device *out_dev;
+ struct net_device *route_dev;
+ union {
+ struct flowi4 fl4;
+ struct flowi6 fl6;
+ } fl;
+ struct neighbour *n;
+ u8 ttl;
+};
+
+#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}
+
+static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
+{
+ if (attr->n)
+ neigh_release(attr->n);
+ if (attr->route_dev)
+ dev_put(attr->route_dev);
+}
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
@@ -16,6 +44,8 @@ struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
else if (netif_is_gretap(tunnel_dev) ||
netif_is_ip6gretap(tunnel_dev))
return &gre_tunnel;
+ else if (netif_is_bareudp(tunnel_dev))
+ return &mplsoudp_tunnel;
else
return NULL;
}
@@ -54,7 +84,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
*/
*route_dev = dev;
if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
- dst_is_lag_dev || is_vlan_dev(*route_dev))
+ dst_is_lag_dev || is_vlan_dev(*route_dev) ||
+ netif_is_ovs_master(*route_dev))
*out_dev = uplink_dev;
else if (mlx5e_eswitch_rep(dev) &&
mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
@@ -66,17 +97,18 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
return -EOPNOTSUPP;
+ if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev)
+ return -EOPNOTSUPP;
+
return 0;
}
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi4 *fl4,
- struct neighbour **out_n,
- u8 *out_ttl)
+static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct mlx5e_tc_tun_route_attr *attr)
{
+ struct net_device *route_dev;
+ struct net_device *out_dev;
struct neighbour *n;
struct rtable *rt;
@@ -89,37 +121,60 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = mdev->priv.eswitch;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- fl4->flowi4_oif = uplink_dev->ifindex;
+ attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
+ } else {
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
+
+ if (tunnel && tunnel->get_remote_ifindex)
+ attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
}
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
- ret = PTR_ERR_OR_ZERO(rt);
- if (ret)
- return ret;
+ rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ if (rt->rt_type != RTN_UNICAST) {
+ ret = -ENETUNREACH;
+ goto err_rt_release;
+ }
if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
- ip_rt_put(rt);
- return -ENETUNREACH;
+ ret = -ENETUNREACH;
+ goto err_rt_release;
}
#else
return -EOPNOTSUPP;
#endif
- ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
- if (ret < 0) {
- ip_rt_put(rt);
- return ret;
+ ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_rt_release;
+ dev_hold(route_dev);
+
+ if (!attr->ttl)
+ attr->ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err_dev_release;
}
- if (!(*out_ttl))
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
ip_rt_put(rt);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
+ attr->route_dev = route_dev;
+ attr->out_dev = out_dev;
+ attr->n = n;
return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_rt_release:
+ ip_rt_put(rt);
+ return ret;
+}
+
+static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
+{
+ mlx5e_tc_tun_route_attr_cleanup(attr);
}
static const char *mlx5e_netdev_kind(struct net_device *dev)
@@ -171,28 +226,27 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
- struct net_device *out_dev, *route_dev;
- struct flowi4 fl4 = {};
- struct neighbour *n;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
int ipv4_encap_size;
char *encap_header;
- u8 nud_state, ttl;
struct iphdr *ip;
+ u8 nud_state;
int err;
/* add the IP fields */
- fl4.flowi4_tos = tun_key->tos;
- fl4.daddr = tun_key->u.ipv4.dst;
- fl4.saddr = tun_key->u.ipv4.src;
- ttl = tun_key->ttl;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
- err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
- &fl4, &n, &ttl);
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
if (err)
return err;
ipv4_encap_size =
- (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct iphdr) +
e->tunnel->calc_hlen(e);
@@ -209,40 +263,36 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
goto release_neigh;
}
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
- e->route_dev = route_dev;
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
/* It's important to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
* neigh changes it's validity state, we would find the relevant neigh
* in the hash.
*/
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err)
goto free_encap;
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
/* add ethernet header */
- ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
ETH_P_IP);
/* add ip header */
ip->tos = tun_key->tos;
ip->version = 0x4;
ip->ihl = 0x5;
- ip->ttl = ttl;
- ip->daddr = fl4.daddr;
- ip->saddr = fl4.saddr;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
@@ -254,15 +304,18 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
+ neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
goto release_neigh;
}
- e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- ipv4_encap_size, encap_header,
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv4_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
MLX5_FLOW_NAMESPACE_FDB);
if (IS_ERR(e->pkt_reformat)) {
err = PTR_ERR(e->pkt_reformat);
@@ -270,8 +323,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
}
e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
return err;
destroy_neigh_entry:
@@ -279,45 +332,162 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- neigh_release(n);
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+}
+
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ int ipv4_encap_size;
+ char *encap_header;
+ struct iphdr *ip;
+ u8 nud_state;
+ int err;
+
+ /* add the IP fields */
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv4_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv4_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv4_put(&attr);
return err;
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi6 *fl6,
- struct neighbour **out_n,
- u8 *out_ttl)
+static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct mlx5e_tc_tun_route_attr *attr)
{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
+ struct net_device *route_dev;
+ struct net_device *out_dev;
struct dst_entry *dst;
struct neighbour *n;
-
int ret;
- dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
+ if (tunnel && tunnel->get_remote_ifindex)
+ attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
- if (!(*out_ttl))
- *out_ttl = ip6_dst_hoplimit(dst);
+ if (!attr->ttl)
+ attr->ttl = ip6_dst_hoplimit(dst);
+
+ ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_dst_release;
- ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
- if (ret < 0) {
- dst_release(dst);
- return ret;
+ dev_hold(route_dev);
+ n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err_dev_release;
}
- n = dst_neigh_lookup(dst, &fl6->daddr);
dst_release(dst);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
+ attr->out_dev = out_dev;
+ attr->route_dev = route_dev;
+ attr->n = n;
return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_dst_release:
+ dst_release(dst);
+ return ret;
+}
+
+static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
+{
+ mlx5e_tc_tun_route_attr_cleanup(attr);
}
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
@@ -326,28 +496,26 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
- struct net_device *out_dev, *route_dev;
- struct flowi6 fl6 = {};
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
struct ipv6hdr *ip6h;
- struct neighbour *n = NULL;
int ipv6_encap_size;
char *encap_header;
- u8 nud_state, ttl;
+ u8 nud_state;
int err;
- ttl = tun_key->ttl;
-
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
- fl6.daddr = tun_key->u.ipv6.dst;
- fl6.saddr = tun_key->u.ipv6.src;
+ attr.ttl = tun_key->ttl;
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
- err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
- &fl6, &n, &ttl);
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
if (err)
return err;
ipv6_encap_size =
- (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct ipv6hdr) +
e->tunnel->calc_hlen(e);
@@ -364,39 +532,35 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
goto release_neigh;
}
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
- e->route_dev = route_dev;
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
- /* It's importent to add the neigh to the hash table before checking
+ /* It's important to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
* neigh changes it's validity state, we would find the relevant neigh
* in the hash.
*/
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err)
goto free_encap;
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
/* add ethernet header */
- ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
ETH_P_IPV6);
/* add ip header */
ip6_flow_hdr(ip6h, tun_key->tos, 0);
/* the HW fills up ipv6 payload len */
- ip6h->hop_limit = ttl;
- ip6h->daddr = fl6.daddr;
- ip6h->saddr = fl6.saddr;
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
@@ -408,16 +572,18 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
+ neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
goto release_neigh;
}
- e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- ipv6_encap_size, encap_header,
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv6_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
MLX5_FLOW_NAMESPACE_FDB);
if (IS_ERR(e->pkt_reformat)) {
err = PTR_ERR(e->pkt_reformat);
@@ -425,8 +591,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
}
e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
return err;
destroy_neigh_entry:
@@ -434,10 +600,174 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- neigh_release(n);
+ mlx5e_route_lookup_ipv6_put(&attr);
return err;
}
+
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state;
+ int err;
+
+ attr.ttl = tun_key->ttl;
+
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv6_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv6_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+}
+#endif
+
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *flow_attr,
+ struct net_device *filter_dev)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_int_port *int_port;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ u16 vport_num;
+ int err = 0;
+
+ if (flow_attr->tun_ip_version == 4) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
+ attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
+ err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->tun_ip_version == 6) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
+ attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
+ err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
+ }
#endif
+ else
+ return 0;
+
+ if (err)
+ return err;
+
+ if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops &&
+ mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) {
+ err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
+ misc_parameters.vxlan_vni);
+ esw_attr->rx_tun_attr->decap_vport = vport_num;
+ } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
+ int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
+ attr.route_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS);
+ if (IS_ERR(int_port)) {
+ err = PTR_ERR(int_port);
+ goto out;
+ }
+ esw_attr->int_port = int_port;
+ }
+
+out:
+ if (flow_attr->tun_ip_version == 4)
+ mlx5e_route_lookup_ipv4_put(&attr);
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->tun_ip_version == 6)
+ mlx5e_route_lookup_ipv6_put(&attr);
+#endif
+ return err;
+}
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev)
@@ -469,10 +799,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
- void *headers_c,
- void *headers_v, u8 *match_level)
+ u8 *match_level)
{
struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ struct netlink_ext_ack *extack = f->common.extack;
int err = 0;
if (!tunnel) {
@@ -499,6 +834,106 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
goto out;
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_basic key_basic = {};
+ struct flow_dissector_key_basic mask_basic = {
+ .n_proto = htons(0xFFFF),
+ };
+ struct flow_match_basic match_basic = {
+ .key = &key_basic, .mask = &mask_basic,
+ };
+ struct flow_match_control match;
+ u16 addr_type;
+
+ flow_rule_match_enc_control(rule, &match);
+ addr_type = match.key->addr_type;
+
+ /* For tunnel addr_type used same key id`s as for non-tunnel */
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_enc_ipv4_addrs(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->dst));
+
+ key_basic.n_proto = htons(ETH_P_IP);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
+ } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_enc_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ key_basic.n_proto = htons(ETH_P_IPV6);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_match_ip match;
+
+ flow_rule_match_enc_ip(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+ match.mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+ match.key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+ match.mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+ match.key->ttl);
+
+ if (match.mask->ttl &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB
+ (priv->mdev,
+ ft_field_support.outer_ipv4_ttl)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on TTL is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+
+ /* let software handle IP fragments */
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+
+ return 0;
+
out:
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 6f9a78c85ffd..b38f693bbb52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -11,11 +11,19 @@
#include "en.h"
#include "en_rep.h"
+#ifdef CONFIG_MLX5_ESWITCH
+
enum {
MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
MLX5E_TC_TUNNEL_TYPE_VXLAN,
MLX5E_TC_TUNNEL_TYPE_GENEVE,
MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+};
+
+struct mlx5e_encap_key {
+ const struct ip_tunnel_key *ip_tun_key;
+ struct mlx5e_tc_tunnel *tc_tunnel;
};
struct mlx5e_tc_tunnel {
@@ -41,11 +49,15 @@ struct mlx5e_tc_tunnel {
struct flow_cls_offload *f,
void *headers_c,
void *headers_v);
+ bool (*encap_info_equal)(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b);
+ int (*get_remote_ifindex)(struct net_device *mirred_dev);
};
extern struct mlx5e_tc_tunnel vxlan_tunnel;
extern struct mlx5e_tc_tunnel geneve_tunnel;
extern struct mlx5e_tc_tunnel gre_tunnel;
+extern struct mlx5e_tc_tunnel mplsoudp_tunnel;
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
@@ -57,17 +69,33 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
#else
static inline int
mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
- struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; }
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
+static inline int
+mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
#endif
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct net_device *filter_dev);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
@@ -76,8 +104,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
- void *headers_c,
- void *headers_v, u8 *match_level);
+ u8 *match_level);
int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
@@ -85,4 +112,9 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
void *headers_c,
void *headers_v);
+bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b);
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
#endif //__MLX5_EN_TC_TUNNEL_H__
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
new file mode 100644
index 000000000000..5aff97914367
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -0,0 +1,1757 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <net/fib_notifier.h>
+#include <net/nexthop.h>
+#include "tc_tun_encap.h"
+#include "en_tc.h"
+#include "tc_tun.h"
+#include "rep/tc.h"
+#include "diag/en_tc_tracepoint.h"
+
+enum {
+ MLX5E_ROUTE_ENTRY_VALID = BIT(0),
+};
+
+static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ int out_index)
+{
+ struct net_device *route_dev;
+ int err = 0;
+
+ route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
+
+ if (!route_dev || !netif_is_ovs_master(route_dev))
+ goto out;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, out_index);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+
+ return err;
+}
+
+struct mlx5e_route_key {
+ int ip_version;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } endpoint_ip;
+};
+
+struct mlx5e_route_entry {
+ struct mlx5e_route_key key;
+ struct list_head encap_entries;
+ struct list_head decap_flows;
+ u32 flags;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ int tunnel_dev_index;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_tc_tun_encap {
+ struct mlx5e_priv *priv;
+ struct notifier_block fib_nb;
+ spinlock_t route_lock; /* protects route_tbl */
+ unsigned long route_tbl_last_update;
+ DECLARE_HASHTABLE(route_tbl, 8);
+};
+
+static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
+{
+ return r->flags & MLX5E_ROUTE_ENTRY_VALID;
+}
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_rx_tun_attr *tun_attr;
+ void *daddr, *saddr;
+ u8 ip_version;
+
+ tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
+ if (!tun_attr)
+ return -ENOMEM;
+
+ esw_attr->rx_tun_attr = tun_attr;
+ ip_version = mlx5e_tc_get_ip_version(spec, true);
+
+ if (ip_version == 4) {
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ tun_attr->dst_ip.v4 = *(__be32 *)daddr;
+ tun_attr->src_ip.v4 = *(__be32 *)saddr;
+ if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
+ return 0;
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (ip_version == 6) {
+ int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+ struct in6_addr zerov6 = {};
+
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
+ memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
+ if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
+ !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
+ return 0;
+ }
+#endif
+ /* Only set the flag if both src and dst ip addresses exist. They are
+ * required to establish routing.
+ */
+ flow_flag_set(flow, TUN_RX);
+ flow->attr->tun_ip_version = ip_version;
+ return 0;
+}
+
+static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
+{
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+
+ return all_flow_encaps_valid;
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
+ return;
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = e->encap_size;
+ reformat_params.data = e->encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+ PTR_ERR(e->pkt_reformat));
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
+ continue;
+
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+
+ /* Do not offload flows with unresolved neighbors */
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ continue;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ continue;
+ }
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
+ if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ flow->rule[0] = rule;
+ /* was unset when slow path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
+ continue;
+ spec = &flow->attr->parse_attr->spec;
+
+ /* update from encap rule to slow path rule */
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ flow->rule[0] = rule;
+ /* was unset when fast path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+
+ /* we know that the encap is valid */
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+}
+
+static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
+ struct list_head *flow_list,
+ int index)
+{
+ if (IS_ERR(mlx5e_flow_get(flow))) {
+ /* Flow is being deleted concurrently. Wait for it to be
+ * unoffloaded from hardware, otherwise deleting encap will
+ * fail.
+ */
+ wait_for_completion(&flow->del_hw_done);
+ return;
+ }
+ wait_for_completion(&flow->init_done);
+
+ flow->tmp_entry_index = index;
+ list_add(&flow->tmp_list, flow_list);
+}
+
+/* Takes reference to all flows attached to encap and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
+{
+ struct encap_flow_item *efi;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ mlx5e_take_tmp_flow(flow, flow_list, efi->index);
+ }
+}
+
+/* Takes reference to all flows attached to route and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
+ struct list_head *flow_list)
+{
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, &r->decap_flows, decap_routes)
+ mlx5e_take_tmp_flow(flow, flow_list, 0);
+}
+
+typedef bool (match_cb)(struct mlx5e_encap_entry *);
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e,
+ match_cb match)
+{
+ struct mlx5e_encap_entry *next = NULL;
+
+retry:
+ rcu_read_lock();
+
+ /* find encap with non-zero reference counter value */
+ for (next = e ?
+ list_next_or_null_rcu(&nhe->encap_list,
+ &e->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list) :
+ list_first_or_null_rcu(&nhe->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list);
+ next;
+ next = list_next_or_null_rcu(&nhe->encap_list,
+ &next->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list))
+ if (mlx5e_encap_take(next))
+ break;
+
+ rcu_read_unlock();
+
+ /* release starting encap */
+ if (e)
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
+ if (!next)
+ return next;
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+ if (!match(next)) {
+ e = next;
+ goto retry;
+ }
+
+ return next;
+}
+
+static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
+{
+ return e->flags & MLX5_ENCAP_ENTRY_VALID;
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
+}
+
+static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
+{
+ return e->compl_result >= 0;
+}
+
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_encap_entry *e = NULL;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = ipv6_stub->nd_tbl;
+#endif
+ else
+ return;
+
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
+ * next one.
+ */
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
+ struct encap_flow_item *efi, *tmp;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+ list_add(&flow->tmp_list, &flow_list);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ counter = mlx5e_tc_get_counter(flow);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ if (neigh_used) {
+ /* release current encap before breaking the loop */
+ mlx5e_encap_put(priv, e);
+ break;
+ }
+ }
+
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ WARN_ON(!list_empty(&e->flows));
+
+ if (e->compl_result > 0) {
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ }
+
+ kfree(e->tun_info);
+ kfree(e->encap_header);
+ kfree_rcu(e, rcu);
+}
+
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index);
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index)
+{
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ if (attr->esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5e_detach_encap_route(priv, flow, out_index);
+
+ /* flow wasn't fully initialized */
+ if (!e)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->encaps[out_index].list);
+ flow->encaps[out_index].e = NULL;
+ if (!refcount_dec_and_test(&e->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b)
+{
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
+ a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
+}
+
+static int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
+static int hash_encap_info(struct mlx5e_encap_key *key)
+{
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tc_tunnel->tunnel_type);
+}
+
+static int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
+
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_key e_key;
+ struct mlx5e_encap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
+ if (e->tunnel->encap_info_equal(&e_key, key) &&
+ mlx5e_encap_take(e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < out_index; i++) {
+ if (flow->encaps[i].e != e)
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ attr->dest_chain = 0;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
+ vport_num);
+ err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err >= 0) {
+ esw_attr->dests[out_index].src_port_rewrite_act_id = err;
+ err = 0;
+ }
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ int act_id = attr->dests[out_index].src_port_rewrite_act_id;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
+ vport_num);
+ mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ unsigned int ret;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ spin_lock_bh(&encap->route_lock);
+ ret = encap->route_tbl_last_update;
+ spin_unlock_bh(&encap->route_lock);
+ return ret;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev,
+ bool *encap_valid)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ const struct ip_tunnel_info *tun_info;
+ const struct mlx5e_mpls_info *mpls_info;
+ unsigned long tbl_time_before = 0;
+ struct mlx5e_encap_entry *e;
+ struct mlx5e_encap_key key;
+ bool entry_created = false;
+ unsigned short family;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ mpls_info = &parse_attr->mpls_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+ key.ip_tun_key = &tun_info->key;
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+ if (!key.tc_tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(&key);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ e = mlx5e_encap_get(priv, &key, hash_key);
+
+ /* must verify if encap is valid or not */
+ if (e) {
+ /* Check that entry was not already attached to this flow */
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ wait_for_completion(&e->res_ready);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (e->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
+ goto attach_flow;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ refcount_set(&e->refcnt, 1);
+ init_completion(&e->res_ready);
+ entry_created = true;
+ INIT_LIST_HEAD(&e->route_list);
+
+ tun_info = mlx5e_dup_tun_info(tun_info);
+ if (!tun_info) {
+ err = -ENOMEM;
+ goto out_err_init;
+ }
+ e->tun_info = tun_info;
+ memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err_init;
+
+ INIT_LIST_HEAD(&e->flows);
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ if (family == AF_INET)
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ complete_all(&e->res_ready);
+ if (err) {
+ e->compl_result = err;
+ goto out_err;
+ }
+ e->compl_result = 1;
+
+attach_flow:
+ err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
+ tbl_time_before, out_index);
+ if (err)
+ goto out_err;
+
+ err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
+ if (err == -EOPNOTSUPP) {
+ /* If device doesn't support int port offload,
+ * redirect to uplink vport.
+ */
+ mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
+ err = 0;
+ } else if (err) {
+ goto out_err;
+ }
+
+ flow->encaps[out_index].e = e;
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ *encap_valid = true;
+ } else {
+ *encap_valid = false;
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (e)
+ mlx5e_encap_put(priv, e);
+ return err;
+
+out_err_init:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ kfree(tun_info);
+ kfree(e);
+ return err;
+}
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err = 0;
+
+ if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ reformat_params.size = sizeof(attr->eth);
+ reformat_params.data = &attr->eth;
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}
+
+static int cmp_route_info(struct mlx5e_route_key *a,
+ struct mlx5e_route_key *b)
+{
+ if (a->ip_version == 4 && b->ip_version == 4)
+ return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
+ sizeof(a->endpoint_ip.v4));
+ else if (a->ip_version == 6 && b->ip_version == 6)
+ return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
+ sizeof(a->endpoint_ip.v6));
+ return 1;
+}
+
+static u32 hash_route_info(struct mlx5e_route_key *key)
+{
+ if (key->ip_version == 4)
+ return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
+ return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
+}
+
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r)
+{
+ WARN_ON(!list_empty(&r->decap_flows));
+ WARN_ON(!list_empty(&r->encap_entries));
+
+ kfree_rcu(r, rcu);
+}
+
+static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
+ if (!refcount_dec_and_test(&r->refcnt))
+ return;
+ hash_del_rcu(&r->hlist);
+ mlx5e_route_dealloc(priv, r);
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_route_key r_key;
+ struct mlx5e_route_entry *r;
+
+ hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
+ r_key = r->key;
+ if (!cmp_route_info(&r_key, key) &&
+ refcount_inc_not_zero(&r->refcnt))
+ return r;
+ }
+ return NULL;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get_create(struct mlx5e_priv *priv,
+ struct mlx5e_route_key *key,
+ int tunnel_dev_index,
+ unsigned long *route_tbl_change_time)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ struct mlx5e_route_entry *r;
+ u32 hash_key;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ hash_key = hash_route_info(key);
+ spin_lock_bh(&encap->route_lock);
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+ if (r) {
+ if (!mlx5e_route_entry_valid(r)) {
+ mlx5e_route_put_locked(priv, r);
+ return ERR_PTR(-EINVAL);
+ }
+ return r;
+ }
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ r->key = *key;
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+ r->tunnel_dev_index = tunnel_dev_index;
+ refcount_set(&r->refcnt, 1);
+ INIT_LIST_HEAD(&r->decap_flows);
+ INIT_LIST_HEAD(&r->encap_entries);
+
+ spin_lock_bh(&encap->route_lock);
+ *route_tbl_change_time = encap->route_tbl_last_update;
+ hash_add(encap->route_tbl, &r->hlist, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
+{
+ u32 hash_key = hash_route_info(key);
+ struct mlx5e_route_entry *r;
+
+ spin_lock_bh(&encap->route_lock);
+ encap->route_tbl_last_update = jiffies;
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+struct mlx5e_tc_fib_event_data {
+ struct work_struct work;
+ unsigned long event;
+ struct mlx5e_route_entry *r;
+ struct net_device *ul_dev;
+};
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work);
+static struct mlx5e_tc_fib_event_data *
+mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+
+ fib_work = kzalloc(sizeof(*fib_work), flags);
+ if (WARN_ON(!fib_work))
+ return NULL;
+
+ INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
+ fib_work->event = event;
+ fib_work->ul_dev = ul_dev;
+
+ return fib_work;
+}
+
+static int
+mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ unsigned long event)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct net_device *ul_dev;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ ul_dev = uplink_rpriv->netdev;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
+ if (!fib_work)
+ return -ENOMEM;
+
+ dev_hold(ul_dev);
+ refcount_inc(&r->refcnt);
+ fib_work->r = r;
+ queue_work(priv->wq, &fib_work->work);
+
+ return 0;
+}
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_before, tbl_time_after;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (!esw_attr->rx_tun_attr)
+ goto out;
+
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ tbl_time_after = tbl_time_before;
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
+ if (err || !esw_attr->rx_tun_attr->decap_vport)
+ goto out;
+
+ key.ip_version = attr->tun_ip_version;
+ if (key.ip_version == 4)
+ key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
+ else
+ key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
+ &tbl_time_after);
+ if (IS_ERR(r)) {
+ err = PTR_ERR(r);
+ goto out;
+ }
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ goto out;
+ }
+ }
+
+ flow->decap_route = r;
+ list_add(&flow->decap_routes, &r->decap_flows);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return 0;
+
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return err;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_after = tbl_time_before;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ unsigned short family;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+
+ if (family == AF_INET) {
+ key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
+ key.ip_version = 4;
+ } else if (family == AF_INET6) {
+ key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
+ key.ip_version = 6;
+ }
+
+ err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+ e->route_dev_ifindex, out_index);
+ if (err || !(esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+ return err;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
+ &tbl_time_after);
+ if (IS_ERR(r))
+ return PTR_ERR(r);
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ return err;
+ }
+ }
+
+ flow->encap_routes[out_index].r = r;
+ if (new_encap_entry)
+ list_add(&e->route_list, &r->encap_entries);
+ flow->encap_routes[out_index].index = out_index;
+ return 0;
+}
+
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_entry *r = flow->decap_route;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->decap_routes);
+ flow->decap_route = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index)
+{
+ struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e, *tmp;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ flow->encap_routes[out_index].r = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
+ list_del_init(&e->route_list);
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ esw_attr = attr->esw_attr;
+
+ if (flow_flag_test(flow, SLOW))
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ attr->modify_hdr = NULL;
+
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
+ }
+}
+
+static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
+ struct net_device *tunnel_dev,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
+ mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
+ mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
+ e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+
+ err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
+ e->out_dev, e->route_dev_ifindex,
+ flow->tmp_entry_index);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
+ continue;
+ }
+
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
+ err);
+ continue;
+ }
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ goto offload_to_slow_path;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ goto offload_to_slow_path;
+ }
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
+ if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ } else {
+offload_to_slow_path:
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ }
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ struct mlx5e_encap_entry *e;
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ list_for_each_entry(e, &r->encap_entries, route_list) {
+ LIST_HEAD(encap_flows);
+
+ mlx5e_take_all_encap_flows(e, &encap_flows);
+ if (list_empty(&encap_flows))
+ continue;
+
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_invalidate_encap(priv, e, &encap_flows);
+
+ if (!replace) {
+ list_splice(&encap_flows, flow_list);
+ continue;
+ }
+
+ mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
+ list_splice(&encap_flows, flow_list);
+ }
+
+ return 0;
+}
+
+static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, flow_list, tmp_list)
+ if (mlx5e_is_offloaded_flow(flow))
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+}
+
+static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
+ struct list_head *decap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, decap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ parse_attr = attr->parse_attr;
+ spec = &parse_attr->spec;
+ err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
+ err);
+ continue;
+ }
+
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ flow_flag_set(flow, OFFLOADED);
+ }
+ }
+}
+
+static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ LIST_HEAD(decap_flows);
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ mlx5e_take_all_route_decap_flows(r, &decap_flows);
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_unoffload_flow_list(priv, &decap_flows);
+ if (replace)
+ mlx5e_reoffload_decap(priv, &decap_flows);
+
+ list_splice(&decap_flows, flow_list);
+
+ return 0;
+}
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work)
+{
+ struct mlx5e_tc_fib_event_data *event_data =
+ container_of(work, struct mlx5e_tc_fib_event_data, work);
+ struct net_device *ul_dev = event_data->ul_dev;
+ struct mlx5e_priv *priv = netdev_priv(ul_dev);
+ struct mlx5e_route_entry *r = event_data->r;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+ bool replace;
+ int err;
+
+ /* sync with concurrent neigh updates */
+ rtnl_lock();
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
+
+ if (!mlx5e_route_entry_valid(r) && !replace)
+ goto out;
+
+ err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
+ err);
+
+ err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
+ err);
+
+ if (replace)
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ rtnl_unlock();
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ mlx5e_route_put(priv, event_data->r);
+ dev_put(event_data->ul_dev);
+ kfree(event_data);
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib_entry_notifier_info, info);
+ if (fen_info->fi->nh)
+ return NULL;
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->dst_len != 32)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ key.endpoint_ip.v4 = htonl(fen_info->dst);
+ key.ip_version = 4;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib6_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib6_entry_notifier_info, info);
+ fib_dev = fib6_info_nh_dev(fen_info->rt);
+ if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->rt->fib6_dst.plen != 128)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
+ sizeof(fen_info->rt->fib6_dst.addr));
+ key.ip_version = 6;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct fib_notifier_info *info = ptr;
+ struct mlx5e_tc_tun_encap *encap;
+ struct net_device *ul_dev;
+ struct mlx5e_priv *priv;
+
+ encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
+ priv = encap->priv;
+ ul_dev = priv->netdev;
+ priv = netdev_priv(ul_dev);
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ if (info->family == AF_INET)
+ fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
+ else if (info->family == AF_INET6)
+ fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
+ else
+ return NOTIFY_DONE;
+
+ if (!IS_ERR_OR_NULL(fib_work)) {
+ queue_work(priv->wq, &fib_work->work);
+ } else if (IS_ERR(fib_work)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
+ mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
+ PTR_ERR(fib_work));
+ }
+
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_DONE;
+}
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_tun_encap *encap;
+ int err;
+
+ encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
+ if (!encap)
+ return ERR_PTR(-ENOMEM);
+
+ encap->priv = priv;
+ encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
+ spin_lock_init(&encap->route_lock);
+ hash_init(encap->route_tbl);
+ err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
+ NULL, NULL);
+ if (err) {
+ kvfree(encap);
+ return ERR_PTR(err);
+ }
+
+ return encap;
+}
+
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
+{
+ if (!encap)
+ return;
+
+ unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
+ flush_workqueue(encap->priv->wq); /* flush fib event works */
+ kvfree(encap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
new file mode 100644
index 000000000000..d542b8476491
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUN_ENCAP_H__
+#define __MLX5_EN_TC_TUN_ENCAP_H__
+
+#include "tc_priv.h"
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev,
+ bool *encap_valid);
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv);
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap);
+
+#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
index 951ea26d96bc..f5b26f5a7de4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -227,6 +227,10 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
option_key = (struct geneve_opt *)&enc_opts.key->data[0];
option_mask = (struct geneve_opt *)&enc_opts.mask->data[0];
+ if (option_mask->opt_class == 0 && option_mask->type == 0 &&
+ !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4))
+ return 0;
+
if (option_key->length > max_tlv_option_data_len) {
NL_SET_ERR_MSG_MOD(extack,
"Matching on GENEVE options: unsupported option len");
@@ -301,6 +305,8 @@ static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv,
MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB);
}
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
return 0;
}
@@ -323,6 +329,34 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
return mlx5e_tc_tun_parse_geneve_options(priv, spec, f);
}
+static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b)
+{
+ struct ip_tunnel_info *a_info;
+ struct ip_tunnel_info *b_info;
+ bool a_has_opts, b_has_opts;
+
+ if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
+ return false;
+
+ a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
+ b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
+
+ /* keys are equal when both don't have any options attached */
+ if (!a_has_opts && !b_has_opts)
+ return true;
+
+ if (a_has_opts != b_has_opts)
+ return false;
+
+ /* geneve options stored in memory next to ip_tunnel_info struct */
+ a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
+ b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
+
+ return a_info->options_len == b_info->options_len &&
+ memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0;
+}
+
struct mlx5e_tc_tunnel geneve_tunnel = {
.tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE,
.match_level = MLX5_MATCH_L4,
@@ -332,4 +366,5 @@ struct mlx5e_tc_tunnel geneve_tunnel = {
.generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve,
.parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve,
.parse_tunnel = mlx5e_tc_tun_parse_geneve,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_geneve,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
index 58b13192df23..ada14f0574dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
@@ -80,6 +80,8 @@ static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
}
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
return 0;
}
@@ -92,4 +94,5 @@ struct mlx5e_tc_tunnel gre_tunnel = {
.generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap,
.parse_udp_ports = NULL,
.parse_tunnel = mlx5e_tc_tun_parse_gretap,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
new file mode 100644
index 000000000000..c5b1617d556f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/bareudp.h>
+#include <net/mpls.h>
+#include "en/tc_tun.h"
+
+static bool can_offload(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2);
+}
+
+static int calc_hlen(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) + MPLS_HLEN;
+}
+
+static int init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &mplsoudp_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ return 0;
+}
+
+static int generate_ip_tun_hdr(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *r)
+{
+ const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ const struct mlx5e_mpls_info *mpls_info = &r->mpls_info;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct mpls_shim_hdr *mpls;
+
+ mpls = (struct mpls_shim_hdr *)(udp + 1);
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos);
+
+ return 0;
+}
+
+static int parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+}
+
+static int parse_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_match_mpls match;
+ void *misc2_c;
+ void *misc2_v;
+
+ if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
+ !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
+ return -EOPNOTSUPP;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return -EOPNOTSUPP;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
+ flow_rule_match_mpls(rule, &match);
+
+ /* Only support matching the first LSE */
+ if (match.mask->used_lses != 1)
+ return -EOPNOTSUPP;
+
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_label,
+ match.mask->ls[0].mpls_label);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_label,
+ match.key->ls[0].mpls_label);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_exp,
+ match.mask->ls[0].mpls_tc);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_exp, match.key->ls[0].mpls_tc);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.mask->ls[0].mpls_bos);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.key->ls[0].mpls_bos);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.mask->ls[0].mpls_ttl);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.key->ls[0].mpls_ttl);
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel mplsoudp_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = can_offload,
+ .calc_hlen = calc_hlen,
+ .init_encap_attr = init_encap_attr,
+ .generate_ip_tun_hdr = generate_ip_tun_hdr,
+ .parse_udp_ports = parse_udp_ports,
+ .parse_tunnel = parse_tunnel,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
index 37b176801bcc..fd07c4cbfd1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -136,9 +136,19 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
be32_to_cpu(enc_keyid.key->keyid));
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
return 0;
}
+static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev)
+{
+ const struct vxlan_dev *vxlan = netdev_priv(mirred_dev);
+ const struct vxlan_rdst *dst = &vxlan->default_dst;
+
+ return dst->remote_ifindex;
+}
+
struct mlx5e_tc_tunnel vxlan_tunnel = {
.tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN,
.match_level = MLX5_MATCH_L4,
@@ -148,4 +158,6 @@ struct mlx5e_tc_tunnel vxlan_tunnel = {
.generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan,
.parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan,
.parse_tunnel = mlx5e_tc_tun_parse_vxlan,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+ .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
new file mode 100644
index 000000000000..d4239e3b3c88
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "tir.h"
+#include "params.h"
+#include <linux/mlx5/transobj.h>
+
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
+
+/* max() doesn't work inside square brackets. */
+#define MLX5E_TIR_CMD_IN_SZ_DW ( \
+ MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? \
+ MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \
+)
+
+struct mlx5e_tir_builder {
+ u32 in[MLX5E_TIR_CMD_IN_SZ_DW];
+ bool modify;
+};
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify)
+{
+ struct mlx5e_tir_builder *builder;
+
+ builder = kvzalloc(sizeof(*builder), GFP_KERNEL);
+ builder->modify = modify;
+
+ return builder;
+}
+
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder)
+{
+ kvfree(builder);
+}
+
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder)
+{
+ memset(builder->in, 0, sizeof(builder->in));
+}
+
+static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder)
+{
+ if (builder->modify)
+ return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx);
+ return MLX5_ADDR_OF(create_tir_in, builder->in, ctx);
+}
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
+ MLX5_SET(tirc, tirc, inline_rqn, rqn);
+}
+
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, rqtn);
+ MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
+}
+
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+ const unsigned int rough_max_l2_l3_hdr_sz = 256;
+
+ if (builder->modify)
+ MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
+
+ switch (pkt_merge_param->type) {
+ case MLX5E_PACKET_MERGE_LRO:
+ MLX5_SET(tirc, tirc, packet_merge_mask,
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+ (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5e_hfunc_to_hw(u8 hfunc)
+{
+ switch (hfunc) {
+ case ETH_RSS_HASH_TOP:
+ return MLX5_RX_HASH_FN_TOEPLITZ;
+ case ETH_RSS_HASH_XOR:
+ return MLX5_RX_HASH_FN_INVERTED_XOR8;
+ default:
+ return MLX5_RX_HASH_FN_NONE;
+ }
+}
+
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+ bool inner)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+ void *hfso;
+
+ if (builder->modify)
+ MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1);
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc));
+ if (rss_hash->hfunc == ETH_RSS_HASH_TOP) {
+ const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, rss_hash->toeplitz_hash_key, len);
+ }
+
+ if (inner)
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
+ else
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields, rss_tt->rx_hash_fields);
+}
+
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+}
+
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, tls_en, 1);
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST |
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST);
+}
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+ struct mlx5_core_dev *mdev, bool reg)
+{
+ int err;
+
+ tir->mdev = mdev;
+
+ err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn);
+ if (err)
+ return err;
+
+ if (reg) {
+ struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+ mutex_lock(&res->td.list_lock);
+ list_add(&tir->list, &res->td.tirs_list);
+ mutex_unlock(&res->td.list_lock);
+ } else {
+ INIT_LIST_HEAD(&tir->list);
+ }
+
+ return 0;
+}
+
+void mlx5e_tir_destroy(struct mlx5e_tir *tir)
+{
+ struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+ /* Skip mutex if list_del is no-op (the TIR wasn't registered in the
+ * list). list_empty will never return true for an item of tirs_list,
+ * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency
+ * of the list->next value.
+ */
+ if (!list_empty(&tir->list)) {
+ mutex_lock(&res->td.list_lock);
+ list_del(&tir->list);
+ mutex_unlock(&res->td.list_lock);
+ }
+
+ mlx5_core_destroy_tir(tir->mdev, tir->tirn);
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder)
+{
+ return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
new file mode 100644
index 000000000000..857a84bcd53a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_TIR_H__
+#define __MLX5_EN_TIR_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_rss_params_hash {
+ u8 hfunc;
+ u8 toeplitz_hash_key[40];
+};
+
+struct mlx5e_rss_params_traffic_type {
+ u8 l3_prot_type;
+ u8 l4_prot_type;
+ u32 rx_hash_fields;
+};
+
+struct mlx5e_tir_builder;
+struct mlx5e_packet_merge_param;
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support);
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param);
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+ bool inner);
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder);
+
+struct mlx5_core_dev;
+
+struct mlx5e_tir {
+ struct mlx5_core_dev *mdev;
+ u32 tirn;
+ struct list_head list;
+};
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+ struct mlx5_core_dev *mdev, bool reg);
+void mlx5e_tir_destroy(struct mlx5e_tir *tir);
+
+static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir)
+{
+ return tir->tirn;
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder);
+
+#endif /* __MLX5_EN_TIR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
new file mode 100644
index 000000000000..201ac7dd338f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies */
+
+#include <net/page_pool.h>
+#include "en/txrx.h"
+#include "en/params.h"
+#include "en/trap.h"
+
+static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_trap *trap_ctx = container_of(napi, struct mlx5e_trap, napi);
+ struct mlx5e_ch_stats *ch_stats = trap_ctx->stats;
+ struct mlx5e_rq *rq = &trap_ctx->rq;
+ bool busy = false;
+ int work_done = 0;
+
+ rcu_read_lock();
+
+ ch_stats->poll++;
+
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ busy |= work_done == budget;
+ busy |= rq->post_wqes(rq);
+
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ goto out;
+
+ mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
+ return work_done;
+}
+
+static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = t->mdev;
+ struct mlx5e_priv *priv = t->priv;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = t->pdev;
+ rq->netdev = priv->netdev;
+ rq->priv = priv;
+ rq->clock = &mdev->clock;
+ rq->tstamp = &priv->tstamp;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &priv->trap_stats.rq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+ mlx5e_rq_set_trap_handlers(rq, params);
+}
+
+static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t)
+{
+ struct mlx5e_rq_param *rq_param = &t->rq_param;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder trap_moder = {};
+ struct mlx5e_rq *rq = &t->rq;
+ int node;
+ int err;
+
+ node = dev_to_node(mdev->device);
+
+ ccp.node = node;
+ ccp.ch_stats = t->stats;
+ ccp.napi = &t->napi;
+ ccp.ix = 0;
+ err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq);
+ if (err)
+ return err;
+
+ mlx5e_init_trap_rq(t, &t->params, rq);
+ err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq);
+ if (err)
+ goto err_destroy_cq;
+
+ return 0;
+
+err_destroy_cq:
+ mlx5e_close_cq(&rq->cq);
+
+ return err;
+}
+
+static void mlx5e_close_trap_rq(struct mlx5e_rq *rq)
+{
+ mlx5e_close_rq(rq);
+ mlx5e_close_cq(&rq->cq);
+}
+
+static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
+ u32 rqn)
+{
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn);
+ err = mlx5e_tir_init(tir, builder, mdev, true);
+
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
+ int max_mtu, u16 q_counter,
+ struct mlx5e_trap *t)
+{
+ struct mlx5e_params *params = &t->params;
+
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ mlx5e_init_rq_type_params(mdev, params);
+ params->sw_mtu = max_mtu;
+ mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param);
+}
+
+static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
+{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0));
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_trap *t;
+ int err;
+
+ t = kvzalloc_node(sizeof(*t), GFP_KERNEL, cpu_to_node(cpu));
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t);
+
+ t->priv = priv;
+ t->mdev = priv->mdev;
+ t->tstamp = &priv->tstamp;
+ t->pdev = mlx5_core_dma_dev(priv->mdev);
+ t->netdev = priv->netdev;
+ t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ t->stats = &priv->trap_stats.ch;
+
+ netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll);
+
+ err = mlx5e_open_trap_rq(priv, t);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ err = mlx5e_create_trap_direct_rq_tir(t->mdev, &t->tir, t->rq.rqn);
+ if (err)
+ goto err_close_trap_rq;
+
+ return t;
+
+err_close_trap_rq:
+ mlx5e_close_trap_rq(&t->rq);
+err_napi_del:
+ netif_napi_del(&t->napi);
+ kvfree(t);
+ return ERR_PTR(err);
+}
+
+void mlx5e_close_trap(struct mlx5e_trap *trap)
+{
+ mlx5e_tir_destroy(&trap->tir);
+ mlx5e_close_trap_rq(&trap->rq);
+ netif_napi_del(&trap->napi);
+ kvfree(trap);
+}
+
+static void mlx5e_activate_trap(struct mlx5e_trap *trap)
+{
+ napi_enable(&trap->napi);
+ mlx5e_activate_rq(&trap->rq);
+ mlx5e_trigger_napi_sched(&trap->napi);
+}
+
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap = priv->en_trap;
+
+ mlx5e_deactivate_rq(&trap->rq);
+ napi_disable(&trap->napi);
+}
+
+static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap;
+
+ trap = mlx5e_open_trap(priv);
+ if (IS_ERR(trap))
+ goto out;
+
+ mlx5e_activate_trap(trap);
+out:
+ return trap;
+}
+
+static void mlx5e_del_trap_queue(struct mlx5e_priv *priv)
+{
+ mlx5e_deactivate_trap(priv);
+ mlx5e_close_trap(priv->en_trap);
+ priv->en_trap = NULL;
+}
+
+static int mlx5e_trap_get_tirn(struct mlx5e_trap *en_trap)
+{
+ return en_trap->tir.tirn;
+}
+
+static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id)
+{
+ bool open_queue = !priv->en_trap;
+ struct mlx5e_trap *trap;
+ int err;
+
+ if (open_queue) {
+ trap = mlx5e_add_trap_queue(priv);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ priv->en_trap = trap;
+ }
+
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ err = -EINVAL;
+ goto err_out;
+ }
+ return 0;
+
+err_out:
+ if (open_queue)
+ mlx5e_del_trap_queue(priv);
+ return err;
+}
+
+static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id)
+{
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ mlx5e_remove_vlan_trap(priv->fs);
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ mlx5e_remove_mac_trap(priv->fs);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ return -EINVAL;
+ }
+ if (priv->en_trap && !mlx5_devlink_trap_get_num_active(priv->mdev))
+ mlx5e_del_trap_queue(priv);
+
+ return 0;
+}
+
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx)
+{
+ int err = 0;
+
+ /* Traps are unarmed when interface is down, no need to update
+ * them. The configuration is saved in the core driver,
+ * queried and applied upon interface up operation in
+ * mlx5e_open_locked().
+ */
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ switch (trap_ctx->action) {
+ case DEVLINK_TRAP_ACTION_TRAP:
+ err = mlx5e_handle_action_trap(priv, trap_ctx->id);
+ break;
+ case DEVLINK_TRAP_ACTION_DROP:
+ err = mlx5e_handle_action_drop(priv, trap_ctx->id);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unsupported action %d\n", __func__,
+ trap_ctx->action);
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static int mlx5e_apply_trap(struct mlx5e_priv *priv, int trap_id, bool enable)
+{
+ enum devlink_trap_action action;
+ int err;
+
+ err = mlx5_devlink_traps_get_action(priv->mdev, trap_id, &action);
+ if (err)
+ return err;
+ if (action == DEVLINK_TRAP_ACTION_TRAP)
+ err = enable ? mlx5e_handle_action_trap(priv, trap_id) :
+ mlx5e_handle_action_drop(priv, trap_id);
+ return err;
+}
+
+static const int mlx5e_traps_arr[] = {
+ DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER,
+};
+
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5e_traps_arr); i++) {
+ err = mlx5e_apply_trap(priv, mlx5e_traps_arr[i], enable);
+ if (err)
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
new file mode 100644
index 000000000000..aa3f17658c6d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies */
+
+#ifndef __MLX5E_TRAP_H__
+#define __MLX5E_TRAP_H__
+
+#include "../en.h"
+#include "../devlink.h"
+
+struct mlx5e_trap {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_tir tir;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+
+ /* data path - accessed per napi poll */
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
+
+ struct mlx5e_params params;
+ struct mlx5e_rq_param rq_param;
+};
+
+void mlx5e_close_trap(struct mlx5e_trap *trap);
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv);
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx);
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index a226277b0980..853f312cd757 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -5,27 +5,84 @@
#define __MLX5_EN_TXRX_H___
#include "en.h"
+#include <linux/indirect_call_wrapper.h>
-#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
- MLX5E_SQ_NOPS_ROOM)
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_TLS_ROOM (0)
-#else
-/* TLS offload requires additional stop_room for:
- * - a resync SKB.
- * kTLS offload requires fixed additional stop_room for:
- * - a static params WQE, and a progress params WQE.
- * The additional MTU-depending room for the resync DUMP WQEs
- * will be calculated and added in runtime.
+#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+/* IPSEC inline data includes:
+ * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for
+ * next header.
+ * 2. ESP authentication data: 16 bytes for ICV.
+ */
+#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \
+ 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS)
+
+/* 366 should be big enough to cover all L2, L3 and L4 headers with possible
+ * encapsulations.
*/
-#define MLX5E_SQ_TLS_ROOM \
- (MLX5_SEND_WQE_MAX_WQEBBS + \
- MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
+#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \
+ MLX5_SEND_WQE_DS)
+
+/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */
+#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \
+ MLX5E_MAX_TX_INLINE_DS + \
+ MLX5E_MAX_TX_IPSEC_DS + \
+ MAX_SKB_FRAGS + 1, \
+ MLX5_SEND_WQEBB_NUM_DS)
+
+#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
+
+static inline
+ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts)
+{
+ return INDIRECT_CALL_2(func, mlx5_real_time_cyc2time, mlx5_timecounter_cyc2time,
+ clock, cqe_ts);
+}
+
+enum mlx5e_icosq_wqe_type {
+ MLX5E_ICOSQ_WQE_NOP,
+ MLX5E_ICOSQ_WQE_UMR_RX,
+ MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
+#ifdef CONFIG_MLX5_EN_TLS
+ MLX5E_ICOSQ_WQE_UMR_TLS,
+ MLX5E_ICOSQ_WQE_SET_PSV_TLS,
+ MLX5E_ICOSQ_WQE_GET_PSV_TLS,
#endif
+};
-#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+/* General */
+static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb)
+{
+ return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST;
+}
+
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
+int mlx5e_napi_poll(struct napi_struct *napi, int budget);
+int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
+
+/* RX */
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
+
+/* TX */
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
+
+static inline bool
+mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
+{
+ return (*fifo->pc - *fifo->cc) < fifo->mask;
+}
static inline bool
mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
@@ -33,19 +90,19 @@ mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
}
-static inline void *
-mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi)
+static inline void *mlx5e_fetch_wqe(struct mlx5_wq_cyc *wq, u16 pi, size_t wqe_size)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
void *wqe;
- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
- memset(wqe, 0, size);
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(wqe, 0, wqe_size);
return wqe;
}
+#define MLX5E_TX_FETCH_WQE(sq, pi) \
+ ((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_tx_wqe)))
+
static inline struct mlx5e_tx_wqe *
mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
{
@@ -81,28 +138,110 @@ mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
return wqe;
}
-static inline void
-mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
+struct mlx5e_tx_wqe_info {
+ struct sk_buff *skb;
+ u32 num_bytes;
+ u8 num_wqebbs;
+ u8 num_dma;
+ u8 num_fifo_pkts;
+#ifdef CONFIG_MLX5_EN_TLS
+ struct page *resync_dump_frag_page;
+#endif
+};
+
+static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_tx_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
- struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
+}
- edge_wi = wi + nnops;
+struct mlx5e_shampo_umr {
+ u16 len;
+};
+
+struct mlx5e_icosq_wqe_info {
+ u8 wqe_type;
+ u8 num_wqebbs;
+
+ /* Auxiliary data for different wqe types. */
+ union {
+ struct {
+ struct mlx5e_rq *rq;
+ } umr;
+ struct mlx5e_shampo_umr shampo;
+#ifdef CONFIG_MLX5_EN_TLS
+ struct {
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ } tls_set_params;
+ struct {
+ struct mlx5e_ktls_rx_resync_buf *buf;
+ } tls_get_params;
+#endif
+ };
+};
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- memset(wi, 0, sizeof(*wi));
- wi->num_wqebbs = 1;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq);
+
+static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_icosq_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_NOP,
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}
- sq->stats->nop += nnops;
+
+ return pi;
}
static inline void
mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
struct mlx5_wqe_ctrl_seg *ctrl)
{
- ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
/* ensure wqe is visible to device before updating doorbell record */
dma_wmb();
@@ -116,29 +255,6 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
mlx5_write64((__be32 *)ctrl, uar_map);
}
-static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg)
-{
- return cseg && !!cseg->tisn;
-}
-
-static inline u8
-mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
- struct sk_buff *skb)
-{
- u8 mode;
-
- if (mlx5e_transport_inline_tx_wqe(cseg))
- return MLX5_INLINE_MODE_TCP_UDP;
-
- mode = sq->min_inline_mode;
-
- if (skb_vlan_tag_present(skb) &&
- test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
- mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
-
- return mode;
-}
-
static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
{
struct mlx5_core_cq *mcq;
@@ -164,6 +280,26 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
dma->type = map_type;
}
+static inline
+struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i)
+{
+ return &fifo->fifo[i & fifo->mask];
+}
+
+static inline
+void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
+{
+ struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
+
+ *skb_item = skb;
+}
+
+static inline
+struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
+{
+ return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
+}
+
static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{
@@ -179,12 +315,78 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
}
}
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
+
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
+{
+ return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+}
+
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
{
- if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
mlx5_wq_ll_reset(&rq->mpwqe.wq);
- else
+ rq->mpwqe.actual_wq_head = 0;
+ } else {
mlx5_wq_cyc_reset(&rq->wqe.wq);
+ }
+}
+
+static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn,
+ struct mlx5_err_cqe *err_cqe)
+{
+ struct mlx5_cqwq *wq = &cq->wq;
+ u32 ci;
+
+ ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+
+ netdev_err(cq->netdev,
+ "Error cqe on cqn 0x%x, ci 0x%x, qn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ cq->mcq.cqn, ci, qn,
+ get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
+ mlx5_dump_err_cqe(cq->mdev, err_cqe);
+}
+
+static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+ }
+}
+
+static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return rq->mpwqe.wq.cur_sz;
+ default:
+ return rq->wqe.wq.cur_sz;
+ }
+}
+
+static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_head(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_head(&rq->wqe.wq);
+ }
+}
+
+static inline u16 mlx5e_rqwq_get_wqe_counter(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_counter(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_counter(&rq->wqe.wq);
+ }
}
/* SW parser related functions */
@@ -197,6 +399,15 @@ struct mlx5e_swp_spec {
u8 tun_l4_proto;
};
+static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg)
+{
+ /* SWP offsets are in 2-bytes words */
+ eseg->swp_outer_l3_offset += VLAN_HLEN / 2;
+ eseg->swp_outer_l4_offset += VLAN_HLEN / 2;
+ eseg->swp_inner_l3_offset += VLAN_HLEN / 2;
+ eseg->swp_inner_l4_offset += VLAN_HLEN / 2;
+}
+
static inline void
mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
struct mlx5e_swp_spec *swp_spec)
@@ -223,11 +434,57 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
switch (swp_spec->tun_l4_proto) {
case IPPROTO_UDP:
eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
- /* fall through */
+ fallthrough;
case IPPROTO_TCP:
eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
break;
}
}
+#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)
+
+static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size)
+{
+ WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev));
+
+ /* A WQE must not cross the page boundary, hence two conditions:
+ * 1. Its size must not exceed the page size.
+ * 2. If the WQE size is X, and the space remaining in a page is less
+ * than X, this space needs to be padded with NOPs. So, one WQE of
+ * size X may require up to X-1 WQEBBs of padding, which makes the
+ * stop room of X-1 + X.
+ * WQE size is also limited by the hardware limit.
+ */
+ WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev),
+ "wqe_size %u is greater than max SQ WQEBBs %u",
+ wqe_size, mlx5e_get_max_sq_wqebbs(mdev));
+
+ return MLX5E_STOP_ROOM(wqe_size);
+}
+
+static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev)
+{
+ return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev));
+}
+
+static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev)
+{
+ u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+
+ return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs);
+}
+
+static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
+{
+ u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size);
+
+ return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
+}
+
+static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
+{
+ size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+
+ return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
+}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index f049e0ac308a..20507ef2f956 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,7 +31,7 @@
*/
#include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
@@ -57,21 +57,23 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
- struct mlx5e_dma_info *di, struct xdp_buff *xdp)
+ struct page *page, struct xdp_buff *xdp)
{
- struct mlx5e_xdp_xmit_data xdptxd;
+ struct skb_shared_info *sinfo = NULL;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
+ int i;
- xdpf = convert_to_xdp_frame(xdp);
+ xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return false;
xdptxd.data = xdpf->data;
xdptxd.len = xdpf->len;
- if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
/* The xdp_buff was in the UMEM and was copied into a newly
* allocated page. The UMEM page was returned via the ZCA, and
* this new page has to be mapped at this point and has to be
@@ -96,108 +98,151 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
xdptxd.dma_addr = dma_addr;
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = dma_addr;
- } else {
- /* Driver assumes that convert_to_xdp_frame returns an xdp_frame
- * that points to the same memory region as the original
- * xdp_buff. It allows to map the memory only once and to use
- * the DMA_BIDIRECTIONAL mode.
- */
- xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+ return false;
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ return true;
+ }
+
+ /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
+ * that points to the same memory region as the original xdp_buff. It
+ * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
+ * mode.
+ */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ xdpi.page.rq = rq;
+
+ dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
- dma_addr = di->addr + (xdpf->data - (void *)xdpf);
- dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
- DMA_TO_DEVICE);
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
- xdptxd.dma_addr = dma_addr;
- xdpi.page.rq = rq;
- xdpi.page.di = *di;
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+ u32 len;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+ len = skb_frag_size(frag);
+ dma_sync_single_for_device(sq->pdev, addr, len,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+
+ xdptxd.dma_addr = dma_addr;
+
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+ return false;
+
+ xdpi.page.page = page;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ xdpi.page.page = skb_frag_page(frag);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ }
}
- return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0);
+ return true;
}
/* returns true if packet was consumed by xdp */
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len, bool xsk)
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp)
{
- struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
- struct xdp_umem *umem = rq->umem;
- struct xdp_buff xdp;
u32 act;
int err;
- if (!prog)
- return false;
-
- xdp.data = va + *rx_headroom;
- xdp_set_data_meta_invalid(&xdp);
- xdp.data_end = xdp.data + *len;
- xdp.data_hard_start = va;
- if (xsk)
- xdp.handle = di->xsk.handle;
- xdp.rxq = &rq->xdp_rxq;
-
- act = bpf_prog_run_xdp(prog, &xdp);
- if (xsk) {
- u64 off = xdp.data - xdp.data_hard_start;
-
- xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
- }
+ act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
- *rx_headroom = xdp.data - xdp.data_hard_start;
- *len = xdp.data_end - xdp.data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
case XDP_REDIRECT:
/* When XDP enabled then page-refcnt==1 here */
- err = xdp_do_redirect(rq->netdev, &xdp, prog);
+ err = xdp_do_redirect(rq->netdev, xdp, prog);
if (unlikely(err))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- if (!xsk)
- mlx5e_page_dma_unmap(rq, di);
+ if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
+ mlx5e_page_dma_unmap(rq, page);
rq->stats->xdp_redirect++;
return true;
default:
- bpf_warn_invalid_xdp_action(act);
- /* fall through */
+ bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
+ fallthrough;
case XDP_ABORTED:
xdp_abort:
trace_xdp_exception(rq->netdev, prog, act);
- /* fall through */
+ fallthrough;
case XDP_DROP:
rq->stats->xdp_drop++;
return true;
}
}
-static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
- struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi, contig_wqebbs;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_xdp_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = 1,
+ .num_pkts = 0,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nops += contig_wqebbs;
- if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS))
- mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
- session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi);
+ return pi;
+}
- prefetchw(session->wqe->data);
- session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
- session->pkt_count = 0;
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
- mlx5e_xdp_update_inline_state(sq);
+ pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ net_prefetchw(wqe->data);
+
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
+ };
stats->mpwqe++;
}
@@ -205,7 +250,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
u16 ds_count = session->ds_count;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -230,11 +275,11 @@ enum {
MLX5E_XDP_CHECK_START_MPWQE = 2,
};
-static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5E_XDPSQ_STOP_ROOM))) {
+ sq->stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -247,14 +292,27 @@ static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
return MLX5E_XDP_CHECK_OK;
}
-static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi,
- int check_result)
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result);
+
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
+ if (unlikely(sinfo)) {
+ /* MPWQE is enabled, but a multi-buffer packet is queued for
+ * transmission. MPWQE can't send fragmented packets, so close
+ * the current session and fall back to a regular WQE.
+ */
+ if (unlikely(sq->mpwqe.wqe))
+ mlx5e_xdp_mpwqe_complete(sq);
+ return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+ }
+
if (unlikely(xdptxd->len > sq->hw_mtu)) {
stats->err++;
return false;
@@ -275,18 +333,16 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
- session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
+ if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
-static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
{
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -296,44 +352,76 @@ static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
return MLX5E_XDP_CHECK_OK;
}
-static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi,
- int check_result)
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
+}
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg = wqe->data;
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5e_tx_wqe *wqe;
dma_addr_t dma_addr = xdptxd->dma_addr;
u32 dma_len = xdptxd->len;
+ u16 ds_cnt, inline_hdr_sz;
+ u8 num_wqebbs = 1;
+ int num_frags = 0;
+ u16 pi;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- prefetchw(wqe);
-
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
stats->err++;
return false;
}
- if (!check_result)
- check_result = mlx5e_xmit_xdp_frame_check(sq);
+ ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
+ ds_cnt++;
+
+ /* check_result must be 0 if sinfo is passed. */
+ if (!check_result) {
+ int stop_room = 1;
+
+ if (unlikely(sinfo)) {
+ ds_cnt += sinfo->nr_frags;
+ num_frags = sinfo->nr_frags;
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
+ * enough to hold all fragments.
+ */
+ stop_room = MLX5E_STOP_ROOM(num_wqebbs);
+ }
+
+ check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
+ }
if (unlikely(check_result < 0))
return false;
- cseg->fm_ce_se = 0;
+ pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ net_prefetchw(wqe);
+
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ inline_hdr_sz = 0;
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
- eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
+ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
dseg++;
}
@@ -343,11 +431,45 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- sq->pc++;
+ if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
+ u8 num_pkts = 1 + num_frags;
+ int i;
+
+ memset(&cseg->trailer, 0, sizeof(cseg->trailer));
+ memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
+
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ dseg->lkey = sq->mkey_be;
+
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+
+ dseg++;
+ dseg->addr = cpu_to_be64(addr);
+ dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+ dseg->lkey = sq->mkey_be;
+ }
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_pkts = num_pkts,
+ };
+
+ sq->pc += num_wqebbs;
+ } else {
+ cseg->fm_ce_se = 0;
+
+ sq->pc++;
+ }
sq->doorbell_cseg = cseg;
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
@@ -355,7 +477,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_wqe_info *wi,
u32 *xsk_frames,
- bool recycle)
+ bool recycle,
+ struct xdp_frame_bulk *bq)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
u16 i;
@@ -368,11 +491,11 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
/* XDP_TX from the XSK RQ and XDP_REDIRECT */
dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
xdpi.frame.xdpf->len, DMA_TO_DEVICE);
- xdp_return_frame(xdpi.frame.xdpf);
+ xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
break;
case MLX5E_XDP_XMIT_MODE_PAGE:
/* XDP_TX from the regular RQ */
- mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+ mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
break;
case MLX5E_XDP_XMIT_MODE_XSK:
/* AF_XDP send */
@@ -386,12 +509,15 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
+ struct xdp_frame_bulk bq;
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
u32 xsk_frames = 0;
u16 sqcc;
int i;
+ xdp_frame_bulk_init(&bq);
+
sq = container_of(cq, struct mlx5e_xdpsq, cq);
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
@@ -408,34 +534,38 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
i = 0;
do {
- u16 wqe_counter;
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 wqe_counter, ci;
bool last_wqe;
mlx5_cqwq_pop(&cq->wq);
wqe_counter = be16_to_cpu(cqe->wqe_counter);
- if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
- netdev_WARN_ONCE(sq->channel->netdev,
- "Bad OP in XDPSQ CQE: 0x%x\n",
- get_cqe_opcode(cqe));
-
do {
- struct mlx5e_xdp_wqe_info *wi;
- u16 ci;
-
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
sqcc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
} while (!last_wqe);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(sq->channel->netdev,
+ "Bad OP in XDPSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+ (struct mlx5_err_cqe *)cqe);
+ mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+ }
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+ xdp_flush_frame_bulk(&bq);
+
if (xsk_frames)
- xsk_umem_complete_tx(sq->umem, xsk_frames);
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
sq->stats->cqes += i;
@@ -450,8 +580,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
+ struct xdp_frame_bulk bq;
u32 xsk_frames = 0;
+ xdp_frame_bulk_init(&bq);
+
+ rcu_read_lock(); /* need for xdp_return_frame_bulk */
+
while (sq->cc != sq->pc) {
struct mlx5e_xdp_wqe_info *wi;
u16 ci;
@@ -461,11 +596,14 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
sq->cc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
}
+ xdp_flush_frame_bulk(&bq);
+ rcu_read_unlock();
+
if (xsk_frames)
- xsk_umem_complete_tx(sq->umem, xsk_frames);
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
}
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -473,7 +611,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_xdpsq *sq;
- int drops = 0;
+ int nxmit = 0;
int sq_num;
int i;
@@ -493,30 +631,31 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
- struct mlx5e_xdp_xmit_data xdptxd;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
+ bool ret;
xdptxd.data = xdpf->data;
xdptxd.len = xdpf->len;
xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
xdptxd.len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
- xdp_return_frame_rx_napi(xdpf);
- drops++;
- continue;
- }
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
+ break;
xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = xdptxd.dma_addr;
- if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) {
+ ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
+ if (unlikely(!ret)) {
dma_unmap_single(sq->pdev, xdptxd.dma_addr,
xdptxd.len, DMA_TO_DEVICE);
- xdp_return_frame_rx_napi(xdpf);
- drops++;
+ break;
}
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ nxmit++;
}
if (flags & XDP_XMIT_FLUSH) {
@@ -525,7 +664,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
mlx5e_xmit_xdp_doorbell(sq);
}
- return n - drops;
+ return nxmit;
}
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
@@ -550,4 +689,3 @@ void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
-
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index d7587f40ecae..bc2d9034af5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -32,38 +32,22 @@
#ifndef __MLX5_EN_XDP_H__
#define __MLX5_EN_XDP_H__
+#include <linux/indirect_call_wrapper.h>
+
#include "en.h"
#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
- (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
-#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
-
-#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
-
-#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
-#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
- DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
-
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
- */
-#if L1_CACHE_BYTES < 128
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
-#define MLX5E_XDP_MPW_MAX_NUM_DS \
- (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
+ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
+ sizeof(struct mlx5_wqe_inline_seg))
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len, bool xsk);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
@@ -72,6 +56,17 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo,
+ int check_result));
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo,
+ int check_result));
+INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
+INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
+
static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
{
set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
@@ -87,7 +82,7 @@ static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
/* Let other device's napi(s) and XSK wakeups see our new state. */
- synchronize_rcu();
+ synchronize_net();
}
static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
@@ -111,60 +106,48 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
/* Enable inline WQEs to shift some load from a congested HCA (HW) to
* a less congested cpu (SW).
*/
-static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
+static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
{
u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
- if (session->inline_on) {
- if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
- session->inline_on = 0;
- return;
- }
+ if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
+ return false;
- /* inline is false */
- if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
- session->inline_on = 1;
-}
+ if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
+ return true;
-static inline bool
-mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
-{
- return session->inline_on &&
- session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
+ return cur;
}
-static inline void
-mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
+static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
- struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- wi->num_wqebbs = 1;
- wi->num_pkts = 0;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
+ if (session->inline_on)
+ return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
+ max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
- sq->stats->nops += nnops;
+ return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
}
+struct mlx5e_xdp_wqe_info {
+ u8 num_wqebbs;
+ u8 num_pkts;
+};
+
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
u32 dma_len = xdptxd->len;
session->pkt_count++;
+ session->bytes_count += dma_len;
if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
struct mlx5_wqe_inline_seg *inline_dseg =
@@ -186,19 +169,6 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
session->ds_count++;
}
-static inline struct mlx5e_tx_wqe *
-mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_tx_wqe *wqe;
-
- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
- memset(wqe, 0, sizeof(*wqe));
-
- return wqe;
-}
-
static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
struct mlx5e_xdp_info *xi)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
new file mode 100644
index 000000000000..ebada0c5af3c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <net/xdp_sock_drv.h>
+#include "pool.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ struct device *dev = mlx5_core_dma_dev(priv->mdev);
+
+ return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
+{
+ if (!xsk->pools) {
+ xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->pools), GFP_KERNEL);
+ if (unlikely(!xsk->pools))
+ return -ENOMEM;
+ }
+
+ xsk->refcnt++;
+ xsk->ever_used = true;
+
+ return 0;
+}
+
+static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk)
+{
+ if (!--xsk->refcnt) {
+ kfree(xsk->pools);
+ xsk->pools = NULL;
+ }
+}
+
+static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix)
+{
+ int err;
+
+ err = mlx5e_xsk_get_pools(xsk);
+ if (unlikely(err))
+ return err;
+
+ xsk->pools[ix] = pool;
+ return 0;
+}
+
+static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix)
+{
+ xsk->pools[ix] = NULL;
+
+ mlx5e_xsk_put_pools(xsk);
+}
+
+static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool)
+{
+ return xsk_pool_get_headroom(pool) <= 0xffff &&
+ xsk_pool_get_chunk_size(pool) <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk)
+{
+ xsk->headroom = xsk_pool_get_headroom(pool);
+ xsk->chunk_size = xsk_pool_get_chunk_size(pool);
+ xsk->unaligned = pool->unaligned;
+}
+
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool, u16 ix)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ int err;
+
+ if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix)))
+ return -EBUSY;
+
+ if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
+ return -EINVAL;
+
+ err = mlx5e_xsk_map_pool(priv, pool);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix);
+ if (unlikely(err))
+ goto err_unmap_pool;
+
+ mlx5e_build_xsk_param(pool, &xsk);
+
+ if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
+ const char *recommendation = is_power_of_2(xsk.chunk_size) ?
+ "Upgrade firmware" : "Disable striding RQ";
+
+ mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n",
+ xsk.chunk_size, recommendation);
+ }
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ /* XSK objects will be created on open. */
+ goto validate_closed;
+ }
+
+ if (!params->xdp_prog) {
+ /* XSK objects will be created when an XDP program is set,
+ * and the channels are reopened.
+ */
+ goto validate_closed;
+ }
+
+ c = priv->channels.c[ix];
+
+ err = mlx5e_open_xsk(priv, params, &xsk, pool, c);
+ if (unlikely(err))
+ goto err_remove_pool;
+
+ mlx5e_activate_xsk(c);
+ mlx5e_trigger_napi_icosq(c);
+
+ /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+ * any Fill Ring entries at the setup stage.
+ */
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true);
+
+ mlx5e_deactivate_rq(&c->rq);
+ mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY);
+
+ return 0;
+
+err_remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+
+err_unmap_pool:
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return err;
+
+validate_closed:
+ /* Check the configuration in advance, rather than fail at a later stage
+ * (in mlx5e_xdp_set or on open) and end up with no channels.
+ */
+ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ err = -EINVAL;
+ goto err_remove_pool;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+ struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params,
+ &priv->xsk, ix);
+ struct mlx5e_channel *c;
+
+ if (unlikely(!pool))
+ return -EINVAL;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto remove_pool;
+
+ /* XSK RQ and SQ are only created if XDP program is set. */
+ if (!priv->channels.params.xdp_prog)
+ goto remove_pool;
+
+ c = priv->channels.c[ix];
+
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_icosq(c);
+ mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT);
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);
+
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return 0;
+}
+
+static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool,
+ u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_enable_locked(priv, pool, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_disable_locked(priv, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+
+ if (unlikely(qid >= params->num_channels))
+ return -EINVAL;
+
+ return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) :
+ mlx5e_xsk_disable_pool(priv, qid);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
new file mode 100644
index 000000000000..dca0010a0866
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_XSK_POOL_H__
+#define __MLX5_EN_XSK_POOL_H__
+
+#include "en.h"
+
+static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->pools)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->pools[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid);
+
+#endif /* __MLX5_EN_XSK_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 62fc8a128a8d..c91b54d9ff27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -3,83 +3,226 @@
#include "rx.h"
#include "en/xdp.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
/* RX data path */
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
- /* Check in advance that we have enough frames, instead of allocating
- * one-by-one, failing and moving frames to the Reuse Ring.
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5_wq_cyc *wq = &icosq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int batch, i;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
+
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
+ goto err;
+
+ BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
+ batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
+ rq->mpwqe.pages_per_wqe);
+
+ /* If batch < pages_per_wqe, either:
+ * 1. Some (or all) descriptors were invalid.
+ * 2. dma_need_sync is true, and it fell back to allocating one frame.
+ * In either case, try to continue allocating frames one by one, until
+ * the first error, which will mean there are no more valid descriptors.
*/
- return xsk_umem_has_addrs_rq(rq->umem, count);
-}
+ for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
+ wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!wi->alloc_units[batch].xsk))
+ goto err_reuse_batch;
+ }
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- struct xdp_umem *umem = rq->umem;
- u64 handle;
+ pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
+
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
+ }
+ } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ }
+ } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
+ u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size * 2),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ };
+ }
+ } else {
+ __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
+ rq->xsk_pool->chunk_size);
+ __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ .bcount = frame_size,
+ };
+ umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ .bcount = pad_size,
+ };
+ }
+ }
- if (!xsk_umem_peek_addr_rq(umem, &handle))
- return -ENOMEM;
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ wi->consumed_strides = 0;
- dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
- rq->buff.umem_headroom);
- dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
- /* No need to add headroom to the DMA address. In striding RQ case, we
- * just provide pages for UMR, and headroom is counted at the setup
- * stage when creating a WQE. In non-striding RQ case, headroom is
- * accounted in mlx5e_alloc_rx_wqe.
- */
- dma_info->addr = xdp_umem_get_dma(umem, handle);
+ /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
+ offset = ix * rq->mpwqe.mtts_per_wqe;
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
+ offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
+ offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
- xsk_umem_release_addr_rq(umem);
+ icosq->pc += rq->mpwqe.umr_wqebbs;
- dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
+ icosq->doorbell_cseg = &umr_wqe->ctrl;
return 0;
-}
-static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
-{
- xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
+err_reuse_batch:
+ while (--batch >= 0)
+ xsk_buff_free(wi->alloc_units[batch].xsk);
+
+err:
+ rq->stats->buff_alloc_err++;
+ return -ENOMEM;
}
-/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
- * the userspace if possible, and if not, this function is called to reuse them
- * in the driver.
- */
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
- mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct xdp_buff **buffs;
+ u32 contig, alloc;
+ int i;
+
+ /* mlx5e_init_frags_partition creates a 1:1 mapping between
+ * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
+ * allocate XDP buffers straight into alloc_units.
+ */
+ BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
+ sizeof(rq->wqe.alloc_units[0].xsk));
+ buffs = (struct xdp_buff **)rq->wqe.alloc_units;
+ contig = mlx5_wq_cyc_get_size(wq) - ix;
+ if (wqe_bulk <= contig) {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
+ } else {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
+ if (likely(alloc == contig))
+ alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
+ }
+
+ for (i = 0; i < alloc; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return alloc;
}
-/* Return a frame back to the hardware to fill in again. It is used by XDP when
- * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP.
- */
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
- struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
- mlx5e_xsk_recycle_frame(rq, handle);
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!frag->au->xsk))
+ return i;
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return wqe_bulk;
}
-static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
- u32 cqe_bcnt)
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
+ u32 totallen = xdp->data_end - xdp->data_meta;
+ u32 metalen = xdp->data - xdp->data_meta;
struct sk_buff *skb;
- skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ skb = napi_alloc_skb(rq->cq.napi, totallen);
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
}
- skb_put_data(skb, data, cqe_bcnt);
+ skb_put_data(skb, xdp->data_meta, totallen);
+
+ if (metalen) {
+ skb_metadata_set(skb, metalen);
+ __skb_pull(skb, metalen);
+ }
return skb;
}
@@ -90,12 +233,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
u32 head_offset,
u32 page_idx)
{
- struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
- u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
- u32 cqe_bcnt32 = cqe_bcnt;
- void *va, *data;
- u32 frag_size;
- bool consumed;
+ struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk;
+ struct bpf_prog *prog;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -103,23 +242,16 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
return NULL;
}
- /* head_offset is not used in this function, because di->xsk.data and
- * di->addr point directly to the necessary place. Furthermore, in the
- * current implementation, UMR pages are mapped to XSK frames, so
+ /* head_offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, in
+ * the current implementation, UMR pages are mapped to XSK frames, so
* head_offset should always be 0.
*/
WARN_ON_ONCE(head_offset);
- va = di->xsk.data;
- data = va + rx_headroom;
- frag_size = rq->buff.headroom + cqe_bcnt32;
-
- dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
- prefetch(data);
-
- rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
- rcu_read_unlock();
+ xsk_buff_set_size(xdp, cqe_bcnt);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
/* Possible flows:
* - XDP_REDIRECT to XSKMAP:
@@ -136,7 +268,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
* allocated first from the Reuse Ring, so it has enough space.
*/
- if (likely(consumed)) {
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
@@ -145,49 +278,34 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
* frame. On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+ return mlx5e_xsk_construct_skb(rq, xdp);
}
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
- struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt)
{
- struct mlx5e_dma_info *di = wi->di;
- u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
- void *va, *data;
- bool consumed;
- u32 frag_size;
-
- /* wi->offset is not used in this function, because di->xsk.data and
- * di->addr point directly to the necessary place. Furthermore, in the
- * current implementation, one page = one packet = one frame, so
+ struct xdp_buff *xdp = wi->au->xsk;
+ struct bpf_prog *prog;
+
+ /* wi->offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, the
+ * XSK allocator allocates frames per packet, instead of pages, so
* wi->offset should always be 0.
*/
WARN_ON_ONCE(wi->offset);
- va = di->xsk.data;
- data = va + rx_headroom;
- frag_size = rq->buff.headroom + cqe_bcnt;
-
- dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
- prefetch(data);
-
- if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
- rq->stats->wqe_err++;
- return NULL;
- }
-
- rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
- rcu_read_unlock();
+ xsk_buff_set_size(xdp, cqe_bcnt);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
- if (likely(consumed))
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
return NULL; /* page/packet was consumed by XDP */
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
- * will be handled by mlx5e_put_rx_frag.
+ * will be handled by mlx5e_free_rx_wqe.
* On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+ return mlx5e_xsk_construct_skb(rq, xdp);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index cab0e93497ae..087c943bd8e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,37 +5,19 @@
#define __MLX5_EN_XSK_RX_H__
#include "en.h"
-#include <net/xdp_sock.h>
/* RX data path */
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
struct mlx5e_mpw_info *wi,
u16 cqe_bcnt,
u32 head_offset,
u32 page_idx);
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
- struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
-static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
-{
- if (!xsk_umem_uses_need_wakeup(rq->umem))
- return alloc_err;
-
- if (unlikely(alloc_err))
- xsk_set_rx_need_wakeup(rq->umem);
- else
- xsk_clear_rx_need_wakeup(rq->umem);
-
- return false;
-}
-
#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index c28cbae42331..ff03c43833bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -3,24 +3,21 @@
#include "setup.h"
#include "en/params.h"
+#include "en/txrx.h"
+#include "en/health.h"
+#include <net/xdp_sock_drv.h>
-/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may
- * change unexpectedly, and mlx5e has a minimum valid stride size for striding
- * RQ, keep this check in the driver.
+/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal
+ * stride size of striding RQ.
*/
-#define MLX5E_MIN_XSK_CHUNK_SIZE 2048
+#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE)
bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev)
{
/* AF_XDP doesn't support frames larger than PAGE_SIZE. */
- if (xsk->chunk_size > PAGE_SIZE ||
- xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
- return false;
-
- /* Current MTU and XSK headroom don't allow packets to fit the frames. */
- if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size)
+ if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
return false;
/* frag_sz is different for regular and XSK RQs, so ensure that linear
@@ -28,47 +25,78 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
*/
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk);
default: /* MLX5_WQ_TYPE_CYCLIC */
- return mlx5e_rx_is_linear_skb(params, xsk);
+ return mlx5e_rx_is_linear_skb(mdev, params, xsk);
}
}
-static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_sq_param *param)
+static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam)
{
- void *sqc = param->sqc;
- void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
+ mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq);
+}
- mlx5e_build_sq_param_common(priv, param);
+static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct xsk_buff_pool *pool,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ int rq_xdp_ix;
+ int err;
- MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = c->netdev;
+ rq->priv = c->priv;
+ rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
+ rq->icosq = &c->icosq;
+ rq->ix = c->ix;
+ rq->channel = c;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->xsk_pool = pool;
+ rq->stats = &c->priv->channel_stats[c->ix]->xskrq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ rq_xdp_ix = c->ix;
+ err = mlx5e_rq_set_handlers(rq, params, xsk);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
}
-static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_channel_param *cparam)
+static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
+ struct mlx5e_xsk_param *xsk)
{
- const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
-
- mlx5e_build_rq_param(priv, params, xsk, &cparam->rq);
- mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
- mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq);
- mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq);
- mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
- mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq);
+ int err;
+
+ err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq);
+ if (err)
+ return err;
+
+ return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq);
}
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
struct mlx5e_channel *c)
{
struct mlx5e_channel_param *cparam;
- struct dim_cq_moder icocq_moder = {};
+ struct mlx5e_create_cq_param ccp;
int err;
+ mlx5e_build_create_cq_param(&ccp, c);
+
if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
return -EINVAL;
@@ -76,55 +104,38 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (!cparam)
return -ENOMEM;
- mlx5e_build_xsk_cparam(priv, params, xsk, cparam);
+ mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam);
- err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq);
+ err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
+ &c->xskrq.cq);
if (unlikely(err))
goto err_free_cparam;
- err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
+ err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk);
if (unlikely(err))
goto err_close_rx_cq;
- err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq);
+ err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
+ &c->xsksq.cq);
if (unlikely(err))
goto err_close_rq;
- /* Create a separate SQ, so that when the UMEM is disabled, we could
+ /* Create a separate SQ, so that when the buff pool is disabled, we could
* close this SQ safely and stop receiving CQEs. In other case, e.g., if
- * the XDPSQ was used instead, we might run into trouble when the UMEM
- * is disabled and then reenabled, but the SQ continues receiving CQEs
- * from the old UMEM.
+ * the XDPSQ was used instead, we might run into trouble when the buff pool
+ * is disabled and then re-enabled, but the SQ continues receiving CQEs
+ * from the old buff pool.
*/
- err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true);
if (unlikely(err))
goto err_close_tx_cq;
- err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq);
- if (unlikely(err))
- goto err_close_sq;
-
- /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
- * triggered and NAPI to be called on the correct CPU.
- */
- err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq);
- if (unlikely(err))
- goto err_close_icocq;
-
kvfree(cparam);
- spin_lock_init(&c->xskicosq_lock);
-
set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
return 0;
-err_close_icocq:
- mlx5e_close_cq(&c->xskicosq.cq);
-
-err_close_sq:
- mlx5e_close_xdpsq(&c->xsksq);
-
err_close_tx_cq:
mlx5e_close_cq(&c->xsksq.cq);
@@ -143,101 +154,38 @@ err_free_cparam:
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
- napi_synchronize(&c->napi);
- synchronize_rcu(); /* Sync with the XSK wakeup. */
+ synchronize_net(); /* Sync with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
- mlx5e_close_icosq(&c->xskicosq);
- mlx5e_close_cq(&c->xskicosq.cq);
mlx5e_close_xdpsq(&c->xsksq);
mlx5e_close_cq(&c->xsksq.cq);
+
+ memset(&c->xskrq, 0, sizeof(c->xskrq));
+ memset(&c->xsksq, 0, sizeof(c->xsksq));
}
void mlx5e_activate_xsk(struct mlx5e_channel *c)
{
- mlx5e_activate_icosq(&c->xskicosq);
+ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
+ * activating XSKRQ in the middle of recovery.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
- /* TX queue is created active. */
+ mlx5e_reporter_icosq_resume_recovery(c);
- spin_lock(&c->xskicosq_lock);
- mlx5e_trigger_irq(&c->xskicosq);
- spin_unlock(&c->xskicosq_lock);
+ /* TX queue is created active. */
}
void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
{
- mlx5e_deactivate_rq(&c->xskrq);
- /* TX queue is disabled on close. */
- mlx5e_deactivate_icosq(&c->xskicosq);
-}
-
-static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
-{
- struct mlx5e_redirect_rqt_param direct_rrp = {
- .is_rss = false,
- {
- .rqn = rqn,
- },
- };
-
- u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
-
- return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
-}
-
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
-{
- return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
-{
- return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
- int err, i;
-
- if (!priv->xsk.refcnt)
- return 0;
-
- for (i = 0; i < chs->num; i++) {
- struct mlx5e_channel *c = chs->c[i];
-
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
- continue;
-
- err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
- if (unlikely(err))
- goto err_stop;
- }
-
- return 0;
-
-err_stop:
- for (i--; i >= 0; i--) {
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
- continue;
-
- mlx5e_xsk_redirect_rqt_to_drop(priv, i);
- }
-
- return err;
-}
-
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
- int i;
-
- if (!priv->xsk.refcnt)
- return;
-
- for (i = 0; i < chs->num; i++) {
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
- continue;
+ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
+ * middle of recovery. Suspend the recovery to avoid it.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
- mlx5e_xsk_redirect_rqt_to_drop(priv, i);
- }
+ /* TX queue is disabled on close. */
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
index 0dd11b81c046..50e111b85efd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -12,14 +12,10 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev);
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
struct mlx5e_channel *c);
void mlx5e_close_xsk(struct mlx5e_channel *c);
void mlx5e_activate_xsk(struct mlx5e_channel *c);
void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index fe2d596cb361..367a9505ca4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -2,40 +2,37 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include "tx.h"
-#include "umem.h"
+#include "pool.h"
#include "en/xdp.h"
#include "en/params.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
struct mlx5e_channel *c;
- u16 ix;
if (unlikely(!mlx5e_xdp_is_active(priv)))
return -ENETDOWN;
- if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ if (unlikely(qid >= params->num_channels))
return -EINVAL;
- c = priv->channels.c[ix];
-
- if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
- return -ENXIO;
+ c = priv->channels.c[qid];
if (!napi_if_scheduled_mark_missed(&c->napi)) {
- /* To avoid WQE overrun, don't post a NOP if XSKICOSQ is not
+ /* To avoid WQE overrun, don't post a NOP if async_icosq is not
* active and not polled by NAPI. Return 0, because the upcoming
* activate will trigger the IRQ for us.
*/
- if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->xskicosq.state)))
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->async_icosq.state)))
+ return 0;
+
+ if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
return 0;
- spin_lock(&c->xskicosq_lock);
- mlx5e_trigger_irq(&c->xskicosq);
- spin_unlock(&c->xskicosq_lock);
+ mlx5e_trigger_napi_icosq(c);
}
return 0;
@@ -63,24 +60,28 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
- struct xdp_umem *umem = sq->umem;
+ struct xsk_buff_pool *pool = sq->xsk_pool;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
- struct mlx5e_xdp_xmit_data xdptxd;
bool work_done = true;
bool flush = false;
xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
for (; budget; budget--) {
- int check_result = sq->xmit_xdp_frame_check(sq);
+ int check_result = INDIRECT_CALL_2(sq->xmit_xdp_frame_check,
+ mlx5e_xmit_xdp_frame_check_mpwqe,
+ mlx5e_xmit_xdp_frame_check,
+ sq);
struct xdp_desc desc;
+ bool ret;
if (unlikely(check_result < 0)) {
work_done = false;
break;
}
- if (!xsk_umem_consume_tx(umem, &desc)) {
+ if (!xsk_tx_peek_desc(pool, &desc)) {
/* TX will get stuck until something wakes it up by
* triggering NAPI. Currently it's expected that the
* application calls sendto() if there are consumed, but
@@ -89,18 +90,22 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
- xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+ xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr);
+ xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr);
xdptxd.len = desc.len;
- dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
- xdptxd.len, DMA_BIDIRECTIONAL);
+ xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
- if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
+ ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+ check_result);
+ if (unlikely(!ret)) {
if (sq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xsk_tx_post_err(sq, &xdpi);
+ } else {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
}
flush = true;
@@ -111,7 +116,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xmit_xdp_doorbell(sq);
- xsk_umem_consume_tx_done(umem);
+ xsk_tx_release(pool);
}
return !(budget && work_done);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 79b487d89757..9c505158b975 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,7 +5,6 @@
#define __MLX5_EN_XSK_TX_H__
#include "en.h"
-#include <net/xdp_sock.h>
/* TX data path */
@@ -13,15 +12,4 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
-static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
-{
- if (!xsk_umem_uses_need_wakeup(sq->umem))
- return;
-
- if (sq->pc != sq->cc)
- xsk_clear_tx_need_wakeup(sq->umem);
- else
- xsk_set_tx_need_wakeup(sq->umem);
-}
-
#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
deleted file mode 100644
index 4baaa5788320..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#include <net/xdp_sock.h>
-#include "umem.h"
-#include "setup.h"
-#include "en/params.h"
-
-static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
- struct xdp_umem *umem)
-{
- struct device *dev = priv->mdev->device;
- u32 i;
-
- for (i = 0; i < umem->npgs; i++) {
- dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
-
- if (unlikely(dma_mapping_error(dev, dma)))
- goto err_unmap;
- umem->pages[i].dma = dma;
- }
-
- return 0;
-
-err_unmap:
- while (i--) {
- dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- umem->pages[i].dma = 0;
- }
-
- return -ENOMEM;
-}
-
-static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
- struct xdp_umem *umem)
-{
- struct device *dev = priv->mdev->device;
- u32 i;
-
- for (i = 0; i < umem->npgs; i++) {
- dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- umem->pages[i].dma = 0;
- }
-}
-
-static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
-{
- if (!xsk->umems) {
- xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS,
- sizeof(*xsk->umems), GFP_KERNEL);
- if (unlikely(!xsk->umems))
- return -ENOMEM;
- }
-
- xsk->refcnt++;
- xsk->ever_used = true;
-
- return 0;
-}
-
-static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk)
-{
- if (!--xsk->refcnt) {
- kfree(xsk->umems);
- xsk->umems = NULL;
- }
-}
-
-static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix)
-{
- int err;
-
- err = mlx5e_xsk_get_umems(xsk);
- if (unlikely(err))
- return err;
-
- xsk->umems[ix] = umem;
- return 0;
-}
-
-static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
-{
- xsk->umems[ix] = NULL;
-
- mlx5e_xsk_put_umems(xsk);
-}
-
-static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
-{
- return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
-}
-
-void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
-{
- xsk->headroom = umem->headroom;
- xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
-}
-
-static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
- struct xdp_umem *umem, u16 ix)
-{
- struct mlx5e_params *params = &priv->channels.params;
- struct mlx5e_xsk_param xsk;
- struct mlx5e_channel *c;
- int err;
-
- if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix)))
- return -EBUSY;
-
- if (unlikely(!mlx5e_xsk_is_umem_sane(umem)))
- return -EINVAL;
-
- err = mlx5e_xsk_map_umem(priv, umem);
- if (unlikely(err))
- return err;
-
- err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix);
- if (unlikely(err))
- goto err_unmap_umem;
-
- mlx5e_build_xsk_param(umem, &xsk);
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- /* XSK objects will be created on open. */
- goto validate_closed;
- }
-
- if (!params->xdp_prog) {
- /* XSK objects will be created when an XDP program is set,
- * and the channels are reopened.
- */
- goto validate_closed;
- }
-
- c = priv->channels.c[ix];
-
- err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
- if (unlikely(err))
- goto err_remove_umem;
-
- mlx5e_activate_xsk(c);
-
- /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
- * any Fill Ring entries at the setup stage.
- */
-
- err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
- if (unlikely(err))
- goto err_deactivate;
-
- return 0;
-
-err_deactivate:
- mlx5e_deactivate_xsk(c);
- mlx5e_close_xsk(c);
-
-err_remove_umem:
- mlx5e_xsk_remove_umem(&priv->xsk, ix);
-
-err_unmap_umem:
- mlx5e_xsk_unmap_umem(priv, umem);
-
- return err;
-
-validate_closed:
- /* Check the configuration in advance, rather than fail at a later stage
- * (in mlx5e_xdp_set or on open) and end up with no channels.
- */
- if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
- err = -EINVAL;
- goto err_remove_umem;
- }
-
- return 0;
-}
-
-static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
-{
- struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params,
- &priv->xsk, ix);
- struct mlx5e_channel *c;
-
- if (unlikely(!umem))
- return -EINVAL;
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
- goto remove_umem;
-
- /* XSK RQ and SQ are only created if XDP program is set. */
- if (!priv->channels.params.xdp_prog)
- goto remove_umem;
-
- c = priv->channels.c[ix];
- mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
- mlx5e_deactivate_xsk(c);
- mlx5e_close_xsk(c);
-
-remove_umem:
- mlx5e_xsk_remove_umem(&priv->xsk, ix);
- mlx5e_xsk_unmap_umem(priv, umem);
-
- return 0;
-}
-
-static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem,
- u16 ix)
-{
- int err;
-
- mutex_lock(&priv->state_lock);
- err = mlx5e_xsk_enable_locked(priv, umem, ix);
- mutex_unlock(&priv->state_lock);
-
- return err;
-}
-
-static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
-{
- int err;
-
- mutex_lock(&priv->state_lock);
- err = mlx5e_xsk_disable_locked(priv, ix);
- mutex_unlock(&priv->state_lock);
-
- return err;
-}
-
-int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_params *params = &priv->channels.params;
- u16 ix;
-
- if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
- return -EINVAL;
-
- return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) :
- mlx5e_xsk_disable_umem(priv, ix);
-}
-
-int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
-{
- struct xdp_umem_fq_reuse *reuseq;
-
- reuseq = xsk_reuseq_prepare(nentries);
- if (unlikely(!reuseq))
- return -ENOMEM;
- xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
- return 0;
-}
-
-u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
-{
- u16 res = xsk->refcnt ? params->num_channels : 0;
-
- while (res) {
- if (mlx5e_xsk_get_umem(params, xsk, res - 1))
- break;
- --res;
- }
-
- return res;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
deleted file mode 100644
index 25b4cbe58b54..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#ifndef __MLX5_EN_XSK_UMEM_H__
-#define __MLX5_EN_XSK_UMEM_H__
-
-#include "en.h"
-
-static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params,
- struct mlx5e_xsk *xsk, u16 ix)
-{
- if (!xsk || !xsk->umems)
- return NULL;
-
- if (unlikely(ix >= params->num_channels))
- return NULL;
-
- return xsk->umems[ix];
-}
-
-struct mlx5e_xsk_param;
-void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk);
-
-/* .ndo_bpf callback. */
-int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid);
-
-int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries);
-
-u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk);
-
-#endif /* __MLX5_EN_XSK_UMEM_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 3022463f2284..07187028f0d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -37,18 +37,22 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/tls_rxtx.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
+#include <en_accel/macsec.h>
#include "en.h"
#include "en/txrx.h"
#if IS_ENABLED(CONFIG_GENEVE)
+#include <net/geneve.h>
+
static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
{
return mlx5_tx_swp_supported(mdev);
}
static inline void
-mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs)
{
struct mlx5e_swp_spec swp_spec = {};
unsigned int offset = 0;
@@ -82,6 +86,8 @@ mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
}
mlx5e_set_eseg_swp(skb, eseg, &swp_spec);
+ if (skb_vlan_tag_present(skb) && ihs)
+ mlx5e_eseg_swp_offsets_add_vlan(eseg);
}
#else
@@ -100,33 +106,117 @@ mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
udp_hdr(skb)->len = htons(payload_len);
}
-static inline struct sk_buff *
-mlx5e_accel_handle_tx(struct sk_buff *skb,
- struct mlx5e_txqsq *sq,
- struct net_device *dev,
- struct mlx5e_tx_wqe **wqe,
- u16 *pi)
+struct mlx5e_accel_tx_state {
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_accel_tx_tls_state tls;
+#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_accel_tx_ipsec_state ipsec;
+#endif
+};
+
+static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *state)
{
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ mlx5e_udp_gso_handle_tx_skb(skb);
+
#ifdef CONFIG_MLX5_EN_TLS
- if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
- skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi);
- if (unlikely(!skb))
- return NULL;
- }
+ /* May send SKBs and WQEs. */
+ if (mlx5e_ktls_skb_offloaded(skb))
+ if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb,
+ &state->tls)))
+ return false;
#endif
#ifdef CONFIG_MLX5_EN_IPSEC
- if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
- skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb);
- if (unlikely(!skb))
- return NULL;
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) {
+ if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec)))
+ return false;
}
#endif
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
- mlx5e_udp_gso_handle_tx_skb(skb);
+#ifdef CONFIG_MLX5_EN_MACSEC
+ if (unlikely(mlx5e_macsec_skb_is_offload(skb))) {
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (unlikely(!mlx5e_macsec_handle_tx_skb(priv->macsec, skb)))
+ return false;
+ }
+#endif
- return skb;
+ return true;
}
+static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
+ struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state))
+ return mlx5e_ipsec_tx_ids_len(&state->ipsec);
+#endif
+
+ return 0;
+}
+
+/* Part of the eseg touched by TX offloads */
+#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
+
+static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, u16 ihs)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (xfrm_offload(skb))
+ mlx5e_ipsec_tx_build_eseg(priv, skb, eseg);
+#endif
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+ if (unlikely(mlx5e_macsec_skb_is_offload(skb)))
+ mlx5e_macsec_tx_build_eseg(priv->macsec, skb, eseg);
+#endif
+
+#if IS_ENABLED(CONFIG_GENEVE)
+ if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
+ mlx5e_tx_tunnel_accel(skb, eseg, ihs);
+#endif
+}
+
+static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_state *state,
+ struct mlx5_wqe_inline_seg *inlseg)
+{
+#ifdef CONFIG_MLX5_EN_TLS
+ mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls);
+#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) &&
+ state->ipsec.xo && state->ipsec.tailen)
+ mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg);
+#endif
+}
+
+static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
+{
+ return mlx5e_ktls_init_rx(priv);
+}
+
+static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv)
+{
+ mlx5e_ktls_cleanup_rx(priv);
+}
+
+static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv)
+{
+ return mlx5e_ktls_init_tx(priv);
+}
+
+static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv)
+{
+ mlx5e_ktls_cleanup_tx(priv);
+}
#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
new file mode 100644
index 000000000000..285d32d2fd08
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <mlx5_core.h>
+#include "en_accel/fs_tcp.h"
+#include "fs_core.h"
+
+enum accel_fs_tcp_type {
+ ACCEL_FS_IPV4_TCP,
+ ACCEL_FS_IPV6_TCP,
+ ACCEL_FS_TCP_NUM_TYPES,
+};
+
+struct mlx5e_accel_fs_tcp {
+ struct mlx5e_flow_table tables[ACCEL_FS_TCP_NUM_TYPES];
+ struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
+};
+
+static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+{
+ switch (i) {
+ case ACCEL_FS_IPV4_TCP:
+ return MLX5_TT_IPV4_TCP;
+ default: /* ACCEL_FS_IPV6_TCP */
+ return MLX5_TT_IPV6_TCP;
+ }
+}
+
+static void accel_fs_tcp_set_ipv4_flow(struct mlx5_flow_spec *spec, struct sock *sk)
+{
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &inet_sk(sk)->inet_daddr, 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &inet_sk(sk)->inet_rcv_saddr, 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock *sk)
+{
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &sk->sk_v6_daddr, 16);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &inet6_sk(sk)->saddr, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+}
+#endif
+
+void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
+ struct sock *sk, u32 tirn,
+ uint32_t flow_tag)
+{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ switch (sk->sk_family) {
+ case AF_INET:
+ accel_fs_tcp_set_ipv4_flow(spec, sk);
+ ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP];
+ fs_dbg(fs, "%s flow is %pI4:%d -> %pI4:%d\n", __func__,
+ &inet_sk(sk)->inet_rcv_saddr,
+ inet_sk(sk)->inet_sport,
+ &inet_sk(sk)->inet_daddr,
+ inet_sk(sk)->inet_dport);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (!ipv6_only_sock(sk) &&
+ ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
+ accel_fs_tcp_set_ipv4_flow(spec, sk);
+ ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP];
+ } else {
+ accel_fs_tcp_set_ipv6_flow(spec, sk);
+ ft = &fs_tcp->tables[ACCEL_FS_IPV6_TCP];
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
+ if (!ft) {
+ flow = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
+ ntohs(inet_sk(sk)->inet_sport));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
+ ntohs(inet_sk(sk)->inet_dport));
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tirn;
+ if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG) {
+ spec->flow_context.flow_tag = flow_tag;
+ spec->flow_context.flags = FLOW_CONTEXT_HAS_TAG;
+ }
+
+ flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1);
+
+ if (IS_ERR(flow))
+ fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow));
+
+out:
+ kvfree(spec);
+ return flow;
+}
+
+static int accel_fs_tcp_add_default_rule(struct mlx5e_flow_steering *fs,
+ enum accel_fs_tcp_type type)
+{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_flow_table *accel_fs_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err = 0;
+
+ accel_fs_t = &fs_tcp->tables[type];
+
+ dest = mlx5_ttc_get_default_dest(ttc, fs_accel2tt(type));
+ rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, accel_fs type=%d, err %d\n",
+ __func__, type, err);
+ return err;
+ }
+
+ fs_tcp->default_rules[type] = rule;
+ return 0;
+}
+
+#define MLX5E_ACCEL_FS_TCP_NUM_GROUPS (2)
+#define MLX5E_ACCEL_FS_TCP_GROUP1_SIZE (BIT(16) - 1)
+#define MLX5E_ACCEL_FS_TCP_GROUP2_SIZE (BIT(0))
+#define MLX5E_ACCEL_FS_TCP_TABLE_SIZE (MLX5E_ACCEL_FS_TCP_GROUP1_SIZE +\
+ MLX5E_ACCEL_FS_TCP_GROUP2_SIZE)
+static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
+ enum accel_fs_tcp_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
+
+ switch (type) {
+ case ACCEL_FS_IPV4_TCP:
+ case ACCEL_FS_IPV6_TCP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ switch (type) {
+ case ACCEL_FS_IPV4_TCP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ break;
+ case ACCEL_FS_IPV6_TCP:
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ACCEL_FS_TCP_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ACCEL_FS_TCP_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+out:
+ kvfree(in);
+
+ return err;
+}
+
+static int accel_fs_tcp_create_table(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_flow_table *ft = &accel_tcp->tables[type];
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_ACCEL_FS_TCP_TABLE_SIZE;
+ ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ fs_dbg(fs, "Created fs accel table id %u level %u\n",
+ ft->t->id, ft->t->level);
+
+ err = accel_fs_tcp_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = accel_fs_tcp_add_default_rule(fs, type);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static int accel_fs_tcp_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err, i;
+
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_accel2tt(i));
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int accel_fs_tcp_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5_flow_destination dest = {};
+ int err, i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+ dest.ft = accel_tcp->tables[i].t;
+
+ /* Modify ttc rules destination to point on the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, fs_accel2tt(i), &dest);
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void accel_fs_tcp_destroy_table(struct mlx5e_flow_steering *fs, int i)
+{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+
+ if (IS_ERR_OR_NULL(fs_tcp->tables[i].t))
+ return;
+
+ mlx5_del_flow_rules(fs_tcp->default_rules[i]);
+ mlx5e_destroy_flow_table(&fs_tcp->tables[i]);
+ fs_tcp->tables[i].t = NULL;
+}
+
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ int i;
+
+ if (!accel_tcp)
+ return;
+
+ accel_fs_tcp_disable(fs);
+
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++)
+ accel_fs_tcp_destroy_table(fs, i);
+
+ kfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
+}
+
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp;
+ int i, err;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version))
+ return -EOPNOTSUPP;
+
+ accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL);
+ if (!accel_tcp)
+ return -ENOMEM;
+ mlx5e_fs_set_accel_tcp(fs, accel_tcp);
+
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+ err = accel_fs_tcp_create_table(fs, i);
+ if (err)
+ goto err_destroy_tables;
+ }
+
+ err = accel_fs_tcp_enable(fs);
+ if (err)
+ goto err_destroy_tables;
+
+ return 0;
+
+err_destroy_tables:
+ while (--i >= 0)
+ accel_fs_tcp_destroy_table(fs, i);
+ kfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
new file mode 100644
index 000000000000..a032bff482a6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_ACCEL_FS_TCP_H__
+#define __MLX5E_ACCEL_FS_TCP_H__
+
+#include "en/fs.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs);
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
+ struct sock *sk, u32 tirn,
+ uint32_t flow_tag);
+void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule);
+#else
+static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; }
+static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {}
+static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
+ struct sock *sk, u32 tirn,
+ uint32_t flow_tag)
+{ return ERR_PTR(-EOPNOTSUPP); }
+static inline void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) {}
+#endif
+
+#endif /* __MLX5E_ACCEL_FS_TCP_H__ */
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index cf58c9637904..a715601865d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -35,26 +35,14 @@
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
-#include <linux/module.h>
#include "en.h"
-#include "en_accel/ipsec.h"
-#include "en_accel/ipsec_rxtx.h"
-
+#include "ipsec.h"
+#include "ipsec_rxtx.h"
static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
- struct mlx5e_ipsec_sa_entry *sa;
-
- if (!x)
- return NULL;
-
- sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
- if (!sa)
- return NULL;
-
- WARN_ON(sa->x != x);
- return sa;
+ return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
}
struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
@@ -77,16 +65,21 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
{
+ unsigned int handle = sa_entry->ipsec_obj_id;
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ struct mlx5e_ipsec_sa_entry *_sa_entry;
unsigned long flags;
- int ret;
- ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
- if (ret < 0)
- return ret;
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ipsec->sadb_rx, _sa_entry, hlist, handle)
+ if (_sa_entry->handle == handle) {
+ rcu_read_unlock();
+ return -EEXIST;
+ }
+ rcu_read_unlock();
spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
- sa_entry->handle = ret;
+ sa_entry->handle = handle;
hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
@@ -103,21 +96,11 @@ static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
}
-static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
-{
- struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
-
- /* xfrm already doing sync rcu between del and free callbacks */
-
- ida_simple_remove(&ipsec->halloc, sa_entry->handle);
-}
-
static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
{
struct xfrm_replay_state_esn *replay_esn;
- u32 seq_bottom;
+ u32 seq_bottom = 0;
u8 overlap;
- u32 *esn;
if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
sa_entry->esn_state.trigger = 0;
@@ -125,16 +108,16 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
}
replay_esn = sa_entry->x->replay_esn;
- seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+ if (replay_esn->seq >= replay_esn->replay_window)
+ seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+
overlap = sa_entry->esn_state.overlap;
sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
htonl(seq_bottom));
- esn = &sa_entry->esn_state.esn;
sa_entry->esn_state.trigger = 1;
if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
- ++(*esn);
sa_entry->esn_state.overlap = 0;
return true;
} else if (unlikely(!overlap &&
@@ -151,7 +134,7 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct xfrm_state *x = sa_entry->x;
- struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
+ struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
struct aead_geniv_ctx *geniv_ctx;
struct crypto_aead *aead;
unsigned int crypto_data_len, key_len;
@@ -185,25 +168,27 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
}
- /* rx handle */
- attrs->sa_handle = sa_entry->handle;
-
- /* algo type */
- attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
/* action */
- attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
- MLX5_ACCEL_ESP_ACTION_ENCRYPT :
- MLX5_ACCEL_ESP_ACTION_DECRYPT;
+ attrs->action = (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) ?
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+ MLX5_ACCEL_ESP_ACTION_DECRYPT;
/* flags */
attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
MLX5_ACCEL_ESP_FLAGS_TUNNEL;
+
+ /* spi */
+ attrs->spi = be32_to_cpu(x->id.spi);
+
+ /* source , destination ips */
+ memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr));
+ memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr));
+ attrs->is_ipv6 = (x->props.family != AF_INET);
}
static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
{
- struct net_device *netdev = x->xso.dev;
+ struct net_device *netdev = x->xso.real_dev;
struct mlx5e_priv *priv;
priv = netdev_priv(netdev);
@@ -221,8 +206,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
return -EINVAL;
}
if (x->props.flags & XFRM_STATE_ESN &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_ESN)) {
+ !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_ESN)) {
netdev_info(netdev, "Cannot offload ESN xfrm states\n");
return -EINVAL;
}
@@ -269,26 +253,29 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
return -EINVAL;
}
- if (x->props.family == AF_INET6 &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_IPV6)) {
- netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
- return -EINVAL;
- }
return 0;
}
+static void _update_xfrm_state(struct work_struct *work)
+{
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = container_of(
+ modify_work, struct mlx5e_ipsec_sa_entry, modify_work);
+
+ mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs);
+}
+
static int mlx5e_xfrm_add_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
- struct net_device *netdev = x->xso.dev;
- struct mlx5_accel_esp_xfrm_attrs attrs;
+ struct net_device *netdev = x->xso.real_dev;
struct mlx5e_priv *priv;
- __be32 saddr[4] = {0}, daddr[4] = {0}, spi;
- bool is_ipv6 = false;
int err;
priv = netdev_priv(netdev);
+ if (!priv->ipsec)
+ return -EOPNOTSUPP;
err = mlx5e_xfrm_validate_state(x);
if (err)
@@ -303,64 +290,37 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
sa_entry->x = x;
sa_entry->ipsec = priv->ipsec;
- /* Add the SA to handle processed incoming packets before the add SA
- * completion was received
- */
- if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
- err = mlx5e_ipsec_sadb_rx_add(sa_entry);
- if (err) {
- netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
- goto err_entry;
- }
- } else {
- sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
- mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
- }
-
/* check esn */
mlx5e_ipsec_update_esn_state(sa_entry);
- /* create xfrm */
- mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
- sa_entry->xfrm =
- mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
- MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
- if (IS_ERR(sa_entry->xfrm)) {
- err = PTR_ERR(sa_entry->xfrm);
- goto err_sadb_rx;
- }
-
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
/* create hw context */
- if (x->props.family == AF_INET) {
- saddr[3] = x->props.saddr.a4;
- daddr[3] = x->id.daddr.a4;
- } else {
- memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
- memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
- is_ipv6 = true;
- }
- spi = x->id.spi;
- sa_entry->hw_context =
- mlx5_accel_esp_create_hw_context(priv->mdev,
- sa_entry->xfrm,
- saddr, daddr, spi,
- is_ipv6);
- if (IS_ERR(sa_entry->hw_context)) {
- err = PTR_ERR(sa_entry->hw_context);
+ err = mlx5_ipsec_create_sa_ctx(sa_entry);
+ if (err)
goto err_xfrm;
+
+ err = mlx5e_accel_ipsec_fs_add_rule(priv, sa_entry);
+ if (err)
+ goto err_hw_ctx;
+
+ if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+ err = mlx5e_ipsec_sadb_rx_add(sa_entry);
+ if (err)
+ goto err_add_rule;
+ } else {
+ sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+ mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
}
+ INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state);
x->xso.offload_handle = (unsigned long)sa_entry;
goto out;
+err_add_rule:
+ mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
+err_hw_ctx:
+ mlx5_ipsec_free_sa_ctx(sa_entry);
err_xfrm:
- mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
-err_sadb_rx:
- if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
- mlx5e_ipsec_sadb_rx_del(sa_entry);
- mlx5e_ipsec_sadb_rx_free(sa_entry);
- }
-err_entry:
kfree(sa_entry);
out:
return err;
@@ -370,37 +330,27 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- if (!sa_entry)
- return;
-
- if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ if (x->xso.dir == XFRM_DEV_OFFLOAD_IN)
mlx5e_ipsec_sadb_rx_del(sa_entry);
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5e_priv *priv = netdev_priv(x->xso.dev);
- if (!sa_entry)
- return;
-
- if (sa_entry->hw_context) {
- flush_workqueue(sa_entry->ipsec->wq);
- mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
- mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
- }
-
- if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
- mlx5e_ipsec_sadb_rx_free(sa_entry);
-
+ cancel_work_sync(&sa_entry->modify_work.work);
+ mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
+ mlx5_ipsec_free_sa_ctx(sa_entry);
kfree(sa_entry);
}
int mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
- struct mlx5e_ipsec *ipsec = NULL;
+ struct mlx5e_ipsec *ipsec;
+ int ret;
- if (!MLX5_IPSEC_DEV(priv->mdev)) {
+ if (!mlx5_ipsec_device_caps(priv->mdev)) {
netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
return 0;
}
@@ -411,19 +361,27 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
hash_init(ipsec->sadb_rx);
spin_lock_init(&ipsec->sadb_rx_lock);
- ida_init(&ipsec->halloc);
- ipsec->en_priv = priv;
- ipsec->en_priv->ipsec = ipsec;
- ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+ ipsec->mdev = priv->mdev;
ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
priv->netdev->name);
if (!ipsec->wq) {
- kfree(ipsec);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_wq;
}
+
+ ret = mlx5e_accel_ipsec_fs_init(ipsec);
+ if (ret)
+ goto err_fs_init;
+
+ priv->ipsec = ipsec;
netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
return 0;
+
+err_fs_init:
+ destroy_workqueue(ipsec->wq);
+err_wq:
+ kfree(ipsec);
+ return (ret != -EOPNOTSUPP) ? ret : 0;
}
void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
@@ -433,10 +391,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
if (!ipsec)
return;
- drain_workqueue(ipsec->wq);
+ mlx5e_accel_ipsec_fs_cleanup(ipsec);
destroy_workqueue(ipsec->wq);
-
- ida_destroy(&ipsec->halloc);
kfree(ipsec);
priv->ipsec = NULL;
}
@@ -456,50 +412,19 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
return true;
}
-struct mlx5e_ipsec_modify_state_work {
- struct work_struct work;
- struct mlx5_accel_esp_xfrm_attrs attrs;
- struct mlx5e_ipsec_sa_entry *sa_entry;
-};
-
-static void _update_xfrm_state(struct work_struct *work)
-{
- int ret;
- struct mlx5e_ipsec_modify_state_work *modify_work =
- container_of(work, struct mlx5e_ipsec_modify_state_work, work);
- struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
-
- ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
- &modify_work->attrs);
- if (ret)
- netdev_warn(sa_entry->ipsec->en_priv->netdev,
- "Not an IPSec offload device\n");
-
- kfree(modify_work);
-}
-
static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- struct mlx5e_ipsec_modify_state_work *modify_work;
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ &sa_entry->modify_work;
bool need_update;
- if (!sa_entry)
- return;
-
need_update = mlx5e_ipsec_update_esn_state(sa_entry);
if (!need_update)
return;
- modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC);
- if (!modify_work)
- return;
-
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
- modify_work->sa_entry = sa_entry;
-
- INIT_WORK(&modify_work->work, _update_xfrm_state);
- WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work));
+ queue_work(sa_entry->ipsec->wq, &modify_work->work);
}
static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
@@ -515,15 +440,9 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
struct net_device *netdev = priv->netdev;
- if (!priv->ipsec)
+ if (!mlx5_ipsec_device_caps(mdev))
return;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
- !MLX5_CAP_ETH(mdev, swp)) {
- mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
- return;
- }
-
mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
netdev->features |= NETIF_F_HW_ESP;
@@ -537,12 +456,12 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
- !MLX5_CAP_ETH(mdev, swp_lso)) {
+ if (!MLX5_CAP_ETH(mdev, swp_lso)) {
mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
return;
}
+ netdev->gso_partial_features |= NETIF_F_GSO_ESP;
mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
netdev->features |= NETIF_F_GSO_ESP;
netdev->hw_features |= NETIF_F_GSO_ESP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 93bf10e6508c..16bcceec16c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -40,11 +40,56 @@
#include <net/xfrm.h>
#include <linux/idr.h>
-#include "accel/ipsec.h"
-
#define MLX5E_IPSEC_SADB_RX_BITS 10
#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+enum mlx5_accel_esp_flags {
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0,
+ MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1,
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2,
+};
+
+enum mlx5_accel_esp_action {
+ MLX5_ACCEL_ESP_ACTION_DECRYPT,
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT,
+};
+
+struct aes_gcm_keymat {
+ u64 seq_iv;
+
+ u32 salt;
+ u32 icv_len;
+
+ u32 key_len;
+ u32 aes_key[256 / 32];
+};
+
+struct mlx5_accel_esp_xfrm_attrs {
+ enum mlx5_accel_esp_action action;
+ u32 esn;
+ u32 spi;
+ u32 flags;
+ struct aes_gcm_keymat aes_gcm;
+
+ union {
+ __be32 a4;
+ __be32 a6[4];
+ } saddr;
+
+ union {
+ __be32 a4;
+ __be32 a6[4];
+ } daddr;
+
+ u8 is_ipv6;
+};
+
+enum mlx5_ipsec_cap {
+ MLX5_IPSEC_CAP_CRYPTO = 1 << 0,
+ MLX5_IPSEC_CAP_ESN = 1 << 1,
+};
+
struct mlx5e_priv;
struct mlx5e_ipsec_sw_stats {
@@ -55,35 +100,19 @@ struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_tx_drop_no_state;
atomic64_t ipsec_tx_drop_not_ip;
atomic64_t ipsec_tx_drop_trailer;
- atomic64_t ipsec_tx_drop_metadata;
};
-struct mlx5e_ipsec_stats {
- u64 ipsec_dec_in_packets;
- u64 ipsec_dec_out_packets;
- u64 ipsec_dec_bypass_packets;
- u64 ipsec_enc_in_packets;
- u64 ipsec_enc_out_packets;
- u64 ipsec_enc_bypass_packets;
- u64 ipsec_dec_drop_packets;
- u64 ipsec_dec_auth_fail_packets;
- u64 ipsec_enc_drop_packets;
- u64 ipsec_add_sa_success;
- u64 ipsec_add_sa_fail;
- u64 ipsec_del_sa_success;
- u64 ipsec_del_sa_fail;
- u64 ipsec_cmd_drop;
-};
+struct mlx5e_accel_fs_esp;
+struct mlx5e_ipsec_tx;
struct mlx5e_ipsec {
- struct mlx5e_priv *en_priv;
+ struct mlx5_core_dev *mdev;
DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
- bool no_trailer;
- spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
- struct ida halloc;
+ spinlock_t sadb_rx_lock; /* Protects sadb_rx */
struct mlx5e_ipsec_sw_stats sw_stats;
- struct mlx5e_ipsec_stats stats;
struct workqueue_struct *wq;
+ struct mlx5e_accel_fs_esp *rx_fs;
+ struct mlx5e_ipsec_tx *tx_fs;
};
struct mlx5e_ipsec_esn_state {
@@ -92,70 +121,76 @@ struct mlx5e_ipsec_esn_state {
u8 overlap: 1;
};
+struct mlx5e_ipsec_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_modify_hdr *set_modify_hdr;
+};
+
+struct mlx5e_ipsec_modify_state_work {
+ struct work_struct work;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+};
+
struct mlx5e_ipsec_sa_entry {
struct hlist_node hlist; /* Item in SADB_RX hashtable */
struct mlx5e_ipsec_esn_state esn_state;
unsigned int handle; /* Handle in SADB_RX */
struct xfrm_state *x;
struct mlx5e_ipsec *ipsec;
- struct mlx5_accel_esp_xfrm *xfrm;
- void *hw_context;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
+ u32 ipsec_obj_id;
+ u32 enc_key_id;
+ struct mlx5e_ipsec_rule ipsec_rule;
+ struct mlx5e_ipsec_modify_state_work modify_work;
};
-void mlx5e_ipsec_build_inverse_table(void);
int mlx5e_ipsec_init(struct mlx5e_priv *priv);
void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
-int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
-int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
-void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
-int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
-
struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
unsigned int handle);
-#else
+void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec);
+int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec);
+int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry);
+void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry);
-static inline void mlx5e_ipsec_build_inverse_table(void)
-{
-}
+int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
+void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
-static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
-{
- return 0;
-}
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev);
-static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
-{
-}
+void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
-static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+static inline struct mlx5_core_dev *
+mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry)
{
+ return sa_entry->ipsec->mdev;
}
-
-static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+#else
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
return 0;
}
-static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
- uint8_t *data)
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
{
- return 0;
}
-static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
{
}
-static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
return 0;
}
-
#endif
#endif /* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
new file mode 100644
index 000000000000..b859e4a4c744
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include "en.h"
+#include "en/fs.h"
+#include "ipsec.h"
+#include "fs_core.h"
+
+#define NUM_IPSEC_FTE BIT(15)
+
+enum accel_fs_esp_type {
+ ACCEL_FS_ESP4,
+ ACCEL_FS_ESP6,
+ ACCEL_FS_ESP_NUM_TYPES,
+};
+
+struct mlx5e_ipsec_rx_err {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_modify_hdr *copy_modify_hdr;
+};
+
+struct mlx5e_accel_fs_esp_prot {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_destination default_dest;
+ struct mlx5e_ipsec_rx_err rx_err;
+ u32 refcnt;
+ struct mutex prot_mutex; /* protect ESP4/ESP6 protocol */
+};
+
+struct mlx5e_accel_fs_esp {
+ struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES];
+};
+
+struct mlx5e_ipsec_tx {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ struct mutex mutex; /* Protect IPsec TX steering */
+ u32 refcnt;
+};
+
+/* IPsec RX flow steering */
+static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+{
+ if (i == ACCEL_FS_ESP4)
+ return MLX5_TT_IPV4_IPSEC_ESP;
+ return MLX5_TT_IPV6_IPSEC_ESP;
+}
+
+static int rx_err_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_accel_fs_esp_prot *fs_prot,
+ struct mlx5e_ipsec_rx_err *rx_err)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_flow_handle *fte;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */
+ MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY);
+ MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME);
+ MLX5_SET(copy_action_in, action, src_offset, 0);
+ MLX5_SET(copy_action_in, action, length, 7);
+ MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ MLX5_SET(copy_action_in, action, dst_offset, 24);
+
+ modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
+ 1, action);
+
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ netdev_err(priv->netdev,
+ "fail to alloc ipsec copy modify_header_id err=%d\n", err);
+ goto out_spec;
+ }
+
+ /* create fte */
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.modify_hdr = modify_hdr;
+ fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act,
+ &fs_prot->default_dest, 1);
+ if (IS_ERR(fte)) {
+ err = PTR_ERR(fte);
+ netdev_err(priv->netdev, "fail to add ipsec rx err copy rule err=%d\n", err);
+ goto out;
+ }
+
+ kvfree(spec);
+ rx_err->rule = fte;
+ rx_err->copy_modify_hdr = modify_hdr;
+ return 0;
+
+out:
+ mlx5_modify_header_dealloc(mdev, modify_hdr);
+out_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int rx_fs_create(struct mlx5e_priv *priv,
+ struct mlx5e_accel_fs_esp_prot *fs_prot)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table *ft = fs_prot->ft;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_handle *miss_rule;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!flow_group_in || !spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Create miss_group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
+ miss_group = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(miss_group)) {
+ err = PTR_ERR(miss_group);
+ netdev_err(priv->netdev, "fail to create ipsec rx miss_group err=%d\n", err);
+ goto out;
+ }
+ fs_prot->miss_group = miss_group;
+
+ /* Create miss rule */
+ miss_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &fs_prot->default_dest, 1);
+ if (IS_ERR(miss_rule)) {
+ mlx5_destroy_flow_group(fs_prot->miss_group);
+ err = PTR_ERR(miss_rule);
+ netdev_err(priv->netdev, "fail to create ipsec rx miss_rule err=%d\n", err);
+ goto out;
+ }
+ fs_prot->miss_rule = miss_rule;
+out:
+ kvfree(flow_group_in);
+ kvfree(spec);
+ return err;
+}
+
+static void rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+
+ accel_esp = priv->ipsec->rx_fs;
+
+ /* The netdev unreg already happened, so all offloaded rule are already removed */
+ fs_prot = &accel_esp->fs_prot[type];
+
+ mlx5_del_flow_rules(fs_prot->miss_rule);
+ mlx5_destroy_flow_group(fs_prot->miss_group);
+ mlx5_destroy_flow_table(fs_prot->ft);
+
+ mlx5_del_flow_rules(fs_prot->rx_err.rule);
+ mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
+ mlx5_destroy_flow_table(fs_prot->rx_err.ft);
+}
+
+static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(priv->fs, false);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ accel_esp = priv->ipsec->rx_fs;
+ fs_prot = &accel_esp->fs_prot[type];
+ fs_prot->default_dest =
+ mlx5_ttc_get_default_dest(ttc, fs_esp2tt(type));
+
+ ft_attr.max_fte = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ fs_prot->rx_err.ft = ft;
+ err = rx_err_add_rule(priv, fs_prot, &fs_prot->rx_err);
+ if (err)
+ goto err_add;
+
+ /* Create FT */
+ ft_attr.max_fte = NUM_IPSEC_FTE;
+ ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+ ft_attr.autogroup.num_reserved_entries = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_fs_ft;
+ }
+ fs_prot->ft = ft;
+
+ err = rx_fs_create(priv, fs_prot);
+ if (err)
+ goto err_fs;
+
+ return 0;
+
+err_fs:
+ mlx5_destroy_flow_table(fs_prot->ft);
+err_fs_ft:
+ mlx5_del_flow_rules(fs_prot->rx_err.rule);
+ mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
+err_add:
+ mlx5_destroy_flow_table(fs_prot->rx_err.ft);
+ return err;
+}
+
+static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_accel_fs_esp *accel_esp;
+ int err = 0;
+
+ accel_esp = priv->ipsec->rx_fs;
+ fs_prot = &accel_esp->fs_prot[type];
+ mutex_lock(&fs_prot->prot_mutex);
+ if (fs_prot->refcnt)
+ goto skip;
+
+ /* create FT */
+ err = rx_create(priv, type);
+ if (err)
+ goto out;
+
+ /* connect */
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = fs_prot->ft;
+ mlx5_ttc_fwd_dest(ttc, fs_esp2tt(type), &dest);
+
+skip:
+ fs_prot->refcnt++;
+out:
+ mutex_unlock(&fs_prot->prot_mutex);
+ return err;
+}
+
+static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+
+ accel_esp = priv->ipsec->rx_fs;
+ fs_prot = &accel_esp->fs_prot[type];
+ mutex_lock(&fs_prot->prot_mutex);
+ fs_prot->refcnt--;
+ if (fs_prot->refcnt)
+ goto out;
+
+ /* disconnect */
+ mlx5_ttc_fwd_default_dest(ttc, fs_esp2tt(type));
+
+ /* remove FT */
+ rx_destroy(priv, type);
+
+out:
+ mutex_unlock(&fs_prot->prot_mutex);
+}
+
+/* IPsec TX flow steering */
+static int tx_create(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ ft_attr.max_fte = NUM_IPSEC_FTE;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft = mlx5_create_auto_grouped_flow_table(ipsec->tx_fs->ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err);
+ return err;
+ }
+ ipsec->tx_fs->ft = ft;
+ return 0;
+}
+
+static int tx_ft_get(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+ int err = 0;
+
+ mutex_lock(&tx_fs->mutex);
+ if (tx_fs->refcnt)
+ goto skip;
+
+ err = tx_create(priv);
+ if (err)
+ goto out;
+skip:
+ tx_fs->refcnt++;
+out:
+ mutex_unlock(&tx_fs->mutex);
+ return err;
+}
+
+static void tx_ft_put(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+
+ mutex_lock(&tx_fs->mutex);
+ tx_fs->refcnt--;
+ if (tx_fs->refcnt)
+ goto out;
+
+ mlx5_destroy_flow_table(tx_fs->ft);
+out:
+ mutex_unlock(&tx_fs->mutex);
+}
+
+static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 ipsec_obj_id,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act)
+{
+ u8 ip_version = attrs->is_ipv6 ? 6 : 4;
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS;
+
+ /* ip_version */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ip_version);
+
+ /* Non fragmented */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.frag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.frag, 0);
+
+ /* ESP header */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP);
+
+ /* SPI number */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.outer_esp_spi, attrs->spi);
+
+ if (ip_version == 4) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &attrs->saddr.a4, 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &attrs->daddr.a4, 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &attrs->saddr.a6, 16);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &attrs->daddr.a6, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ }
+
+ flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
+ flow_act->crypto.obj_id = ipsec_obj_id;
+ flow_act->flags |= FLOW_ACT_NO_APPEND;
+}
+
+static int rx_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ u32 ipsec_obj_id = sa_entry->ipsec_obj_id;
+ struct mlx5_modify_hdr *modify_hdr = NULL;
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ enum accel_fs_esp_type type;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ accel_esp = priv->ipsec->rx_fs;
+ type = attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4;
+ fs_prot = &accel_esp->fs_prot[type];
+
+ err = rx_ft_get(priv, type);
+ if (err)
+ return err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
+
+ /* Set bit[31] ipsec marker */
+ /* Set bit[23-0] ipsec_obj_id */
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ MLX5_SET(set_action_in, action, data, (ipsec_obj_id | BIT(31)));
+ MLX5_SET(set_action_in, action, offset, 0);
+ MLX5_SET(set_action_in, action, length, 32);
+
+ modify_hdr = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL,
+ 1, action);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ netdev_err(priv->netdev,
+ "fail to alloc ipsec set modify_header_id err=%d\n", err);
+ modify_hdr = NULL;
+ goto out_err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ flow_act.modify_hdr = modify_hdr;
+ dest.ft = fs_prot->rx_err.ft;
+ rule = mlx5_add_flow_rules(fs_prot->ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n",
+ attrs->action, err);
+ goto out_err;
+ }
+
+ ipsec_rule->rule = rule;
+ ipsec_rule->set_modify_hdr = modify_hdr;
+ goto out;
+
+out_err:
+ if (modify_hdr)
+ mlx5_modify_header_dealloc(priv->mdev, modify_hdr);
+ rx_ft_put(priv, type);
+
+out:
+ kvfree(spec);
+ return err;
+}
+
+static int tx_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ err = tx_ft_get(priv);
+ if (err)
+ return err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ setup_fte_common(&sa_entry->attrs, sa_entry->ipsec_obj_id, spec,
+ &flow_act);
+
+ /* Add IPsec indicator in metadata_reg_a */
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT;
+ rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n",
+ sa_entry->attrs.action, err);
+ goto out;
+ }
+
+ sa_entry->ipsec_rule.rule = rule;
+
+out:
+ kvfree(spec);
+ if (err)
+ tx_ft_put(priv);
+ return err;
+}
+
+int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+ return tx_add_rule(priv, sa_entry);
+
+ return rx_add_rule(priv, sa_entry);
+}
+
+void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+
+ mlx5_del_flow_rules(ipsec_rule->rule);
+
+ if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT) {
+ tx_ft_put(priv);
+ return;
+ }
+
+ mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr);
+ rx_ft_put(priv,
+ sa_entry->attrs.is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4);
+}
+
+void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
+{
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+ enum accel_fs_esp_type i;
+
+ if (!ipsec->rx_fs)
+ return;
+
+ mutex_destroy(&ipsec->tx_fs->mutex);
+ WARN_ON(ipsec->tx_fs->refcnt);
+ kfree(ipsec->tx_fs);
+
+ accel_esp = ipsec->rx_fs;
+ for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
+ fs_prot = &accel_esp->fs_prot[i];
+ mutex_destroy(&fs_prot->prot_mutex);
+ WARN_ON(fs_prot->refcnt);
+ }
+ kfree(ipsec->rx_fs);
+}
+
+int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
+{
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_namespace *ns;
+ enum accel_fs_esp_type i;
+ int err = -ENOMEM;
+
+ ns = mlx5_get_flow_namespace(ipsec->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS_IPSEC);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ ipsec->tx_fs = kzalloc(sizeof(*ipsec->tx_fs), GFP_KERNEL);
+ if (!ipsec->tx_fs)
+ return -ENOMEM;
+
+ ipsec->rx_fs = kzalloc(sizeof(*ipsec->rx_fs), GFP_KERNEL);
+ if (!ipsec->rx_fs)
+ goto err_rx;
+
+ mutex_init(&ipsec->tx_fs->mutex);
+ ipsec->tx_fs->ns = ns;
+
+ accel_esp = ipsec->rx_fs;
+ for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
+ fs_prot = &accel_esp->fs_prot[i];
+ mutex_init(&fs_prot->prot_mutex);
+ }
+
+ return 0;
+
+err_rx:
+ kfree(ipsec->tx_fs);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
new file mode 100644
index 000000000000..792724ce7336
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "ipsec.h"
+#include "lib/mlx5.h"
+
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ u32 caps = 0;
+
+ if (!MLX5_CAP_GEN(mdev, ipsec_offload))
+ return 0;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return 0;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
+ return 0;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt))
+ return 0;
+
+ if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) ||
+ !MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt))
+ return 0;
+
+ if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) &&
+ MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp))
+ caps |= MLX5_IPSEC_CAP_CRYPTO;
+
+ if (!caps)
+ return 0;
+
+ if (MLX5_CAP_IPSEC(mdev, ipsec_esn))
+ caps |= MLX5_IPSEC_CAP_ESN;
+
+ /* We can accommodate up to 2^24 different IPsec objects
+ * because we use up to 24 bit in flow table metadata
+ * to hold the IPsec Object unique handle.
+ */
+ WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24);
+ return caps;
+}
+EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps);
+
+static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {};
+ void *obj, *salt_p, *salt_iv_p;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object);
+
+ /* salt and seq_iv */
+ salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt);
+ memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt));
+
+ MLX5_SET(ipsec_obj, obj, icv_length, MLX5_IPSEC_OBJECT_ICV_LEN_16B);
+ salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv);
+ memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv));
+ /* esn */
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+ MLX5_SET(ipsec_obj, obj, esn_en, 1);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
+ MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
+ }
+
+ MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id);
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ sa_entry->ipsec_obj_id =
+ MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void mlx5_destroy_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct aes_gcm_keymat *aes_gcm = &sa_entry->attrs.aes_gcm;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ int err;
+
+ /* key */
+ err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key,
+ aes_gcm->key_len / BITS_PER_BYTE,
+ MLX5_ACCEL_OBJ_IPSEC_KEY,
+ &sa_entry->enc_key_id);
+ if (err) {
+ mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err);
+ return err;
+ }
+
+ err = mlx5_create_ipsec_obj(sa_entry);
+ if (err) {
+ mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err);
+ goto err_enc_key;
+ }
+
+ return 0;
+
+err_enc_key:
+ mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id);
+ return err;
+}
+
+void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+
+ mlx5_destroy_ipsec_obj(sa_entry);
+ mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id);
+}
+
+static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)];
+ u64 modify_field_select = 0;
+ u64 general_obj_types;
+ void *obj;
+ int err;
+
+ if (!(attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED))
+ return 0;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
+ return -EINVAL;
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n",
+ sa_entry->ipsec_obj_id, err);
+ return err;
+ }
+
+ obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object);
+ modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select);
+
+ /* esn */
+ if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) ||
+ !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB))
+ return -EOPNOTSUPP;
+
+ obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object);
+ MLX5_SET64(ipsec_obj, obj, modify_field_select,
+ MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP |
+ MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
+ MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ int err;
+
+ err = mlx5_modify_ipsec_obj(sa_entry, attrs);
+ if (err)
+ return;
+
+ memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 0dd17514caae..6859f1c1a831 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -34,78 +34,15 @@
#include <crypto/aead.h>
#include <net/xfrm.h>
#include <net/esp.h>
-
-#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/ipsec.h"
-#include "accel/accel.h"
+#include "ipsec.h"
+#include "ipsec_rxtx.h"
#include "en.h"
enum {
- MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
- MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
- MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
-};
-
-struct mlx5e_ipsec_rx_metadata {
- unsigned char nexthdr;
- __be32 sa_handle;
-} __packed;
-
-enum {
MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
};
-struct mlx5e_ipsec_tx_metadata {
- __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */
- __be16 seq; /* LSBs of the first TCP seq, only for LSO */
- u8 esp_next_proto; /* Next protocol of ESP */
-} __packed;
-
-struct mlx5e_ipsec_metadata {
- unsigned char syndrome;
- union {
- unsigned char raw[5];
- /* from FPGA to host, on successful decrypt */
- struct mlx5e_ipsec_rx_metadata rx;
- /* from host to FPGA */
- struct mlx5e_ipsec_tx_metadata tx;
- } __packed content;
- /* packet type ID field */
- __be16 ethertype;
-} __packed;
-
-#define MAX_LSO_MSS 2048
-
-/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
-static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
-
-static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
-{
- return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
-}
-
-static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
-{
- struct mlx5e_ipsec_metadata *mdata;
- struct ethhdr *eth;
-
- if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
- return ERR_PTR(-ENOMEM);
-
- eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
- skb->mac_header -= sizeof(*mdata);
- mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
-
- memmove(skb->data, skb->data + sizeof(*mdata),
- 2 * ETH_ALEN);
-
- eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
-
- memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
- return mdata;
-}
-
static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
{
unsigned int alen = crypto_aead_authsize(x->data);
@@ -136,33 +73,78 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg, u8 mode,
struct xfrm_offload *xo)
{
- struct mlx5e_swp_spec swp_spec = {};
-
/* Tunnel Mode:
* SWP: OutL3 InL3 InL4
* Pkt: MAC IP ESP IP L4
*
* Transport Mode:
- * SWP: OutL3 InL4
- * InL3
+ * SWP: OutL3 OutL4
* Pkt: MAC IP ESP L4
+ *
+ * Tunnel(VXLAN TCP/UDP) over Transport Mode
+ * SWP: OutL3 InL3 InL4
+ * Pkt: MAC IP ESP UDP VXLAN IP L4
*/
- swp_spec.l3_proto = skb->protocol;
- swp_spec.is_tun = mode == XFRM_MODE_TUNNEL;
- if (swp_spec.is_tun) {
- if (xo->proto == IPPROTO_IPV6) {
- swp_spec.tun_l3_proto = htons(ETH_P_IPV6);
- swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr;
- } else {
- swp_spec.tun_l3_proto = htons(ETH_P_IP);
- swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol;
+
+ /* Shared settings */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (skb->protocol == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+ /* Tunnel mode */
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+
+ switch (xo->inner_ipproto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ /* IP | ESP | IP | [TCP | UDP] */
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ default:
+ break;
+ }
+ return;
+ }
+
+ /* Transport mode */
+ if (mode != XFRM_MODE_TRANSPORT)
+ return;
+
+ if (!xo->inner_ipproto) {
+ switch (xo->proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ /* IP | ESP | TCP */
+ eseg->swp_outer_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ default:
+ break;
}
} else {
- swp_spec.tun_l3_proto = skb->protocol;
- swp_spec.tun_l4_proto = xo->proto;
+ /* Tunnel(VXLAN TCP/UDP) over Transport Mode */
+ switch (xo->inner_ipproto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ eseg->swp_inner_l4_offset =
+ (skb->csum_start + skb->head - skb->data) / 2;
+ if (inner_ip_hdr(skb)->version == 6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ break;
+ default:
+ break;
+ }
}
- mlx5e_set_eseg_swp(skb, eseg, &swp_spec);
}
void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
@@ -199,54 +181,90 @@ void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
skb_store_bits(skb, iv_offset, &seqno, 8);
}
-static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
- struct mlx5e_ipsec_metadata *mdata,
- struct xfrm_offload *xo)
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg)
{
- struct ip_esp_hdr *esph;
- struct tcphdr *tcph;
-
- if (skb_is_gso(skb)) {
- /* Add LSO metadata indication */
- esph = ip_esp_hdr(skb);
- tcph = inner_tcp_hdr(skb);
- netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
- skb->network_header,
- skb->transport_header,
- skb->inner_network_header,
- skb->inner_transport_header);
- netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
- skb->len, skb_shinfo(skb)->gso_size,
- ntohs(tcph->source), ntohs(tcph->dest),
- ntohl(tcph->seq), ntohl(esph->seq_no));
- mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
- mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
- mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
- } else {
- mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
- }
- mdata->content.tx.esp_next_proto = xo->proto;
+ inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG);
+ esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto);
+}
+
+static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ struct xfrm_state *x,
+ struct xfrm_offload *xo,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ unsigned int blksize, clen, alen, plen;
+ struct crypto_aead *aead;
+ unsigned int tailen;
+
+ ipsec_st->x = x;
+ ipsec_st->xo = xo;
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ clen = ALIGN(skb->len + 2, blksize);
+ plen = max_t(u32, clen - skb->len, 4);
+ tailen = plen + alen;
+ ipsec_st->plen = plen;
+ ipsec_st->tailen = tailen;
- netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
- mdata->syndrome, mdata->content.tx.esp_next_proto,
- ntohs(mdata->content.tx.mss_inv),
- ntohs(mdata->content.tx.seq));
+ return 0;
}
-struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
- struct mlx5e_tx_wqe *wqe,
- struct sk_buff *skb)
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_encap_tmpl *encap;
+ struct xfrm_state *x;
+ struct sec_path *sp;
+ u8 l3_proto;
+
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1))
+ return;
+
+ x = xfrm_input_state(skb);
+ if (unlikely(!x))
+ return;
+
+ if (unlikely(!x->xso.offload_handle ||
+ (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))))
+ return;
+
+ mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
+
+ l3_proto = (x->props.family == AF_INET) ?
+ ((struct iphdr *)skb_network_header(skb))->protocol :
+ ((struct ipv6hdr *)skb_network_header(skb))->nexthdr;
+
+ eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+ eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
+ encap = x->encap;
+ if (!encap) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
+ } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
+ }
+}
+
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo = xfrm_offload(skb);
- struct mlx5e_ipsec_metadata *mdata;
struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_state *x;
struct sec_path *sp;
- if (!xo)
- return skb;
-
sp = skb_sec_path(skb);
if (unlikely(sp->len != 1)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
@@ -271,124 +289,66 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
goto drop;
}
- mdata = mlx5e_ipsec_add_metadata(skb);
- if (IS_ERR(mdata)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
- goto drop;
- }
- mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+
sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
sa_entry->set_iv_op(skb, x, xo);
- mlx5e_ipsec_set_metadata(skb, mdata, xo);
+ mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st);
- return skb;
+ return true;
drop:
kfree_skb(skb);
- return NULL;
+ return false;
}
-static inline struct xfrm_state *
-mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
- struct mlx5e_ipsec_metadata *mdata)
+enum {
+ MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED,
+ MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED,
+ MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER,
+};
+
+void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
+ u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_priv *priv;
struct xfrm_offload *xo;
struct xfrm_state *xs;
struct sec_path *sp;
- u32 sa_handle;
+ u32 sa_handle;
+ sa_handle = MLX5_IPSEC_METADATA_HANDLE(ipsec_meta_data);
+ priv = netdev_priv(netdev);
sp = secpath_set(skb);
if (unlikely(!sp)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
- return NULL;
+ return;
}
- sa_handle = be32_to_cpu(mdata->content.rx.sa_handle);
xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
if (unlikely(!xs)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
- return NULL;
+ return;
}
- sp = skb_sec_path(skb);
sp->xvec[sp->len++] = xs;
sp->olen++;
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
- switch (mdata->syndrome) {
- case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
+
+ switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
+ case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
xo->status = CRYPTO_SUCCESS;
- if (likely(priv->ipsec->no_trailer)) {
- xo->flags |= XFRM_ESP_NO_TRAILER;
- xo->proto = mdata->content.rx.nexthdr;
- }
break;
- case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
+ case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED:
xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
break;
- case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
- xo->status = CRYPTO_INVALID_PROTOCOL;
+ case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER:
+ xo->status = CRYPTO_INVALID_PACKET_SYNTAX;
break;
default:
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
- return NULL;
- }
- return xs;
-}
-
-struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
- struct sk_buff *skb, u32 *cqe_bcnt)
-{
- struct mlx5e_ipsec_metadata *mdata;
- struct xfrm_state *xs;
-
- if (!is_metadata_hdr_valid(skb))
- return skb;
-
- /* Use the metadata */
- mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN);
- xs = mlx5e_ipsec_build_sp(netdev, skb, mdata);
- if (unlikely(!xs)) {
- kfree_skb(skb);
- return NULL;
- }
-
- remove_metadata_hdr(skb);
- *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
-
- return skb;
-}
-
-bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
- netdev_features_t features)
-{
- struct sec_path *sp = skb_sec_path(skb);
- struct xfrm_state *x;
-
- if (sp && sp->len) {
- x = sp->xvec[0];
- if (x && x->xso.offload_handle)
- return true;
- }
- return false;
-}
-
-void mlx5e_ipsec_build_inverse_table(void)
-{
- u16 mss_inv;
- u32 mss;
-
- /* Calculate 1/x inverse table for use in GSO data path.
- * Using this table, we provide the IPSec accelerator with the value of
- * 1/gso_size so that it can infer the position of each segment inside
- * the GSO, and increment the ESP sequence number, and generate the IV.
- * The HW needs this value in Q0.16 fixed-point number format
- */
- mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
- for (mss = 2; mss < MAX_LSO_MSS; mss++) {
- mss_inv = div_u64(1ULL << 32, mss) >> 16;
- mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index db84500b024f..1878a70b9031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -34,28 +34,136 @@
#ifndef __MLX5E_IPSEC_RXTX_H__
#define __MLX5E_IPSEC_RXTX_H__
-#ifdef CONFIG_MLX5_EN_IPSEC
-
#include <linux/skbuff.h>
#include <net/xfrm.h>
#include "en.h"
#include "en/txrx.h"
-struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
- struct sk_buff *skb, u32 *cqe_bcnt);
-void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+/* Bit31: IPsec marker, Bit30: reserved, Bit29-24: IPsec syndrome, Bit23-0: IPsec obj id */
+#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1)
+#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0))
+#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0))
+
+struct mlx5e_accel_tx_ipsec_state {
+ struct xfrm_offload *xo;
+ struct xfrm_state *x;
+ u32 tailen;
+ u32 plen;
+};
+
+#ifdef CONFIG_MLX5_EN_IPSEC
void mlx5e_ipsec_inverse_table_init(void);
-bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
- netdev_features_t features);
void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
-struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
- struct mlx5e_tx_wqe *wqe,
- struct sk_buff *skb);
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st);
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg);
+void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe);
+static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ return ipsec_st->tailen;
+}
+
+static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
+{
+ return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
+}
+
+static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
+{
+ return eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+}
+
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg);
+
+static inline netdev_features_t
+mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct sec_path *sp = skb_sec_path(skb);
+
+ if (sp && sp->len && xo) {
+ struct xfrm_state *x = sp->xvec[0];
+
+ if (!x || !x->xso.offload_handle)
+ goto out_disable;
+
+ if (xo->inner_ipproto) {
+ /* Cannot support tunnel packet over IPsec tunnel mode
+ * because we cannot offload three IP header csum
+ */
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ goto out_disable;
+
+ /* Only support UDP or TCP L4 checksum */
+ if (xo->inner_ipproto != IPPROTO_UDP &&
+ xo->inner_ipproto != IPPROTO_TCP)
+ goto out_disable;
+ }
+
+ return features;
+
+ }
+
+ /* Disable CSUM and GSO for software IPsec */
+out_disable:
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static inline bool
+mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ u8 inner_ipproto;
+
+ if (!mlx5e_ipsec_eseg_meta(eseg))
+ return false;
+
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ inner_ipproto = xfrm_offload(skb)->inner_ipproto;
+ if (inner_ipproto) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+ if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP)
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+ } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial_inner++;
+ }
+
+ return true;
+}
+#else
+static inline
+void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{}
+
+static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
+{
+ return false;
+}
+
+static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; }
+static inline netdev_features_t
+mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
+{ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); }
+static inline bool
+mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ return false;
+}
#endif /* CONFIG_MLX5_EN_IPSEC */
#endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
index 6fea59223dc4..9de84821dafb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -35,26 +35,7 @@
#include <net/sock.h>
#include "en.h"
-#include "accel/ipsec.h"
-#include "fpga/sdk.h"
-#include "en_accel/ipsec.h"
-
-static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
-};
+#include "ipsec.h"
static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
@@ -64,70 +45,40 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
};
#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
-#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
-#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS)
-
-int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw)
{
- if (!priv->ipsec)
- return 0;
-
- return NUM_IPSEC_COUNTERS;
+ return priv->ipsec ? NUM_IPSEC_SW_COUNTERS : 0;
}
-int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data)
-{
- unsigned int i, idx = 0;
-
- if (!priv->ipsec)
- return 0;
-
- for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
- strcpy(data + (idx++) * ETH_GSTRING_LEN,
- mlx5e_ipsec_hw_stats_desc[i].format);
-
- for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
- strcpy(data + (idx++) * ETH_GSTRING_LEN,
- mlx5e_ipsec_sw_stats_desc[i].format);
+static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_sw) {}
- return NUM_IPSEC_COUNTERS;
-}
-
-void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_sw)
{
- int ret;
+ unsigned int i;
- if (!priv->ipsec)
- return;
-
- ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
- NUM_IPSEC_HW_COUNTERS);
- if (ret)
- memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
+ if (priv->ipsec)
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_ipsec_sw_stats_desc[i].format);
+ return idx;
}
-int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw)
{
- int i, idx = 0;
-
- if (!priv->ipsec)
- return 0;
+ int i;
- for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
- mlx5e_ipsec_hw_stats_desc, i);
-
- for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
- data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
- mlx5e_ipsec_sw_stats_desc, i);
-
- return NUM_IPSEC_COUNTERS;
+ if (priv->ipsec)
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+ mlx5e_ipsec_sw_stats_desc, i);
+ return idx;
}
+
+MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index 46725cd743a3..da2184c94203 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -2,18 +2,47 @@
// Copyright (c) 2019 Mellanox Technologies.
#include "en.h"
+#include "lib/mlx5.h"
#include "en_accel/ktls.h"
+#include "en_accel/ktls_utils.h"
+#include "en_accel/fs_tcp.h"
-static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id)
{
- u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
- void *tisc;
-
- tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+ u32 sz_bytes;
+ void *key;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
- MLX5_SET(tisc, tisc, tls_en, 1);
+ return mlx5_create_encryption_key(mdev, key, sz_bytes,
+ MLX5_ACCEL_OBJ_TLS_KEY,
+ p_key_id);
+}
- return mlx5e_create_tis(mdev, in, tisn);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ mlx5_destroy_encryption_key(mdev, key_id);
}
static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
@@ -22,42 +51,17 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
u32 start_offload_tcp_sn)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_ktls_offload_context_tx *tx_priv;
- struct tls_context *tls_ctx = tls_get_ctx(sk);
struct mlx5_core_dev *mdev = priv->mdev;
int err;
- if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX))
- return -EINVAL;
-
- if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info)))
+ if (!mlx5e_ktls_type_check(mdev, crypto_info))
return -EOPNOTSUPP;
- tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL);
- if (!tx_priv)
- return -ENOMEM;
-
- tx_priv->expected_seq = start_offload_tcp_sn;
- tx_priv->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
- mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv);
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+ err = mlx5e_ktls_add_tx(netdev, sk, crypto_info, start_offload_tcp_sn);
+ else
+ err = mlx5e_ktls_add_rx(netdev, sk, crypto_info, start_offload_tcp_sn);
- /* tc and underlay_qpn values are not in use for tls tis */
- err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn);
- if (err)
- goto create_tis_fail;
-
- err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id);
- if (err)
- goto encryption_key_create_fail;
-
- mlx5e_ktls_tx_offload_set_pending(tx_priv);
-
- return 0;
-
-encryption_key_create_fail:
- mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
-create_tis_fail:
- kvfree(tx_priv);
return err;
}
@@ -65,29 +69,131 @@ static void mlx5e_ktls_del(struct net_device *netdev,
struct tls_context *tls_ctx,
enum tls_offload_ctx_dir direction)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_ktls_offload_context_tx *tx_priv =
- mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+ mlx5e_ktls_del_tx(netdev, tls_ctx);
+ else
+ mlx5e_ktls_del_rx(netdev, tls_ctx);
+}
- mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id);
- mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
- kvfree(tx_priv);
+static int mlx5e_ktls_resync(struct net_device *netdev,
+ struct sock *sk, u32 seq, u8 *rcd_sn,
+ enum tls_offload_ctx_dir direction)
+{
+ if (unlikely(direction != TLS_OFFLOAD_CTX_DIR_RX))
+ return -EOPNOTSUPP;
+
+ mlx5e_ktls_rx_resync(netdev, sk, seq, rcd_sn);
+ return 0;
}
static const struct tlsdev_ops mlx5e_ktls_ops = {
.tls_dev_add = mlx5e_ktls_add,
.tls_dev_del = mlx5e_ktls_del,
+ .tls_dev_resync = mlx5e_ktls_resync,
};
+bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+ u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+
+ if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx))
+ return false;
+
+ /* Check the possibility to post the required ICOSQ WQEs. */
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS))
+ return false;
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS))
+ return false;
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_KTLS_GET_PROGRESS_WQEBBS))
+ return false;
+
+ return true;
+}
+
void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
- if (!mlx5_accel_is_ktls_device(priv->mdev))
+ if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev))
return;
- netdev->hw_features |= NETIF_F_HW_TLS_TX;
- netdev->features |= NETIF_F_HW_TLS_TX;
+ if (mlx5e_is_ktls_tx(mdev)) {
+ netdev->hw_features |= NETIF_F_HW_TLS_TX;
+ netdev->features |= NETIF_F_HW_TLS_TX;
+ }
+
+ if (mlx5e_is_ktls_rx(mdev))
+ netdev->hw_features |= NETIF_F_HW_TLS_RX;
netdev->tlsdev_ops = &mlx5e_ktls_ops;
}
+
+int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+ if (enable)
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
+ else
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ if (!mlx5e_is_ktls_rx(priv->mdev))
+ return 0;
+
+ priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx");
+ if (!priv->tls->rx_wq)
+ return -ENOMEM;
+
+ if (priv->netdev->features & NETIF_F_HW_TLS_RX) {
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
+ if (err) {
+ destroy_workqueue(priv->tls->rx_wq);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_rx(priv->mdev))
+ return;
+
+ if (priv->netdev->features & NETIF_F_HW_TLS_RX)
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
+
+ destroy_workqueue(priv->tls->rx_wq);
+}
+
+int mlx5e_ktls_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls;
+
+ if (!mlx5e_is_ktls_device(priv->mdev))
+ return 0;
+
+ tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+ if (!tls)
+ return -ENOMEM;
+
+ priv->tls = tls;
+ return 0;
+}
+
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv)
+{
+ kfree(priv->tls);
+ priv->tls = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index a3efa29a4629..1c35045e41fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -4,117 +4,142 @@
#ifndef __MLX5E_KTLS_H__
#define __MLX5E_KTLS_H__
+#include <linux/tls.h>
+#include <net/tls.h>
#include "en.h"
#ifdef CONFIG_MLX5_EN_TLS
-#include <net/tls.h>
-#include "accel/tls.h"
-
-#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
- (offsetof(struct mlx5e_umr_wqe, tls_static_params_ctx) + \
- MLX5_ST_SZ_BYTES(tls_static_params))
-#define MLX5E_KTLS_STATIC_WQEBBS \
- (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB))
-
-#define MLX5E_KTLS_PROGRESS_WQE_SZ \
- (offsetof(struct mlx5e_tx_wqe, tls_progress_params_ctx) + \
- MLX5_ST_SZ_BYTES(tls_progress_params))
-#define MLX5E_KTLS_PROGRESS_WQEBBS \
- (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
-
-struct mlx5e_dump_wqe {
- struct mlx5_wqe_ctrl_seg ctrl;
- struct mlx5_wqe_data_seg data;
-};
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
-#define MLX5E_KTLS_DUMP_WQEBBS \
- (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB))
+static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+ if (is_kdump_kernel())
+ return false;
-enum {
- MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
- MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
- MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
-};
+ if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx))
+ return false;
-enum {
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2,
-};
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
-struct mlx5e_ktls_offload_context_tx {
- struct tls_offload_context_tx *tx_ctx;
- struct tls12_crypto_info_aes_gcm_128 crypto_info;
- u32 expected_seq;
- u32 tisn;
- u32 key_id;
- bool ctx_post_pending;
-};
+ return (MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128) ||
+ MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256));
+}
-struct mlx5e_ktls_offload_context_tx_shadow {
- struct tls_offload_context_tx tx_ctx;
- struct mlx5e_ktls_offload_context_tx *priv_tx;
-};
+static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info)
+{
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256);
+ break;
+ }
+
+ return false;
+}
-static inline void
-mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
- struct mlx5e_ktls_offload_context_tx *priv_tx)
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv);
+int mlx5e_ktls_init_rx(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv);
+int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable);
+struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void);
+void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list);
+
+static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev)
{
- struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
- struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+ return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx);
+}
+
+bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev);
- BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+struct mlx5e_tls_sw_stats {
+ atomic64_t tx_tls_ctx;
+ atomic64_t tx_tls_del;
+ atomic64_t tx_tls_pool_alloc;
+ atomic64_t tx_tls_pool_free;
+ atomic64_t rx_tls_ctx;
+ atomic64_t rx_tls_del;
+};
+
+struct mlx5e_tls {
+ struct mlx5e_tls_sw_stats sw_stats;
+ struct workqueue_struct *rx_wq;
+ struct mlx5e_tls_tx_pool *tx_pool;
+};
- shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+int mlx5e_ktls_init(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv);
- shadow->priv_tx = priv_tx;
- priv_tx->tx_ctx = tx_ctx;
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv);
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
}
-static inline struct mlx5e_ktls_offload_context_tx *
-mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
{
- struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
- struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+ return 0;
+}
- BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+{
+}
- shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+static inline int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
+{
+ return 0;
+}
- return shadow->priv_tx;
+static inline void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
+{
}
-void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
-void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
-
-struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
- struct mlx5e_txqsq *sq,
- struct sk_buff *skb,
- struct mlx5e_tx_wqe **wqe, u16 *pi);
-void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
- struct mlx5e_tx_wqe_info *wi,
- u32 *dma_fifo_cc);
-static inline u8
-mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
- unsigned int sync_len)
+static inline int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
{
- /* Given the MTU and sync_len, calculates an upper bound for the
- * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
- */
- return MLX5E_KTLS_DUMP_WQEBBS *
- (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+ netdev_warn(netdev, "kTLS is not supported\n");
+ return -EOPNOTSUPP;
}
-#else
-static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+static inline struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void)
{
+ return ERR_PTR(-EOPNOTSUPP);
}
static inline void
-mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
- struct mlx5e_tx_wqe_info *wi,
- u32 *dma_fifo_cc) {}
+mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {}
+static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+ return false;
+}
+
+static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ return 0;
+}
+
+static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ return 0;
+}
#endif
#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
new file mode 100644
index 000000000000..3e54834747ce
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <net/inet6_hashtables.h>
+#include "en_accel/en_accel.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
+#include "en_accel/ktls_utils.h"
+#include "en_accel/fs_tcp.h"
+
+struct accel_rule {
+ struct work_struct work;
+ struct mlx5e_priv *priv;
+ struct mlx5_flow_handle *rule;
+};
+
+#define PROGRESS_PARAMS_WRITE_UNIT 64
+#define PROGRESS_PARAMS_PADDED_SIZE \
+ (ALIGN(sizeof(struct mlx5_wqe_tls_progress_params_seg), \
+ PROGRESS_PARAMS_WRITE_UNIT))
+
+struct mlx5e_ktls_rx_resync_buf {
+ union {
+ struct mlx5_wqe_tls_progress_params_seg progress;
+ u8 pad[PROGRESS_PARAMS_PADDED_SIZE];
+ } ____cacheline_aligned_in_smp;
+ dma_addr_t dma_addr;
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+};
+
+enum {
+ MLX5E_PRIV_RX_FLAG_DELETING,
+ MLX5E_NUM_PRIV_RX_FLAGS,
+};
+
+struct mlx5e_ktls_rx_resync_ctx {
+ struct tls_offload_resync_async core;
+ struct work_struct work;
+ struct mlx5e_priv *priv;
+ refcount_t refcnt;
+ __be64 sw_rcd_sn_be;
+ u32 seq;
+};
+
+struct mlx5e_ktls_offload_context_rx {
+ union mlx5e_crypto_info crypto_info;
+ struct accel_rule rule;
+ struct sock *sk;
+ struct mlx5e_rq_stats *rq_stats;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ struct completion add_ctx;
+ struct mlx5e_tir tir;
+ u32 key_id;
+ u32 rxq;
+ DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS);
+
+ /* resync */
+ spinlock_t lock; /* protects resync fields */
+ struct mlx5e_ktls_rx_resync_ctx resync;
+ struct list_head list;
+};
+
+static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ if (!refcount_dec_and_test(&priv_rx->resync.refcnt))
+ return false;
+
+ kfree(priv_rx);
+ return true;
+}
+
+static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ refcount_inc(&priv_rx->resync.refcnt);
+}
+
+struct mlx5e_ktls_resync_resp {
+ /* protects list changes */
+ spinlock_t lock;
+ struct list_head list;
+};
+
+void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list)
+{
+ kvfree(resp_list);
+}
+
+struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void)
+{
+ struct mlx5e_ktls_resync_resp *resp_list;
+
+ resp_list = kvzalloc(sizeof(*resp_list), GFP_KERNEL);
+ if (!resp_list)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&resp_list->list);
+ spin_lock_init(&resp_list->lock);
+
+ return resp_list;
+}
+
+static void accel_rule_handle_work(struct work_struct *work)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct accel_rule *accel_rule;
+ struct mlx5_flow_handle *rule;
+
+ accel_rule = container_of(work, struct accel_rule, work);
+ priv_rx = container_of(accel_rule, struct mlx5e_ktls_offload_context_rx, rule);
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
+ goto out;
+
+ rule = mlx5e_accel_fs_add_sk(accel_rule->priv->fs, priv_rx->sk,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ MLX5_FS_DEFAULT_FLOW_TAG);
+ if (!IS_ERR_OR_NULL(rule))
+ accel_rule->rule = rule;
+out:
+ complete(&priv_rx->add_ctx);
+}
+
+static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv)
+{
+ INIT_WORK(&rule->work, accel_rule_handle_work);
+ rule->priv = priv;
+}
+
+static void icosq_fill_wi(struct mlx5e_icosq *sq, u16 pi,
+ struct mlx5e_icosq_wqe_info *wi)
+{
+ sq->db.wqe_info[pi] = *wi;
+}
+
+static struct mlx5_wqe_ctrl_seg *
+post_static_params(struct mlx5e_icosq *sq,
+ struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ struct mlx5e_set_tls_static_params_wqe *wqe;
+ struct mlx5e_icosq_wqe_info wi;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS;
+ if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs)))
+ return ERR_PTR(-ENOSPC);
+
+ pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ priv_rx->key_id, priv_rx->resync.seq, false,
+ TLS_OFFLOAD_CTX_DIR_RX);
+ wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS,
+ .num_wqebbs = num_wqebbs,
+ .tls_set_params.priv_rx = priv_rx,
+ };
+ icosq_fill_wi(sq, pi, &wi);
+ sq->pc += num_wqebbs;
+
+ return &wqe->ctrl;
+}
+
+static struct mlx5_wqe_ctrl_seg *
+post_progress_params(struct mlx5e_icosq *sq,
+ struct mlx5e_ktls_offload_context_rx *priv_rx,
+ u32 next_record_tcp_sn)
+{
+ struct mlx5e_set_tls_progress_params_wqe *wqe;
+ struct mlx5e_icosq_wqe_info wi;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS;
+ if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs)))
+ return ERR_PTR(-ENOSPC);
+
+ pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ false, next_record_tcp_sn,
+ TLS_OFFLOAD_CTX_DIR_RX);
+ wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_TLS,
+ .num_wqebbs = num_wqebbs,
+ .tls_set_params.priv_rx = priv_rx,
+ };
+
+ icosq_fill_wi(sq, pi, &wi);
+ sq->pc += num_wqebbs;
+
+ return &wqe->ctrl;
+}
+
+static int post_rx_param_wqes(struct mlx5e_channel *c,
+ struct mlx5e_ktls_offload_context_rx *priv_rx,
+ u32 next_record_tcp_sn)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_icosq *sq;
+ int err;
+
+ err = 0;
+ sq = &c->async_icosq;
+ spin_lock_bh(&c->async_icosq_lock);
+
+ cseg = post_static_params(sq, priv_rx);
+ if (IS_ERR(cseg))
+ goto err_out;
+ cseg = post_progress_params(sq, priv_rx, next_record_tcp_sn);
+ if (IS_ERR(cseg))
+ goto err_out;
+
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+unlock:
+ spin_unlock_bh(&c->async_icosq_lock);
+
+ return err;
+
+err_out:
+ priv_rx->rq_stats->tls_resync_req_skip++;
+ err = PTR_ERR(cseg);
+ complete(&priv_rx->add_ctx);
+ goto unlock;
+}
+
+static void
+mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ struct mlx5e_ktls_offload_context_rx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
+
+ BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX);
+
+ *ctx = priv_rx;
+}
+
+static struct mlx5e_ktls_offload_context_rx *
+mlx5e_get_ktls_rx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_rx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
+
+ return *ctx;
+}
+
+/* Re-sync */
+/* Runs in work context */
+static int
+resync_post_get_progress_params(struct mlx5e_icosq *sq,
+ struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ struct mlx5e_get_tls_progress_params_wqe *wqe;
+ struct mlx5e_ktls_rx_resync_buf *buf;
+ struct mlx5e_icosq_wqe_info wi;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_seg_get_psv *psv;
+ struct device *pdev;
+ int err;
+ u16 pi;
+
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+ if (unlikely(!buf)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ pdev = mlx5_core_dma_dev(sq->channel->priv->mdev);
+ buf->dma_addr = dma_map_single(pdev, &buf->progress,
+ PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ buf->priv_rx = priv_rx;
+
+ spin_lock_bh(&sq->channel->async_icosq_lock);
+
+ if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) {
+ spin_unlock_bh(&sq->channel->async_icosq_lock);
+ err = -ENOSPC;
+ goto err_dma_unmap;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS);
+ wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi);
+
+#define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS))
+
+ cseg = &wqe->ctrl;
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_GET_PSV |
+ (MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS << 24));
+ cseg->qpn_ds =
+ cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | GET_PSV_DS_CNT);
+
+ psv = &wqe->psv;
+ psv->num_psv = 1 << 4;
+ psv->l_key = sq->channel->mkey_be;
+ psv->psv_index[0] = cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir));
+ psv->va = cpu_to_be64(buf->dma_addr);
+
+ wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS,
+ .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS,
+ .tls_get_params.buf = buf,
+ };
+ icosq_fill_wi(sq, pi, &wi);
+ sq->pc++;
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ spin_unlock_bh(&sq->channel->async_icosq_lock);
+
+ return 0;
+
+err_dma_unmap:
+ dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+err_free:
+ kfree(buf);
+err_out:
+ priv_rx->rq_stats->tls_resync_req_skip++;
+ return err;
+}
+
+/* Function is called with elevated refcount.
+ * It decreases it only if no WQE is posted.
+ */
+static void resync_handle_work(struct work_struct *work)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct mlx5e_channel *c;
+ struct mlx5e_icosq *sq;
+
+ resync = container_of(work, struct mlx5e_ktls_rx_resync_ctx, work);
+ priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync);
+
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
+ mlx5e_ktls_priv_rx_put(priv_rx);
+ return;
+ }
+
+ c = resync->priv->channels.c[priv_rx->rxq];
+ sq = &c->async_icosq;
+
+ if (resync_post_get_progress_params(sq, priv_rx))
+ mlx5e_ktls_priv_rx_put(priv_rx);
+}
+
+static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
+ struct mlx5e_priv *priv)
+{
+ INIT_WORK(&resync->work, resync_handle_work);
+ resync->priv = priv;
+ refcount_set(&resync->refcnt, 1);
+}
+
+/* Function can be called with the refcount being either elevated or not.
+ * It does not affect the refcount.
+ */
+static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx,
+ struct mlx5e_channel *c)
+{
+ struct mlx5e_ktls_resync_resp *ktls_resync;
+ struct mlx5e_icosq *sq;
+ bool trigger_poll;
+
+ sq = &c->async_icosq;
+ ktls_resync = sq->ktls_resync;
+ trigger_poll = false;
+
+ spin_lock_bh(&ktls_resync->lock);
+ spin_lock_bh(&priv_rx->lock);
+ switch (priv_rx->crypto_info.crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ &priv_rx->crypto_info.crypto_info_128;
+
+ memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be,
+ sizeof(info->rec_seq));
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ &priv_rx->crypto_info.crypto_info_256;
+
+ memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be,
+ sizeof(info->rec_seq));
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ priv_rx->crypto_info.crypto_info.cipher_type);
+ spin_unlock_bh(&priv_rx->lock);
+ spin_unlock_bh(&ktls_resync->lock);
+ return;
+ }
+
+ if (list_empty(&priv_rx->list)) {
+ list_add_tail(&priv_rx->list, &ktls_resync->list);
+ trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+ }
+ spin_unlock_bh(&priv_rx->lock);
+ spin_unlock_bh(&ktls_resync->lock);
+
+ if (!trigger_poll)
+ return;
+
+ if (!napi_if_scheduled_mark_missed(&c->napi)) {
+ spin_lock_bh(&c->async_icosq_lock);
+ mlx5e_trigger_irq(sq);
+ spin_unlock_bh(&c->async_icosq_lock);
+ }
+}
+
+/* Function can be called with the refcount being either elevated or not.
+ * It decreases the refcount and may free the kTLS priv context.
+ * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was
+ * already in flight.
+ */
+void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
+ struct mlx5e_icosq *sq)
+{
+ struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf;
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ u8 tracker_state, auth_state, *ctx;
+ struct device *dev;
+ u32 hw_seq;
+
+ priv_rx = buf->priv_rx;
+ resync = &priv_rx->resync;
+ dev = mlx5_core_dma_dev(resync->priv->mdev);
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
+ goto out;
+
+ dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE,
+ DMA_FROM_DEVICE);
+
+ ctx = buf->progress.ctx;
+ tracker_state = MLX5_GET(tls_progress_params, ctx, record_tracker_state);
+ auth_state = MLX5_GET(tls_progress_params, ctx, auth_state);
+ if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING ||
+ auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) {
+ priv_rx->rq_stats->tls_resync_req_skip++;
+ goto out;
+ }
+
+ hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn);
+ tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq));
+ priv_rx->rq_stats->tls_resync_req_end++;
+out:
+ mlx5e_ktls_priv_rx_put(priv_rx);
+ dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+ kfree(buf);
+}
+
+/* Runs in NAPI.
+ * Function elevates the refcount, unless no work is queued.
+ */
+static bool resync_queue_get_psv(struct sock *sk)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+
+ priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk));
+ if (unlikely(!priv_rx))
+ return false;
+
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
+ return false;
+
+ resync = &priv_rx->resync;
+ mlx5e_ktls_priv_rx_get(priv_rx);
+ if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
+ mlx5e_ktls_priv_rx_put(priv_rx);
+
+ return true;
+}
+
+/* Runs in NAPI */
+static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)(skb->data);
+ struct net_device *netdev = rq->netdev;
+ struct net *net = dev_net(netdev);
+ struct sock *sk = NULL;
+ unsigned int datalen;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ __be32 seq;
+ int depth = 0;
+
+ __vlan_get_protocol(skb, eth->h_proto, &depth);
+ iph = (struct iphdr *)(skb->data + depth);
+
+ if (iph->version == 4) {
+ depth += sizeof(struct iphdr);
+ th = (void *)iph + sizeof(struct iphdr);
+
+ sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->saddr, th->source, iph->daddr,
+ th->dest, netdev->ifindex);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
+
+ depth += sizeof(struct ipv6hdr);
+ th = (void *)ipv6h + sizeof(struct ipv6hdr);
+
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &ipv6h->saddr, th->source,
+ &ipv6h->daddr, ntohs(th->dest),
+ netdev->ifindex, 0);
+#endif
+ }
+
+ depth += sizeof(struct tcphdr);
+
+ if (unlikely(!sk))
+ return;
+
+ if (unlikely(sk->sk_state == TCP_TIME_WAIT))
+ goto unref;
+
+ if (unlikely(!resync_queue_get_psv(sk)))
+ goto unref;
+
+ seq = th->seq;
+ datalen = skb->len - depth;
+ tls_offload_rx_resync_async_request_start(sk, seq, datalen);
+ rq->stats->tls_resync_req_start++;
+
+unref:
+ sock_gen_put(sk);
+}
+
+void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk,
+ u32 seq, u8 *rcd_sn)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct mlx5e_priv *priv;
+ struct mlx5e_channel *c;
+
+ priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk));
+ if (unlikely(!priv_rx))
+ return;
+
+ resync = &priv_rx->resync;
+ resync->sw_rcd_sn_be = *(__be64 *)rcd_sn;
+ resync->seq = seq;
+
+ priv = netdev_priv(netdev);
+ c = priv->channels.c[priv_rx->rxq];
+
+ resync_handle_seq_match(priv_rx, c);
+}
+
+/* End of resync section */
+
+void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, u32 *cqe_bcnt)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ switch (get_cqe_tls_offload(cqe)) {
+ case CQE_TLS_OFFLOAD_DECRYPTED:
+ skb->decrypted = 1;
+ stats->tls_decrypted_packets++;
+ stats->tls_decrypted_bytes += *cqe_bcnt;
+ break;
+ case CQE_TLS_OFFLOAD_RESYNC:
+ stats->tls_resync_req_pkt++;
+ resync_update_sn(rq, skb);
+ break;
+ default: /* CQE_TLS_OFFLOAD_ERROR: */
+ stats->tls_err++;
+ break;
+ }
+}
+
+void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx = wi->tls_set_params.priv_rx;
+ struct accel_rule *rule = &priv_rx->rule;
+
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
+ complete(&priv_rx->add_ctx);
+ return;
+ }
+ queue_work(rule->priv->tls->rx_wq, &rule->work);
+}
+
+static int mlx5e_ktls_sk_get_rxq(struct sock *sk)
+{
+ int rxq = sk_rx_queue_get(sk);
+
+ if (unlikely(rxq == -1))
+ rxq = 0;
+
+ return rxq;
+}
+
+int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct tls_context *tls_ctx;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+ int rxq, err;
+
+ tls_ctx = tls_get_ctx(sk);
+ priv = netdev_priv(netdev);
+ mdev = priv->mdev;
+ priv_rx = kzalloc(sizeof(*priv_rx), GFP_KERNEL);
+ if (unlikely(!priv_rx))
+ return -ENOMEM;
+
+ err = mlx5_ktls_create_key(mdev, crypto_info, &priv_rx->key_id);
+ if (err)
+ goto err_create_key;
+
+ INIT_LIST_HEAD(&priv_rx->list);
+ spin_lock_init(&priv_rx->lock);
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ priv_rx->crypto_info.crypto_info_128 =
+ *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ priv_rx->crypto_info.crypto_info_256 =
+ *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->cipher_type);
+ return -EOPNOTSUPP;
+ }
+
+ rxq = mlx5e_ktls_sk_get_rxq(sk);
+ priv_rx->rxq = rxq;
+ priv_rx->sk = sk;
+
+ priv_rx->rq_stats = &priv->channel_stats[rxq]->rq;
+ priv_rx->sw_stats = &priv->tls->sw_stats;
+ mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
+
+ err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir);
+ if (err)
+ goto err_create_tir;
+
+ init_completion(&priv_rx->add_ctx);
+
+ accel_rule_init(&priv_rx->rule, priv);
+ resync = &priv_rx->resync;
+ resync_init(resync, priv);
+ tls_offload_ctx_rx(tls_ctx)->resync_async = &resync->core;
+ tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC);
+
+ err = post_rx_param_wqes(priv->channels.c[rxq], priv_rx, start_offload_tcp_sn);
+ if (err)
+ goto err_post_wqes;
+
+ atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx);
+
+ return 0;
+
+err_post_wqes:
+ mlx5e_tir_destroy(&priv_rx->tir);
+err_create_tir:
+ mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
+err_create_key:
+ kfree(priv_rx);
+ return err;
+}
+
+void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+ mdev = priv->mdev;
+
+ priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx);
+ set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags);
+ mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL);
+ synchronize_net(); /* Sync with NAPI */
+ if (!cancel_work_sync(&priv_rx->rule.work))
+ /* completion is needed, as the priv_rx in the add flow
+ * is maintained on the wqe info (wi), not on the socket.
+ */
+ wait_for_completion(&priv_rx->add_ctx);
+ resync = &priv_rx->resync;
+ if (cancel_work_sync(&resync->work))
+ mlx5e_ktls_priv_rx_put(priv_rx);
+
+ atomic64_inc(&priv_rx->sw_stats->rx_tls_del);
+ if (priv_rx->rule.rule)
+ mlx5e_accel_fs_del_sk(priv_rx->rule.rule);
+
+ mlx5e_tir_destroy(&priv_rx->tir);
+ mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
+ /* priv_rx should normally be freed here, but if there is an outstanding
+ * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is
+ * processed.
+ */
+ mlx5e_ktls_priv_rx_put(priv_rx);
+}
+
+bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx, *tmp;
+ struct mlx5e_ktls_resync_resp *ktls_resync;
+ struct mlx5_wqe_ctrl_seg *db_cseg;
+ struct mlx5e_icosq *sq;
+ LIST_HEAD(local_list);
+ int i, j;
+
+ sq = &c->async_icosq;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ ktls_resync = sq->ktls_resync;
+ db_cseg = NULL;
+ i = 0;
+
+ spin_lock(&ktls_resync->lock);
+ list_for_each_entry_safe(priv_rx, tmp, &ktls_resync->list, list) {
+ list_move(&priv_rx->list, &local_list);
+ if (++i == budget)
+ break;
+ }
+ if (list_empty(&ktls_resync->list))
+ clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+ spin_unlock(&ktls_resync->lock);
+
+ spin_lock(&c->async_icosq_lock);
+ for (j = 0; j < i; j++) {
+ struct mlx5_wqe_ctrl_seg *cseg;
+
+ priv_rx = list_first_entry(&local_list,
+ struct mlx5e_ktls_offload_context_rx,
+ list);
+ spin_lock(&priv_rx->lock);
+ cseg = post_static_params(sq, priv_rx);
+ if (IS_ERR(cseg)) {
+ spin_unlock(&priv_rx->lock);
+ break;
+ }
+ list_del_init(&priv_rx->list);
+ spin_unlock(&priv_rx->lock);
+ db_cseg = cseg;
+ }
+ if (db_cseg)
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg);
+ spin_unlock(&c->async_icosq_lock);
+
+ priv_rx->rq_stats->tls_resync_res_ok += j;
+
+ if (!list_empty(&local_list)) {
+ /* This happens only if ICOSQ is full.
+ * There is no need to mark busy or explicitly ask for a NAPI cycle,
+ * it will be triggered by the outstanding ICOSQ completions.
+ */
+ spin_lock(&ktls_resync->lock);
+ list_splice(&local_list, &ktls_resync->list);
+ set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+ spin_unlock(&ktls_resync->lock);
+ priv_rx->rq_stats->tls_resync_res_retry++;
+ }
+
+ return i == budget;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
index 01468ec27446..7c1c0eb16787 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
@@ -35,55 +35,58 @@
#include <net/sock.h>
#include "en.h"
-#include "accel/tls.h"
#include "fpga/sdk.h"
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
-static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) },
+static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) },
};
#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
-#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc)
-
-int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv)
{
if (!priv->tls)
return 0;
- return NUM_TLS_SW_COUNTERS;
+ return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc);
}
-int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
{
- unsigned int i, idx = 0;
+ unsigned int i, n, idx = 0;
if (!priv->tls)
return 0;
- for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+ n = mlx5e_ktls_get_count(priv);
+
+ for (i = 0; i < n; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
- mlx5e_tls_sw_stats_desc[i].format);
+ mlx5e_ktls_sw_stats_desc[i].format);
- return NUM_TLS_SW_COUNTERS;
+ return n;
}
-int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
{
- int i, idx = 0;
+ unsigned int i, n, idx = 0;
if (!priv->tls)
return 0;
- for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
- data[idx++] =
- MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
- mlx5e_tls_sw_stats_desc, i);
+ n = mlx5e_ktls_get_count(priv);
+
+ for (i = 0; i < n; i++)
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+ mlx5e_ktls_sw_stats_desc,
+ i);
- return NUM_TLS_SW_COUNTERS;
+ return n;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index f260dd96873b..2e0335246967 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -1,122 +1,544 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2019 Mellanox Technologies.
-#include <linux/tls.h>
-#include "en.h"
-#include "en/txrx.h"
#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
+#include "en_accel/ktls_utils.h"
-enum {
- MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2,
+struct mlx5e_dump_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_data_seg data;
};
-enum {
- MLX5E_ENCRYPTION_STANDARD_TLS = 0x1,
-};
+#define MLX5E_KTLS_DUMP_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB))
-#define EXTRACT_INFO_FIELDS do { \
- salt = info->salt; \
- rec_seq = info->rec_seq; \
- salt_sz = sizeof(info->salt); \
- rec_seq_sz = sizeof(info->rec_seq); \
-} while (0)
+static u8
+mlx5e_ktls_dumps_num_wqes(struct mlx5e_params *params, unsigned int nfrags,
+ unsigned int sync_len)
+{
+ /* Given the MTU and sync_len, calculates an upper bound for the
+ * number of DUMP WQEs needed for the TX resync of a record.
+ */
+ return nfrags + DIV_ROUND_UP(sync_len, MLX5E_SW2HW_MTU(params, params->sw_mtu));
+}
-static void
-fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
- struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
- char *initial_rn, *gcm_iv;
- u16 salt_sz, rec_seq_sz;
- char *salt, *rec_seq;
- u8 tls_version;
+ u16 num_dumps, stop_room = 0;
- EXTRACT_INFO_FIELDS;
+ if (!mlx5e_is_ktls_tx(mdev))
+ return 0;
- gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
- initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number);
+ num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
- memcpy(gcm_iv, salt, salt_sz);
- memcpy(initial_rn, rec_seq, rec_seq_sz);
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
+ stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS);
+ stop_room += 1; /* fence nop */
- tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2;
+ return stop_room;
+}
- MLX5_SET(tls_static_params, ctx, tls_version, tls_version);
- MLX5_SET(tls_static_params, ctx, const_1, 1);
- MLX5_SET(tls_static_params, ctx, const_2, 2);
- MLX5_SET(tls_static_params, ctx, encryption_standard,
- MLX5E_ENCRYPTION_STANDARD_TLS);
- MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id);
+static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc)
+{
+ MLX5_SET(tisc, tisc, tls_en, 1);
+ MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
}
-static void
-build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn,
- struct mlx5e_ktls_offload_context_tx *priv_tx,
- bool fence)
+static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
{
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
-#define STATIC_PARAMS_DS_CNT \
- DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS)
+ mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx));
- cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR |
- (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24));
- cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
- STATIC_PARAMS_DS_CNT);
- cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
- cseg->tisn = cpu_to_be32(priv_tx->tisn << 8);
+ return mlx5_core_create_tis(mdev, in, tisn);
+}
+
+static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev,
+ struct mlx5_async_ctx *async_ctx,
+ u32 *out, int outlen,
+ mlx5_async_cbk_t callback,
+ struct mlx5_async_work *context)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
- ucseg->flags = MLX5_UMR_INLINE;
- ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16);
+ mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx));
+ MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
- fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx);
+ return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in),
+ out, outlen, callback, context);
}
-static void
-fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn,
+ struct mlx5_async_ctx *async_ctx,
+ u32 *out, int outlen,
+ mlx5_async_cbk_t callback,
+ struct mlx5_async_work *context)
{
- MLX5_SET(tls_progress_params, ctx, tisn, priv_tx->tisn);
- MLX5_SET(tls_progress_params, ctx, record_tracker_state,
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);
- MLX5_SET(tls_progress_params, ctx, auth_state,
- MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD);
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+
+ return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in),
+ out, outlen, callback, context);
}
+struct mlx5e_ktls_offload_context_tx {
+ /* fast path */
+ u32 expected_seq;
+ u32 tisn;
+ bool ctx_post_pending;
+ /* control / resync */
+ struct list_head list_node; /* member of the pool */
+ union mlx5e_crypto_info crypto_info;
+ struct tls_offload_context_tx *tx_ctx;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ u32 key_id;
+ u8 create_err : 1;
+};
+
static void
-build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn,
- struct mlx5e_ktls_offload_context_tx *priv_tx,
- bool fence)
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_tx *priv_tx)
{
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5e_ktls_offload_context_tx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
-#define PROGRESS_PARAMS_DS_CNT \
- DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS)
+ BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX);
- cseg->opmod_idx_opcode =
- cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV |
- (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24));
- cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
- PROGRESS_PARAMS_DS_CNT);
- cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+ *ctx = priv_tx;
+}
- fill_progress_params_ctx(wqe->tls_progress_params_ctx, priv_tx);
+static struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_tx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
+
+ return *ctx;
}
-static void tx_fill_wi(struct mlx5e_txqsq *sq,
- u16 pi, u8 num_wqebbs, u32 num_bytes,
- struct page *page)
+/* struct for callback API management */
+struct mlx5e_async_ctx {
+ struct mlx5_async_work context;
+ struct mlx5_async_ctx async_ctx;
+ struct work_struct work;
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct completion complete;
+ int err;
+ union {
+ u32 out_create[MLX5_ST_SZ_DW(create_tis_out)];
+ u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)];
+ };
+};
+
+static struct mlx5e_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n)
{
- struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
+ struct mlx5e_async_ctx *bulk_async;
+ int i;
+
+ bulk_async = kvcalloc(n, sizeof(struct mlx5e_async_ctx), GFP_KERNEL);
+ if (!bulk_async)
+ return NULL;
+
+ for (i = 0; i < n; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ mlx5_cmd_init_async_ctx(mdev, &async->async_ctx);
+ init_completion(&async->complete);
+ }
+
+ return bulk_async;
+}
+
+static void mlx5e_bulk_async_cleanup(struct mlx5e_async_ctx *bulk_async, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ mlx5_cmd_cleanup_async_ctx(&async->async_ctx);
+ }
+ kvfree(bulk_async);
+}
+
+static void create_tis_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5e_async_ctx *async =
+ container_of(context, struct mlx5e_async_ctx, context);
+ struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx;
+
+ if (status) {
+ async->err = status;
+ priv_tx->create_err = 1;
+ goto out;
+ }
+
+ priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn);
+out:
+ complete(&async->complete);
+}
+
+static void destroy_tis_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5e_async_ctx *async =
+ container_of(context, struct mlx5e_async_ctx, context);
+ struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx;
+
+ complete(&async->complete);
+ kfree(priv_tx);
+}
+
+static struct mlx5e_ktls_offload_context_tx *
+mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats,
+ struct mlx5e_async_ctx *async)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ int err;
+
+ priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL);
+ if (!priv_tx)
+ return ERR_PTR(-ENOMEM);
+
+ priv_tx->mdev = mdev;
+ priv_tx->sw_stats = sw_stats;
+
+ if (!async) {
+ err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn);
+ if (err)
+ goto err_out;
+ } else {
+ async->priv_tx = priv_tx;
+ err = mlx5e_ktls_create_tis_cb(mdev, &async->async_ctx,
+ async->out_create, sizeof(async->out_create),
+ create_tis_callback, &async->context);
+ if (err)
+ goto err_out;
+ }
+
+ return priv_tx;
+
+err_out:
+ kfree(priv_tx);
+ return ERR_PTR(err);
+}
+
+static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ struct mlx5e_async_ctx *async)
+{
+ if (priv_tx->create_err) {
+ complete(&async->complete);
+ kfree(priv_tx);
+ return;
+ }
+ async->priv_tx = priv_tx;
+ mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn,
+ &async->async_ctx,
+ async->out_destroy, sizeof(async->out_destroy),
+ destroy_tis_callback, &async->context);
+}
+
+static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev,
+ struct list_head *list, int size)
+{
+ struct mlx5e_ktls_offload_context_tx *obj, *n;
+ struct mlx5e_async_ctx *bulk_async;
+ int i;
+
+ bulk_async = mlx5e_bulk_async_init(mdev, size);
+ if (!bulk_async)
+ return;
+
+ i = 0;
+ list_for_each_entry_safe(obj, n, list, list_node) {
+ mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]);
+ i++;
+ }
+
+ for (i = 0; i < size; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ wait_for_completion(&async->complete);
+ }
+ mlx5e_bulk_async_cleanup(bulk_async, size);
+}
+
+/* Recycling pool API */
+
+#define MLX5E_TLS_TX_POOL_BULK (16)
+#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024)
+#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4)
+
+struct mlx5e_tls_tx_pool {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ struct mutex lock; /* Protects access to the pool */
+ struct list_head list;
+ size_t size;
+
+ struct workqueue_struct *wq;
+ struct work_struct create_work;
+ struct work_struct destroy_work;
+};
+
+static void create_work(struct work_struct *work)
+{
+ struct mlx5e_tls_tx_pool *pool =
+ container_of(work, struct mlx5e_tls_tx_pool, create_work);
+ struct mlx5e_ktls_offload_context_tx *obj;
+ struct mlx5e_async_ctx *bulk_async;
+ LIST_HEAD(local_list);
+ int i, j, err = 0;
+
+ bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK);
+ if (!bulk_async)
+ return;
+
+ for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) {
+ obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async[i]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ break;
+ }
+ list_add(&obj->list_node, &local_list);
+ }
+
+ for (j = 0; j < i; j++) {
+ struct mlx5e_async_ctx *async = &bulk_async[j];
+
+ wait_for_completion(&async->complete);
+ if (!err && async->err)
+ err = async->err;
+ }
+ atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc);
+ mlx5e_bulk_async_cleanup(bulk_async, MLX5E_TLS_TX_POOL_BULK);
+ if (err)
+ goto err_out;
+
+ mutex_lock(&pool->lock);
+ if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) {
+ mutex_unlock(&pool->lock);
+ goto err_out;
+ }
+ list_splice(&local_list, &pool->list);
+ pool->size += MLX5E_TLS_TX_POOL_BULK;
+ if (pool->size <= MLX5E_TLS_TX_POOL_LOW)
+ queue_work(pool->wq, work);
+ mutex_unlock(&pool->lock);
+ return;
+
+err_out:
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i);
+ atomic64_add(i, &pool->sw_stats->tx_tls_pool_free);
+}
+
+static void destroy_work(struct work_struct *work)
+{
+ struct mlx5e_tls_tx_pool *pool =
+ container_of(work, struct mlx5e_tls_tx_pool, destroy_work);
+ struct mlx5e_ktls_offload_context_tx *obj;
+ LIST_HEAD(local_list);
+ int i = 0;
+
+ mutex_lock(&pool->lock);
+ if (pool->size < MLX5E_TLS_TX_POOL_HIGH) {
+ mutex_unlock(&pool->lock);
+ return;
+ }
+
+ list_for_each_entry(obj, &pool->list, list_node)
+ if (++i == MLX5E_TLS_TX_POOL_BULK)
+ break;
+
+ list_cut_position(&local_list, &pool->list, &obj->list_node);
+ pool->size -= MLX5E_TLS_TX_POOL_BULK;
+ if (pool->size >= MLX5E_TLS_TX_POOL_HIGH)
+ queue_work(pool->wq, work);
+ mutex_unlock(&pool->lock);
+
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK);
+ atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free);
+}
+
+static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev,
+ struct mlx5e_tls_sw_stats *sw_stats)
+{
+ struct mlx5e_tls_tx_pool *pool;
+
+ BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH);
+
+ pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
- memset(wi, 0, sizeof(*wi));
- wi->num_wqebbs = num_wqebbs;
- wi->num_bytes = num_bytes;
- wi->resync_dump_frag_page = page;
+ pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool");
+ if (!pool->wq)
+ goto err_free;
+
+ INIT_LIST_HEAD(&pool->list);
+ mutex_init(&pool->lock);
+
+ INIT_WORK(&pool->create_work, create_work);
+ INIT_WORK(&pool->destroy_work, destroy_work);
+
+ pool->mdev = mdev;
+ pool->sw_stats = sw_stats;
+
+ return pool;
+
+err_free:
+ kvfree(pool);
+ return NULL;
}
-void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool)
{
+ while (pool->size > MLX5E_TLS_TX_POOL_BULK) {
+ struct mlx5e_ktls_offload_context_tx *obj;
+ LIST_HEAD(local_list);
+ int i = 0;
+
+ list_for_each_entry(obj, &pool->list, list_node)
+ if (++i == MLX5E_TLS_TX_POOL_BULK)
+ break;
+
+ list_cut_position(&local_list, &pool->list, &obj->list_node);
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK);
+ atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free);
+ pool->size -= MLX5E_TLS_TX_POOL_BULK;
+ }
+ if (pool->size) {
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &pool->list, pool->size);
+ atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free);
+ }
+}
+
+static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool)
+{
+ mlx5e_tls_tx_pool_list_cleanup(pool);
+ destroy_workqueue(pool->wq);
+ kvfree(pool);
+}
+
+static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj)
+{
+ mutex_lock(&pool->lock);
+ list_add(&obj->list_node, &pool->list);
+ if (++pool->size == MLX5E_TLS_TX_POOL_HIGH)
+ queue_work(pool->wq, &pool->destroy_work);
+ mutex_unlock(&pool->lock);
+}
+
+static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool)
+{
+ struct mlx5e_ktls_offload_context_tx *obj;
+
+ mutex_lock(&pool->lock);
+ if (unlikely(pool->size == 0)) {
+ /* pool is empty:
+ * - trigger the populating work, and
+ * - serve the current context via the regular blocking api.
+ */
+ queue_work(pool->wq, &pool->create_work);
+ mutex_unlock(&pool->lock);
+ obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL);
+ if (!IS_ERR(obj))
+ atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc);
+ return obj;
+ }
+
+ obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx,
+ list_node);
+ list_del(&obj->list_node);
+ if (--pool->size == MLX5E_TLS_TX_POOL_LOW)
+ queue_work(pool->wq, &pool->create_work);
+ mutex_unlock(&pool->lock);
+ return obj;
+}
+
+/* End of pool API */
+
+int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_tls_tx_pool *pool;
+ struct tls_context *tls_ctx;
+ struct mlx5e_priv *priv;
+ int err;
+
+ tls_ctx = tls_get_ctx(sk);
+ priv = netdev_priv(netdev);
+ pool = priv->tls->tx_pool;
+
+ priv_tx = pool_pop(pool);
+ if (IS_ERR(priv_tx))
+ return PTR_ERR(priv_tx);
+
+ err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id);
+ if (err)
+ goto err_create_key;
+
+ priv_tx->expected_seq = start_offload_tcp_sn;
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ priv_tx->crypto_info.crypto_info_128 =
+ *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ priv_tx->crypto_info.crypto_info_256 =
+ *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->cipher_type);
+ return -EOPNOTSUPP;
+ }
+ priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx);
+
+ mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx);
+
priv_tx->ctx_post_pending = true;
+ atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx);
+
+ return 0;
+
+err_create_key:
+ pool_push(pool, priv_tx);
+ return err;
+}
+
+void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_tls_tx_pool *pool;
+ struct mlx5e_priv *priv;
+
+ priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+ priv = netdev_priv(netdev);
+ pool = priv->tls->tx_pool;
+
+ atomic64_inc(&priv_tx->sw_stats->tx_tls_del);
+ mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id);
+ pool_push(pool, priv_tx);
+}
+
+static void tx_fill_wi(struct mlx5e_txqsq *sq,
+ u16 pi, u8 num_wqebbs, u32 num_bytes,
+ struct page *page)
+{
+ struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_bytes = num_bytes,
+ .resync_dump_frag_page = page,
+ };
}
static bool
@@ -134,13 +556,17 @@ post_static_params(struct mlx5e_txqsq *sq,
struct mlx5e_ktls_offload_context_tx *priv_tx,
bool fence)
{
- struct mlx5e_umr_wqe *umr_wqe;
- u16 pi;
-
- umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi);
- build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
- tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL);
- sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
+ struct mlx5e_set_tls_static_params_wqe *wqe;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS;
+ pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_tx->crypto_info,
+ priv_tx->tisn, priv_tx->key_id, 0, fence,
+ TLS_OFFLOAD_CTX_DIR_TX);
+ tx_fill_wi(sq, pi, num_wqebbs, 0, NULL);
+ sq->pc += num_wqebbs;
}
static void
@@ -148,13 +574,26 @@ post_progress_params(struct mlx5e_txqsq *sq,
struct mlx5e_ktls_offload_context_tx *priv_tx,
bool fence)
{
- struct mlx5e_tx_wqe *wqe;
- u16 pi;
+ struct mlx5e_set_tls_progress_params_wqe *wqe;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS;
+ pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_tx->tisn, fence, 0,
+ TLS_OFFLOAD_CTX_DIR_TX);
+ tx_fill_wi(sq, pi, num_wqebbs, 0, NULL);
+ sq->pc += num_wqebbs;
+}
+
+static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ tx_fill_wi(sq, pi, 1, 0, NULL);
- wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi);
- build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
- tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL);
- sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
+ mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
}
static void
@@ -163,19 +602,12 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
bool skip_static_post, bool fence_first_post)
{
bool progress_fence = skip_static_post || !fence_first_post;
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 contig_wqebbs_room, pi;
-
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room <
- MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS))
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
if (!skip_static_post)
post_static_params(sq, priv_tx, fence_first_post);
post_progress_params(sq, priv_tx, progress_fence);
+ tx_post_fence_nop(sq);
}
struct tx_sync_info {
@@ -218,7 +650,7 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
* this packet was already acknowledged and its record info
* was released.
*/
- ends_before = before(tcp_seq + datalen, tls_record_start_seq(record));
+ ends_before = before(tcp_seq + datalen - 1, tls_record_start_seq(record));
if (unlikely(tls_record_is_start_marker(record))) {
ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL;
@@ -251,14 +683,31 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
struct mlx5e_ktls_offload_context_tx *priv_tx,
u64 rcd_sn)
{
- struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
__be64 rn_be = cpu_to_be64(rcd_sn);
bool skip_static_post;
u16 rec_seq_sz;
char *rec_seq;
- rec_seq = info->rec_seq;
- rec_seq_sz = sizeof(info->rec_seq);
+ switch (priv_tx->crypto_info.crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info.crypto_info_128;
+
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info = &priv_tx->crypto_info.crypto_info_256;
+
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ priv_tx->crypto_info.crypto_info.cipher_type);
+ return;
+ }
skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
if (!skip_static_post)
@@ -268,7 +717,7 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
}
static int
-tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first)
+tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn)
{
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_data_seg *dseg;
@@ -278,7 +727,9 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
int fsz;
u16 pi;
- wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+ BUILD_BUG_ON(MLX5E_KTLS_DUMP_WQEBBS != 1);
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wqe = MLX5E_TLS_FETCH_DUMP_WQE(sq, pi);
ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
@@ -287,8 +738,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- cseg->tisn = cpu_to_be32(tisn << 8);
- cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+ cseg->tis_tir_num = cpu_to_be32(tisn << 8);
fsz = skb_frag_size(frag);
dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
@@ -314,9 +764,6 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_sq_stats *stats;
struct mlx5e_sq_dma *dma;
- if (!wi->resync_dump_frag_page)
- return;
-
dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
stats = sq->stats;
@@ -326,77 +773,39 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
stats->tls_dump_bytes += wi->num_bytes;
}
-static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-
- tx_fill_wi(sq, pi, 1, 0, NULL);
-
- mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
-}
-
static enum mlx5e_ktls_sync_retval
mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
struct mlx5e_txqsq *sq,
int datalen,
u32 seq)
{
- struct mlx5e_sq_stats *stats = sq->stats;
- struct mlx5_wq_cyc *wq = &sq->wq;
enum mlx5e_ktls_sync_retval ret;
struct tx_sync_info info = {};
- u16 contig_wqebbs_room, pi;
- u8 num_wqebbs;
- int i = 0;
+ int i;
ret = tx_sync_info_get(priv_tx, seq, datalen, &info);
- if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) {
- if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) {
- stats->tls_skip_no_sync_data++;
- return MLX5E_KTLS_SYNC_SKIP_NO_DATA;
- }
- /* We might get here if a retransmission reaches the driver
- * after the relevant record is acked.
+ if (unlikely(ret != MLX5E_KTLS_SYNC_DONE))
+ /* We might get here with ret == FAIL if a retransmission
+ * reaches the driver after the relevant record is acked.
* It should be safe to drop the packet in this case
*/
- stats->tls_drop_no_sync_data++;
- goto err_out;
- }
-
- stats->tls_ooo++;
+ return ret;
tx_post_resync_params(sq, priv_tx, info.rcd_sn);
- /* If no dump WQE was sent, we need to have a fence NOP WQE before the
- * actual data xmit.
- */
- if (!info.nr_frags) {
- tx_post_fence_nop(sq);
- return MLX5E_KTLS_SYNC_DONE;
- }
-
- num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-
- if (unlikely(contig_wqebbs_room < num_wqebbs))
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-
- for (; i < info.nr_frags; i++) {
+ for (i = 0; i < info.nr_frags; i++) {
unsigned int orig_fsz, frag_offset = 0, n = 0;
skb_frag_t *f = &info.frags[i];
orig_fsz = skb_frag_size(f);
do {
- bool fence = !(i || frag_offset);
unsigned int fsz;
n++;
fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset);
skb_frag_size_set(f, fsz);
- if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) {
+ if (tx_post_resync_dump(sq, f, priv_tx->tisn)) {
page_ref_add(skb_frag_page(f), n - 1);
goto err_out;
}
@@ -422,68 +831,91 @@ err_out:
return MLX5E_KTLS_SYNC_FAIL;
}
-struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
- struct mlx5e_txqsq *sq,
- struct sk_buff *skb,
- struct mlx5e_tx_wqe **wqe, u16 *pi)
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_tls_state *state)
{
struct mlx5e_ktls_offload_context_tx *priv_tx;
struct mlx5e_sq_stats *stats = sq->stats;
- struct mlx5_wqe_ctrl_seg *cseg;
+ struct net_device *tls_netdev;
struct tls_context *tls_ctx;
int datalen;
u32 seq;
- if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
- goto out;
-
- datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ datalen = skb->len - skb_tcp_all_headers(skb);
if (!datalen)
- goto out;
+ return true;
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
tls_ctx = tls_get_ctx(skb->sk);
- if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
+ tls_netdev = rcu_dereference_bh(tls_ctx->netdev);
+ /* Don't WARN on NULL: if tls_device_down is running in parallel,
+ * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was
+ * true. Rather continue processing this packet.
+ */
+ if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev))
goto err_out;
priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
- if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
+ if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx)))
mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
- *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
- stats->tls_ctx++;
- }
seq = ntohl(tcp_hdr(skb)->seq);
if (unlikely(priv_tx->expected_seq != seq)) {
enum mlx5e_ktls_sync_retval ret =
mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq);
+ stats->tls_ooo++;
+
switch (ret) {
case MLX5E_KTLS_SYNC_DONE:
- *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
break;
case MLX5E_KTLS_SYNC_SKIP_NO_DATA:
+ stats->tls_skip_no_sync_data++;
if (likely(!skb->decrypted))
goto out;
WARN_ON_ONCE(1);
- /* fall-through */
- default: /* MLX5E_KTLS_SYNC_FAIL */
+ goto err_out;
+ case MLX5E_KTLS_SYNC_FAIL:
+ stats->tls_drop_no_sync_data++;
goto err_out;
}
}
priv_tx->expected_seq = seq + datalen;
- cseg = &(*wqe)->ctrl;
- cseg->tisn = cpu_to_be32(priv_tx->tisn << 8);
+ state->tls_tisn = priv_tx->tisn;
stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
stats->tls_encrypted_bytes += datalen;
out:
- return skb;
+ return true;
err_out:
dev_kfree_skb_any(skb);
- return NULL;
+ return false;
+}
+
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_tx(priv->mdev))
+ return 0;
+
+ priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats);
+ if (!priv->tls->tx_pool)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_tx(priv->mdev))
+ return;
+
+ mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool);
+ priv->tls->tx_pool = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c
new file mode 100644
index 000000000000..570a912dd6fa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "en_accel/ktls_txrx.h"
+#include "en_accel/ktls_utils.h"
+
+enum {
+ MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2,
+};
+
+enum {
+ MLX5E_ENCRYPTION_STANDARD_TLS = 0x1,
+};
+
+#define EXTRACT_INFO_FIELDS do { \
+ salt = info->salt; \
+ rec_seq = info->rec_seq; \
+ salt_sz = sizeof(info->salt); \
+ rec_seq_sz = sizeof(info->rec_seq); \
+} while (0)
+
+static void
+fill_static_params(struct mlx5_wqe_tls_static_params_seg *params,
+ union mlx5e_crypto_info *crypto_info,
+ u32 key_id, u32 resync_tcp_sn)
+{
+ char *initial_rn, *gcm_iv;
+ u16 salt_sz, rec_seq_sz;
+ char *salt, *rec_seq;
+ u8 tls_version;
+ u8 *ctx;
+
+ ctx = params->ctx;
+
+ switch (crypto_info->crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ &crypto_info->crypto_info_128;
+
+ EXTRACT_INFO_FIELDS;
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ &crypto_info->crypto_info_256;
+
+ EXTRACT_INFO_FIELDS;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->crypto_info.cipher_type);
+ return;
+ }
+
+ gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
+ initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number);
+
+ memcpy(gcm_iv, salt, salt_sz);
+ memcpy(initial_rn, rec_seq, rec_seq_sz);
+
+ tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2;
+
+ MLX5_SET(tls_static_params, ctx, tls_version, tls_version);
+ MLX5_SET(tls_static_params, ctx, const_1, 1);
+ MLX5_SET(tls_static_params, ctx, const_2, 2);
+ MLX5_SET(tls_static_params, ctx, encryption_standard,
+ MLX5E_ENCRYPTION_STANDARD_TLS);
+ MLX5_SET(tls_static_params, ctx, resync_tcp_sn, resync_tcp_sn);
+ MLX5_SET(tls_static_params, ctx, dek_index, key_id);
+}
+
+void
+mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ union mlx5e_crypto_info *crypto_info,
+ u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn,
+ bool fence, enum tls_offload_ctx_dir direction)
+{
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ?
+ MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS :
+ MLX5_OPC_MOD_TLS_TIR_STATIC_PARAMS;
+
+#define STATIC_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | (opmod << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ STATIC_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+ cseg->tis_tir_num = cpu_to_be32(tis_tir_num << 8);
+
+ ucseg->flags = MLX5_UMR_INLINE;
+ ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16);
+
+ fill_static_params(&wqe->params, crypto_info, key_id, resync_tcp_sn);
+}
+
+static void
+fill_progress_params(struct mlx5_wqe_tls_progress_params_seg *params, u32 tis_tir_num,
+ u32 next_record_tcp_sn)
+{
+ u8 *ctx = params->ctx;
+
+ params->tis_tir_num = cpu_to_be32(tis_tir_num);
+
+ MLX5_SET(tls_progress_params, ctx, next_record_tcp_sn,
+ next_record_tcp_sn);
+ MLX5_SET(tls_progress_params, ctx, record_tracker_state,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);
+ MLX5_SET(tls_progress_params, ctx, auth_state,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD);
+}
+
+void
+mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ u32 tis_tir_num, bool fence,
+ u32 next_record_tcp_sn,
+ enum tls_offload_ctx_dir direction)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ?
+ MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS :
+ MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS;
+
+#define PROGRESS_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | (opmod << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ PROGRESS_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+ fill_progress_params(&wqe->params, tis_tir_num, next_record_tcp_sn);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
new file mode 100644
index 000000000000..2dd78dd4ad65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_KTLS_TXRX_H__
+#define __MLX5E_KTLS_TXRX_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <net/tls.h>
+#include "en.h"
+#include "en/txrx.h"
+
+struct mlx5e_accel_tx_tls_state {
+ u32 tls_tisn;
+};
+
+u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_tls_state *state);
+void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, u32 *cqe_bcnt);
+
+void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi);
+void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
+ struct mlx5e_icosq *sq);
+
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc);
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ if (unlikely(wi->resync_dump_frag_page)) {
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
+ return true;
+ }
+ return false;
+}
+
+bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget);
+
+static inline bool
+mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
+{
+ return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state);
+}
+
+static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb)
+{
+ return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
+}
+
+static inline void
+mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg,
+ struct mlx5e_accel_tx_tls_state *state)
+{
+ cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
+}
+#else
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ return false;
+}
+
+static inline bool
+mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
+{
+ return false;
+}
+
+static inline bool
+mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
+{
+ return false;
+}
+
+static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return 0;
+}
+
+static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe,
+ u32 *cqe_bcnt)
+{
+}
+#endif /* CONFIG_MLX5_EN_TLS */
+
+#endif /* __MLX5E_TLS_TXRX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
new file mode 100644
index 000000000000..3d79cd379890
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_KTLS_UTILS_H__
+#define __MLX5E_KTLS_UTILS_H__
+
+#include <net/tls.h>
+#include "en.h"
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2,
+};
+
+int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn);
+void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx);
+int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn);
+void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx);
+void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, u32 seq, u8 *rcd_sn);
+
+union mlx5e_crypto_info {
+ struct tls_crypto_info crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 crypto_info_128;
+ struct tls12_crypto_info_aes_gcm_256 crypto_info_256;
+};
+
+struct mlx5e_set_tls_static_params_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_umr_ctrl_seg uctrl;
+ struct mlx5_mkey_seg mkc;
+ struct mlx5_wqe_tls_static_params_seg params;
+};
+
+struct mlx5e_set_tls_progress_params_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_tls_progress_params_seg params;
+};
+
+struct mlx5e_get_tls_progress_params_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_seg_get_psv psv;
+};
+
+#define MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_static_params_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_progress_params_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KTLS_GET_PROGRESS_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_get_tls_progress_params_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi) \
+ ((struct mlx5e_set_tls_static_params_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_static_params_wqe)))
+
+#define MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi) \
+ ((struct mlx5e_set_tls_progress_params_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_progress_params_wqe)))
+
+#define MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi) \
+ ((struct mlx5e_get_tls_progress_params_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_get_tls_progress_params_wqe)))
+
+#define MLX5E_TLS_FETCH_DUMP_WQE(sq, pi) \
+ ((struct mlx5e_dump_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_dump_wqe)))
+
+void
+mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ union mlx5e_crypto_info *crypto_info,
+ u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn,
+ bool fence, enum tls_offload_ctx_dir direction);
+void
+mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ u32 tis_tir_num, bool fence,
+ u32 next_record_tcp_sn,
+ enum tls_offload_ctx_dir direction);
+
+#endif /* __MLX5E_TLS_UTILS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
new file mode 100644
index 000000000000..2ef36cb9555a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -0,0 +1,1861 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/xarray.h>
+
+#include "en.h"
+#include "lib/aso.h"
+#include "lib/mlx5.h"
+#include "en_accel/macsec.h"
+#include "en_accel/macsec_fs.h"
+
+#define MLX5_MACSEC_EPN_SCOPE_MID 0x80000000L
+#define MLX5E_MACSEC_ASO_CTX_SZ MLX5_ST_SZ_BYTES(macsec_aso)
+
+enum mlx5_macsec_aso_event_arm {
+ MLX5E_ASO_EPN_ARM = BIT(0),
+};
+
+enum {
+ MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET,
+};
+
+struct mlx5e_macsec_handle {
+ struct mlx5e_macsec *macsec;
+ u32 obj_id;
+ u8 idx;
+};
+
+enum {
+ MLX5_MACSEC_EPN,
+};
+
+struct mlx5e_macsec_aso_out {
+ u8 event_arm;
+ u32 mode_param;
+};
+
+struct mlx5e_macsec_aso_in {
+ u8 mode;
+ u32 obj_id;
+};
+
+struct mlx5e_macsec_epn_state {
+ u32 epn_msb;
+ u8 epn_enabled;
+ u8 overlap;
+};
+
+struct mlx5e_macsec_async_work {
+ struct mlx5e_macsec *macsec;
+ struct mlx5_core_dev *mdev;
+ struct work_struct work;
+ u32 obj_id;
+};
+
+struct mlx5e_macsec_sa {
+ bool active;
+ u8 assoc_num;
+ u32 macsec_obj_id;
+ u32 enc_key_id;
+ u32 next_pn;
+ sci_t sci;
+ salt_t salt;
+
+ struct rhash_head hash;
+ u32 fs_id;
+ union mlx5e_macsec_rule *macsec_rule;
+ struct rcu_head rcu_head;
+ struct mlx5e_macsec_epn_state epn_state;
+};
+
+struct mlx5e_macsec_rx_sc;
+struct mlx5e_macsec_rx_sc_xarray_element {
+ u32 fs_id;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+};
+
+struct mlx5e_macsec_rx_sc {
+ bool active;
+ sci_t sci;
+ struct mlx5e_macsec_sa *rx_sa[MACSEC_NUM_AN];
+ struct list_head rx_sc_list_element;
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ struct metadata_dst *md_dst;
+ struct rcu_head rcu_head;
+};
+
+struct mlx5e_macsec_umr {
+ dma_addr_t dma_addr;
+ u8 ctx[MLX5_ST_SZ_BYTES(macsec_aso)];
+ u32 mkey;
+};
+
+struct mlx5e_macsec_aso {
+ /* ASO */
+ struct mlx5_aso *maso;
+ /* Protects macsec ASO */
+ struct mutex aso_lock;
+ /* UMR */
+ struct mlx5e_macsec_umr *umr;
+
+ u32 pdn;
+};
+
+static const struct rhashtable_params rhash_sci = {
+ .key_len = sizeof_field(struct mlx5e_macsec_sa, sci),
+ .key_offset = offsetof(struct mlx5e_macsec_sa, sci),
+ .head_offset = offsetof(struct mlx5e_macsec_sa, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+struct mlx5e_macsec_device {
+ const struct net_device *netdev;
+ struct mlx5e_macsec_sa *tx_sa[MACSEC_NUM_AN];
+ struct list_head macsec_rx_sc_list_head;
+ unsigned char *dev_addr;
+ struct list_head macsec_device_list_element;
+};
+
+struct mlx5e_macsec {
+ struct list_head macsec_device_list_head;
+ int num_of_devices;
+ struct mlx5e_macsec_fs *macsec_fs;
+ struct mutex lock; /* Protects mlx5e_macsec internal contexts */
+
+ /* Tx sci -> fs id mapping handling */
+ struct rhashtable sci_hash; /* sci -> mlx5e_macsec_sa */
+
+ /* Rx fs_id -> rx_sc mapping */
+ struct xarray sc_xarray;
+
+ struct mlx5_core_dev *mdev;
+
+ /* Stats manage */
+ struct mlx5e_macsec_stats stats;
+
+ /* ASO */
+ struct mlx5e_macsec_aso aso;
+
+ struct notifier_block nb;
+ struct workqueue_struct *wq;
+};
+
+struct mlx5_macsec_obj_attrs {
+ u32 aso_pdn;
+ u32 next_pn;
+ __be64 sci;
+ u32 enc_key_id;
+ bool encrypt;
+ struct mlx5e_macsec_epn_state epn_state;
+ salt_t salt;
+ __be32 ssci;
+ bool replay_protect;
+ u32 replay_window;
+};
+
+struct mlx5_aso_ctrl_param {
+ u8 data_mask_mode;
+ u8 condition_0_operand;
+ u8 condition_1_operand;
+ u8 condition_0_offset;
+ u8 condition_1_offset;
+ u8 data_offset;
+ u8 condition_operand;
+ u32 condition_0_data;
+ u32 condition_0_mask;
+ u32 condition_1_data;
+ u32 condition_1_mask;
+ u64 bitwise_data;
+ u64 data_mask;
+};
+
+static int mlx5e_macsec_aso_reg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso)
+{
+ struct mlx5e_macsec_umr *umr;
+ struct device *dma_device;
+ dma_addr_t dma_addr;
+ int err;
+
+ umr = kzalloc(sizeof(*umr), GFP_KERNEL);
+ if (!umr) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ dma_device = &mdev->pdev->dev;
+ dma_addr = dma_map_single(dma_device, umr->ctx, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+ err = dma_mapping_error(dma_device, dma_addr);
+ if (err) {
+ mlx5_core_err(mdev, "Can't map dma device, err=%d\n", err);
+ goto out_dma;
+ }
+
+ err = mlx5e_create_mkey(mdev, aso->pdn, &umr->mkey);
+ if (err) {
+ mlx5_core_err(mdev, "Can't create mkey, err=%d\n", err);
+ goto out_mkey;
+ }
+
+ umr->dma_addr = dma_addr;
+
+ aso->umr = umr;
+
+ return 0;
+
+out_mkey:
+ dma_unmap_single(dma_device, dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+out_dma:
+ kfree(umr);
+ return err;
+}
+
+static void mlx5e_macsec_aso_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso)
+{
+ struct mlx5e_macsec_umr *umr = aso->umr;
+
+ mlx5_core_destroy_mkey(mdev, umr->mkey);
+ dma_unmap_single(&mdev->pdev->dev, umr->dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+ kfree(umr);
+}
+
+static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, void *aso_ctx)
+{
+ u8 window_sz;
+
+ if (!attrs->replay_protect)
+ return 0;
+
+ switch (attrs->replay_window) {
+ case 256:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT;
+ break;
+ case 128:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT;
+ break;
+ case 64:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT;
+ break;
+ case 32:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz);
+ MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION);
+
+ return 0;
+}
+
+static int mlx5e_macsec_create_object(struct mlx5_core_dev *mdev,
+ struct mlx5_macsec_obj_attrs *attrs,
+ bool is_tx,
+ u32 *macsec_obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_macsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ void *aso_ctx;
+ void *obj;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_macsec_obj_in, in, macsec_object);
+ aso_ctx = MLX5_ADDR_OF(macsec_offload_obj, obj, macsec_aso);
+
+ MLX5_SET(macsec_offload_obj, obj, confidentiality_en, attrs->encrypt);
+ MLX5_SET(macsec_offload_obj, obj, dekn, attrs->enc_key_id);
+ MLX5_SET(macsec_offload_obj, obj, aso_return_reg, MLX5_MACSEC_ASO_REG_C_4_5);
+ MLX5_SET(macsec_offload_obj, obj, macsec_aso_access_pd, attrs->aso_pdn);
+ MLX5_SET(macsec_aso, aso_ctx, mode_parameter, attrs->next_pn);
+
+ /* Epn */
+ if (attrs->epn_state.epn_enabled) {
+ void *salt_p;
+ int i;
+
+ MLX5_SET(macsec_aso, aso_ctx, epn_event_arm, 1);
+ MLX5_SET(macsec_offload_obj, obj, epn_en, 1);
+ MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb);
+ MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap);
+ MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)attrs->ssci);
+ salt_p = MLX5_ADDR_OF(macsec_offload_obj, obj, salt);
+ for (i = 0; i < 3 ; i++)
+ memcpy((u32 *)salt_p + i, &attrs->salt.bytes[4 * (2 - i)], 4);
+ } else {
+ MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)(attrs->sci));
+ }
+
+ MLX5_SET(macsec_aso, aso_ctx, valid, 0x1);
+ if (is_tx) {
+ MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_INC_SN);
+ } else {
+ err = macsec_set_replay_protection(attrs, aso_ctx);
+ if (err)
+ return err;
+ }
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev,
+ "MACsec offload: Failed to create MACsec object (err = %d)\n",
+ err);
+ return err;
+ }
+
+ *macsec_obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_sa *sa,
+ bool is_tx)
+{
+ int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
+
+ if ((is_tx) && sa->fs_id) {
+ /* Make sure ongoing datapath readers sees a valid SA */
+ rhashtable_remove_fast(&macsec->sci_hash, &sa->hash, rhash_sci);
+ sa->fs_id = 0;
+ }
+
+ if (!sa->macsec_rule)
+ return;
+
+ mlx5e_macsec_fs_del_rule(macsec->macsec_fs, sa->macsec_rule, action);
+ mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id);
+ sa->macsec_rule = NULL;
+}
+
+static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
+ struct mlx5e_macsec_sa *sa,
+ bool encrypt,
+ bool is_tx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5_macsec_rule_attrs rule_attrs;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_macsec_obj_attrs obj_attrs;
+ union mlx5e_macsec_rule *macsec_rule;
+ struct macsec_key *key;
+ int err;
+
+ obj_attrs.next_pn = sa->next_pn;
+ obj_attrs.sci = cpu_to_be64((__force u64)sa->sci);
+ obj_attrs.enc_key_id = sa->enc_key_id;
+ obj_attrs.encrypt = encrypt;
+ obj_attrs.aso_pdn = macsec->aso.pdn;
+ obj_attrs.epn_state = sa->epn_state;
+
+ if (is_tx) {
+ obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci);
+ key = &ctx->sa.tx_sa->key;
+ } else {
+ obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci);
+ key = &ctx->sa.rx_sa->key;
+ }
+
+ memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt));
+ obj_attrs.replay_window = ctx->secy->replay_window;
+ obj_attrs.replay_protect = ctx->secy->replay_protect;
+
+ err = mlx5e_macsec_create_object(mdev, &obj_attrs, is_tx, &sa->macsec_obj_id);
+ if (err)
+ return err;
+
+ rule_attrs.macsec_obj_id = sa->macsec_obj_id;
+ rule_attrs.sci = sa->sci;
+ rule_attrs.assoc_num = sa->assoc_num;
+ rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
+
+ macsec_rule = mlx5e_macsec_fs_add_rule(macsec->macsec_fs, ctx, &rule_attrs, &sa->fs_id);
+ if (!macsec_rule) {
+ err = -ENOMEM;
+ goto destroy_macsec_object;
+ }
+
+ sa->macsec_rule = macsec_rule;
+
+ if (is_tx) {
+ err = rhashtable_insert_fast(&macsec->sci_hash, &sa->hash, rhash_sci);
+ if (err)
+ goto destroy_macsec_object_and_rule;
+ }
+
+ return 0;
+
+destroy_macsec_object_and_rule:
+ mlx5e_macsec_cleanup_sa(macsec, sa, is_tx);
+destroy_macsec_object:
+ mlx5e_macsec_destroy_object(mdev, sa->macsec_obj_id);
+
+ return err;
+}
+
+static struct mlx5e_macsec_rx_sc *
+mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci)
+{
+ struct mlx5e_macsec_rx_sc *iter;
+
+ list_for_each_entry_rcu(iter, list, rx_sc_list_element) {
+ if (iter->sci == sci)
+ return iter;
+ }
+
+ return NULL;
+}
+
+static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_sa *rx_sa,
+ bool active)
+{
+ struct mlx5_core_dev *mdev = macsec->mdev;
+ struct mlx5_macsec_obj_attrs attrs = {};
+ int err = 0;
+
+ if (rx_sa->active != active)
+ return 0;
+
+ rx_sa->active = active;
+ if (!active) {
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ return 0;
+ }
+
+ attrs.sci = cpu_to_be64((__force u64)rx_sa->sci);
+ attrs.enc_key_id = rx_sa->enc_key_id;
+ err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx)
+{
+ const struct net_device *netdev = ctx->netdev;
+ const struct macsec_secy *secy = ctx->secy;
+
+ if (secy->validate_frames != MACSEC_VALIDATE_STRICT) {
+ netdev_err(netdev,
+ "MACsec offload is supported only when validate_frame is in strict mode\n");
+ return false;
+ }
+
+ if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) {
+ netdev_err(netdev, "MACsec offload is supported only when icv_len is %d\n",
+ MACSEC_DEFAULT_ICV_LEN);
+ return false;
+ }
+
+ if (!secy->protect_frames) {
+ netdev_err(netdev,
+ "MACsec offload is supported only when protect_frames is set\n");
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5e_macsec_device *
+mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec,
+ const struct macsec_context *ctx)
+{
+ struct mlx5e_macsec_device *iter;
+ const struct list_head *list;
+
+ list = &macsec->macsec_device_list_head;
+ list_for_each_entry_rcu(iter, list, macsec_device_list_element) {
+ if (iter->netdev == ctx->secy->netdev)
+ return iter;
+ }
+
+ return NULL;
+}
+
+static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key,
+ const pn_t *next_pn_halves)
+{
+ struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state;
+
+ sa->salt = key->salt;
+ epn_state->epn_enabled = 1;
+ epn_state->epn_msb = next_pn_halves->upper;
+ epn_state->overlap = next_pn_halves->lower < MLX5_MACSEC_EPN_SCOPE_MID ? 0 : 1;
+}
+
+static int mlx5e_macsec_add_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_secy *secy = ctx->secy;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (macsec_device->tx_sa[assoc_num]) {
+ netdev_err(ctx->netdev, "MACsec offload tx_sa: %d already exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ tx_sa = kzalloc(sizeof(*tx_sa), GFP_KERNEL);
+ if (!tx_sa) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tx_sa->active = ctx_tx_sa->active;
+ tx_sa->next_pn = ctx_tx_sa->next_pn_halves.lower;
+ tx_sa->sci = secy->sci;
+ tx_sa->assoc_num = assoc_num;
+
+ if (secy->xpn)
+ update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves);
+
+ err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len,
+ MLX5_ACCEL_OBJ_MACSEC_KEY,
+ &tx_sa->enc_key_id);
+ if (err)
+ goto destroy_sa;
+
+ macsec_device->tx_sa[assoc_num] = tx_sa;
+ if (!secy->operational ||
+ assoc_num != tx_sc->encoding_sa ||
+ !tx_sa->active)
+ goto out;
+
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto destroy_encryption_key;
+
+ mutex_unlock(&macsec->lock);
+
+ return 0;
+
+destroy_encryption_key:
+ macsec_device->tx_sa[assoc_num] = NULL;
+ mlx5_destroy_encryption_key(mdev, tx_sa->enc_key_id);
+destroy_sa:
+ kfree(tx_sa);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ struct net_device *netdev;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ netdev = ctx->netdev;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ tx_sa = macsec_device->tx_sa[assoc_num];
+ if (!tx_sa) {
+ netdev_err(netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (tx_sa->next_pn != ctx_tx_sa->next_pn_halves.lower) {
+ netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n",
+ assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (tx_sa->active == ctx_tx_sa->active)
+ goto out;
+
+ if (tx_sa->assoc_num != tx_sc->encoding_sa)
+ goto out;
+
+ if (ctx_tx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto out;
+ } else {
+ if (!tx_sa->macsec_rule) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ }
+
+ tx_sa->active = ctx_tx_sa->active;
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_txsa(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ tx_sa = macsec_device->tx_sa[assoc_num];
+ if (!tx_sa) {
+ netdev_err(ctx->netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id);
+ kfree_rcu(tx_sa);
+ macsec_device->tx_sa[assoc_num] = NULL;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t *sci)
+{
+ struct mlx5e_macsec_sa *macsec_sa;
+ u32 fs_id = 0;
+
+ rcu_read_lock();
+ macsec_sa = rhashtable_lookup(sci_hash, sci, rhash_sci);
+ if (macsec_sa)
+ fs_id = macsec_sa->fs_id;
+ rcu_read_unlock();
+
+ return fs_id;
+}
+
+static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct list_head *rx_sc_list;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sc_list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(rx_sc_list, ctx_rx_sc->sci);
+ if (rx_sc) {
+ netdev_err(ctx->netdev, "MACsec offload: rx_sc (sci %lld) already exists\n",
+ ctx_rx_sc->sci);
+ err = -EEXIST;
+ goto out;
+ }
+
+ rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL);
+ if (!rx_sc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sc_xarray_element = kzalloc(sizeof(*sc_xarray_element), GFP_KERNEL);
+ if (!sc_xarray_element) {
+ err = -ENOMEM;
+ goto destroy_rx_sc;
+ }
+
+ sc_xarray_element->rx_sc = rx_sc;
+ err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element,
+ XA_LIMIT(1, USHRT_MAX), GFP_KERNEL);
+ if (err)
+ goto destroy_sc_xarray_elemenet;
+
+ rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL);
+ if (!rx_sc->md_dst) {
+ err = -ENOMEM;
+ goto erase_xa_alloc;
+ }
+
+ rx_sc->sci = ctx_rx_sc->sci;
+ rx_sc->active = ctx_rx_sc->active;
+ list_add_rcu(&rx_sc->rx_sc_list_element, rx_sc_list);
+
+ rx_sc->sc_xarray_element = sc_xarray_element;
+ rx_sc->md_dst->u.macsec_info.sci = rx_sc->sci;
+ mutex_unlock(&macsec->lock);
+
+ return 0;
+
+erase_xa_alloc:
+ xa_erase(&macsec->sc_xarray, sc_xarray_element->fs_id);
+destroy_sc_xarray_elemenet:
+ kfree(sc_xarray_element);
+destroy_rx_sc:
+ kfree(rx_sc);
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int i;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx_rx_sc->sci);
+ if (!rx_sc) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sc->active = ctx_rx_sc->active;
+ if (rx_sc->active == ctx_rx_sc->active)
+ goto out;
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active);
+ if (err)
+ goto out;
+ }
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+ int i;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx->rx_sc->sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+
+ kfree(rx_sa);
+ rx_sc->rx_sa[i] = NULL;
+ }
+
+/*
+ * At this point the relevant MACsec offload Rx rule already removed at
+ * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current
+ * Rx related data propagating using xa_erase which uses rcu to sync,
+ * once fs_id is erased then this rx_sc is hidden from datapath.
+ */
+ list_del_rcu(&rx_sc->rx_sc_list_element);
+ xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id);
+ metadata_dst_free(rx_sc->md_dst);
+ kfree(rx_sc->sc_xarray_element);
+
+ kfree_rcu(rx_sc);
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ sci_t sci = ctx_rx_sa->sc->sci;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rx_sc->rx_sa[assoc_num]) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d already exist\n",
+ sci, assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ rx_sa = kzalloc(sizeof(*rx_sa), GFP_KERNEL);
+ if (!rx_sa) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rx_sa->active = ctx_rx_sa->active;
+ rx_sa->next_pn = ctx_rx_sa->next_pn;
+ rx_sa->sci = sci;
+ rx_sa->assoc_num = assoc_num;
+ rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id;
+
+ if (ctx->secy->xpn)
+ update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves);
+
+ err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len,
+ MLX5_ACCEL_OBJ_MACSEC_KEY,
+ &rx_sa->enc_key_id);
+ if (err)
+ goto destroy_sa;
+
+ rx_sc->rx_sa[assoc_num] = rx_sa;
+ if (!rx_sa->active)
+ goto out;
+
+ //TODO - add support for both authentication and encryption flows
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
+ if (err)
+ goto destroy_encryption_key;
+
+ goto out;
+
+destroy_encryption_key:
+ rx_sc->rx_sa[assoc_num] = NULL;
+ mlx5_destroy_encryption_key(mdev, rx_sa->enc_key_id);
+destroy_sa:
+ kfree(rx_sa);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ sci_t sci = ctx_rx_sa->sc->sci;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sa = rx_sc->rx_sa[assoc_num];
+ if (!rx_sa) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n",
+ sci, assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rx_sa->next_pn != ctx_rx_sa->next_pn_halves.lower) {
+ netdev_err(ctx->netdev,
+ "MACsec offload update RX sa %d PN isn't supported\n",
+ assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ sci_t sci = ctx->sa.rx_sa->sc->sci;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sa = rx_sc->rx_sa[assoc_num];
+ if (!rx_sa) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n",
+ sci, assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+ kfree(rx_sa);
+ rx_sc->rx_sa[assoc_num] = NULL;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_add_secy(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ const struct net_device *netdev = ctx->netdev;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ if (!mlx5e_macsec_secy_features_validate(ctx))
+ return -EINVAL;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ if (mlx5e_macsec_get_macsec_device_context(macsec, ctx)) {
+ netdev_err(netdev, "MACsec offload: MACsec net_device already exist\n");
+ goto out;
+ }
+
+ if (macsec->num_of_devices >= MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES) {
+ netdev_err(netdev, "Currently, only %d MACsec offload devices can be set\n",
+ MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES);
+ err = -EBUSY;
+ goto out;
+ }
+
+ macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL);
+ if (!macsec_device) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ macsec_device->dev_addr = kmemdup(dev->dev_addr, dev->addr_len, GFP_KERNEL);
+ if (!macsec_device->dev_addr) {
+ kfree(macsec_device);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ macsec_device->netdev = dev;
+
+ INIT_LIST_HEAD_RCU(&macsec_device->macsec_rx_sc_list_head);
+ list_add_rcu(&macsec_device->macsec_device_list_element, &macsec->macsec_device_list_head);
+
+ ++macsec->num_of_devices;
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
+ struct mlx5e_macsec_device *macsec_device)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct list_head *list;
+ int i, err = 0;
+
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa || !rx_sa->macsec_rule)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ }
+ }
+
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ if (rx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false);
+ if (err)
+ goto out;
+ }
+ }
+ }
+
+ memcpy(macsec_device->dev_addr, dev->dev_addr, dev->addr_len);
+out:
+ return err;
+}
+
+/* this function is called from 2 macsec ops functions:
+ * macsec_set_mac_address – MAC address was changed, therefore we need to destroy
+ * and create new Tx contexts(macsec object + steering).
+ * macsec_changelink – in this case the tx SC or SecY may be changed, therefore need to
+ * destroy Tx and Rx contexts(macsec object + steering)
+ */
+static int mlx5e_macsec_upd_secy(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int i, err = 0;
+
+ if (!mlx5e_macsec_secy_features_validate(ctx))
+ return -EINVAL;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* if the dev_addr hasn't change, it mean the callback is from macsec_changelink */
+ if (!memcmp(macsec_device->dev_addr, dev->dev_addr, dev->addr_len)) {
+ err = macsec_upd_secy_hw_address(ctx, macsec_device);
+ if (err)
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_secy(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+ int i;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id);
+ kfree(tx_sa);
+ macsec_device->tx_sa[i] = NULL;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+ kfree(rx_sa);
+ rx_sc->rx_sa[i] = NULL;
+ }
+
+ list_del_rcu(&rx_sc->rx_sc_list_element);
+
+ kfree_rcu(rx_sc);
+ }
+
+ kfree(macsec_device->dev_addr);
+ macsec_device->dev_addr = NULL;
+
+ list_del_rcu(&macsec_device->macsec_device_list_element);
+ --macsec->num_of_devices;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static void macsec_build_accel_attrs(struct mlx5e_macsec_sa *sa,
+ struct mlx5_macsec_obj_attrs *attrs)
+{
+ attrs->epn_state.epn_msb = sa->epn_state.epn_msb;
+ attrs->epn_state.overlap = sa->epn_state.overlap;
+}
+
+static void macsec_aso_build_wqe_ctrl_seg(struct mlx5e_macsec_aso *macsec_aso,
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl,
+ struct mlx5_aso_ctrl_param *param)
+{
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ if (macsec_aso->umr->dma_addr) {
+ aso_ctrl->va_l = cpu_to_be32(macsec_aso->umr->dma_addr | ASO_CTRL_READ_EN);
+ aso_ctrl->va_h = cpu_to_be32((u64)macsec_aso->umr->dma_addr >> 32);
+ aso_ctrl->l_key = cpu_to_be32(macsec_aso->umr->mkey);
+ }
+
+ if (!param)
+ return;
+
+ aso_ctrl->data_mask_mode = param->data_mask_mode << 6;
+ aso_ctrl->condition_1_0_operand = param->condition_1_operand |
+ param->condition_0_operand << 4;
+ aso_ctrl->condition_1_0_offset = param->condition_1_offset |
+ param->condition_0_offset << 4;
+ aso_ctrl->data_offset_condition_operand = param->data_offset |
+ param->condition_operand << 6;
+ aso_ctrl->condition_0_data = cpu_to_be32(param->condition_0_data);
+ aso_ctrl->condition_0_mask = cpu_to_be32(param->condition_0_mask);
+ aso_ctrl->condition_1_data = cpu_to_be32(param->condition_1_data);
+ aso_ctrl->condition_1_mask = cpu_to_be32(param->condition_1_mask);
+ aso_ctrl->bitwise_data = cpu_to_be64(param->bitwise_data);
+ aso_ctrl->data_mask = cpu_to_be64(param->data_mask);
+}
+
+static int mlx5e_macsec_modify_obj(struct mlx5_core_dev *mdev, struct mlx5_macsec_obj_attrs *attrs,
+ u32 macsec_id)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_macsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(query_macsec_obj_out)];
+ u64 modify_field_select = 0;
+ void *obj;
+ int err;
+
+ /* General object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_id);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev, "Query MACsec object failed (Object id %d), err = %d\n",
+ macsec_id, err);
+ return err;
+ }
+
+ obj = MLX5_ADDR_OF(query_macsec_obj_out, out, macsec_object);
+ modify_field_select = MLX5_GET64(macsec_offload_obj, obj, modify_field_select);
+
+ /* EPN */
+ if (!(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP) ||
+ !(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB)) {
+ mlx5_core_dbg(mdev, "MACsec object field is not modifiable (Object id %d)\n",
+ macsec_id);
+ return -EOPNOTSUPP;
+ }
+
+ obj = MLX5_ADDR_OF(modify_macsec_obj_in, in, macsec_object);
+ MLX5_SET64(macsec_offload_obj, obj, modify_field_select,
+ MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP | MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB);
+ MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb);
+ MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap);
+
+ /* General object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void macsec_aso_build_ctrl(struct mlx5e_macsec_aso *aso,
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl,
+ struct mlx5e_macsec_aso_in *in)
+{
+ struct mlx5_aso_ctrl_param param = {};
+
+ param.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT;
+ param.condition_0_operand = MLX5_ASO_ALWAYS_TRUE;
+ param.condition_1_operand = MLX5_ASO_ALWAYS_TRUE;
+ if (in->mode == MLX5_MACSEC_EPN) {
+ param.data_offset = MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
+ param.bitwise_data = BIT_ULL(54);
+ param.data_mask = param.bitwise_data;
+ }
+ macsec_aso_build_wqe_ctrl_seg(aso, aso_ctrl, &param);
+}
+
+static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_aso_in *in)
+{
+ struct mlx5e_macsec_aso *aso;
+ struct mlx5_aso_wqe *aso_wqe;
+ struct mlx5_aso *maso;
+ int err;
+
+ aso = &macsec->aso;
+ maso = aso->maso;
+
+ mutex_lock(&aso->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(maso);
+ mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC);
+ macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in);
+ mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl);
+ err = mlx5_aso_poll_cq(maso, false);
+ mutex_unlock(&aso->aso_lock);
+
+ return err;
+}
+
+static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_aso_in *in, struct mlx5e_macsec_aso_out *out)
+{
+ struct mlx5e_macsec_aso *aso;
+ struct mlx5_aso_wqe *aso_wqe;
+ struct mlx5_aso *maso;
+ int err;
+
+ aso = &macsec->aso;
+ maso = aso->maso;
+
+ mutex_lock(&aso->aso_lock);
+
+ aso_wqe = mlx5_aso_get_wqe(maso);
+ mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC);
+ macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL);
+
+ mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl);
+ err = mlx5_aso_poll_cq(maso, false);
+ if (err)
+ goto err_out;
+
+ if (MLX5_GET(macsec_aso, aso->umr->ctx, epn_event_arm))
+ out->event_arm |= MLX5E_ASO_EPN_ARM;
+
+ out->mode_param = MLX5_GET(macsec_aso, aso->umr->ctx, mode_parameter);
+
+err_out:
+ mutex_unlock(&aso->aso_lock);
+ return err;
+}
+
+static struct mlx5e_macsec_sa *get_macsec_tx_sa_from_obj_id(const struct mlx5e_macsec *macsec,
+ const u32 obj_id)
+{
+ const struct list_head *device_list;
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec_device *iter;
+ int i;
+
+ device_list = &macsec->macsec_device_list_head;
+
+ list_for_each_entry(iter, device_list, macsec_device_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ macsec_sa = iter->tx_sa[i];
+ if (!macsec_sa || !macsec_sa->active)
+ continue;
+ if (macsec_sa->macsec_obj_id == obj_id)
+ return macsec_sa;
+ }
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_macsec_sa *get_macsec_rx_sa_from_obj_id(const struct mlx5e_macsec *macsec,
+ const u32 obj_id)
+{
+ const struct list_head *device_list, *sc_list;
+ struct mlx5e_macsec_rx_sc *mlx5e_rx_sc;
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec_device *iter;
+ int i;
+
+ device_list = &macsec->macsec_device_list_head;
+
+ list_for_each_entry(iter, device_list, macsec_device_list_element) {
+ sc_list = &iter->macsec_rx_sc_list_head;
+ list_for_each_entry(mlx5e_rx_sc, sc_list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ macsec_sa = mlx5e_rx_sc->rx_sa[i];
+ if (!macsec_sa || !macsec_sa->active)
+ continue;
+ if (macsec_sa->macsec_obj_id == obj_id)
+ return macsec_sa;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void macsec_epn_update(struct mlx5e_macsec *macsec, struct mlx5_core_dev *mdev,
+ struct mlx5e_macsec_sa *sa, u32 obj_id, u32 mode_param)
+{
+ struct mlx5_macsec_obj_attrs attrs = {};
+ struct mlx5e_macsec_aso_in in = {};
+
+ /* When the bottom of the replay protection window (mode_param) crosses 2^31 (half sequence
+ * number wraparound) hence mode_param > MLX5_MACSEC_EPN_SCOPE_MID the SW should update the
+ * esn_overlap to OLD (1).
+ * When the bottom of the replay protection window (mode_param) crosses 2^32 (full sequence
+ * number wraparound) hence mode_param < MLX5_MACSEC_EPN_SCOPE_MID since it did a
+ * wraparound, the SW should update the esn_overlap to NEW (0), and increment the esn_msb.
+ */
+
+ if (mode_param < MLX5_MACSEC_EPN_SCOPE_MID) {
+ sa->epn_state.epn_msb++;
+ sa->epn_state.overlap = 0;
+ } else {
+ sa->epn_state.overlap = 1;
+ }
+
+ macsec_build_accel_attrs(sa, &attrs);
+ mlx5e_macsec_modify_obj(mdev, &attrs, obj_id);
+
+ /* Re-set EPN arm event */
+ in.obj_id = obj_id;
+ in.mode = MLX5_MACSEC_EPN;
+ macsec_aso_set_arm_event(mdev, macsec, &in);
+}
+
+static void macsec_async_event(struct work_struct *work)
+{
+ struct mlx5e_macsec_async_work *async_work;
+ struct mlx5e_macsec_aso_out out = {};
+ struct mlx5e_macsec_aso_in in = {};
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec *macsec;
+ struct mlx5_core_dev *mdev;
+ u32 obj_id;
+
+ async_work = container_of(work, struct mlx5e_macsec_async_work, work);
+ macsec = async_work->macsec;
+ mdev = async_work->mdev;
+ obj_id = async_work->obj_id;
+ macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id);
+ if (!macsec_sa) {
+ macsec_sa = get_macsec_rx_sa_from_obj_id(macsec, obj_id);
+ if (!macsec_sa) {
+ mlx5_core_dbg(mdev, "MACsec SA is not found (SA object id %d)\n", obj_id);
+ goto out_async_work;
+ }
+ }
+
+ /* Query MACsec ASO context */
+ in.obj_id = obj_id;
+ macsec_aso_query(mdev, macsec, &in, &out);
+
+ /* EPN case */
+ if (macsec_sa->epn_state.epn_enabled && !(out.event_arm & MLX5E_ASO_EPN_ARM))
+ macsec_epn_update(macsec, mdev, macsec_sa, obj_id, out.mode_param);
+
+out_async_work:
+ kfree(async_work);
+}
+
+static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_macsec *macsec = container_of(nb, struct mlx5e_macsec, nb);
+ struct mlx5e_macsec_async_work *async_work;
+ struct mlx5_eqe_obj_change *obj_change;
+ struct mlx5_eqe *eqe = data;
+ u16 obj_type;
+ u32 obj_id;
+
+ if (event != MLX5_EVENT_TYPE_OBJECT_CHANGE)
+ return NOTIFY_DONE;
+
+ obj_change = &eqe->data.obj_change;
+ obj_type = be16_to_cpu(obj_change->obj_type);
+ obj_id = be32_to_cpu(obj_change->obj_id);
+
+ if (obj_type != MLX5_GENERAL_OBJECT_TYPES_MACSEC)
+ return NOTIFY_DONE;
+
+ async_work = kzalloc(sizeof(*async_work), GFP_ATOMIC);
+ if (!async_work)
+ return NOTIFY_DONE;
+
+ async_work->macsec = macsec;
+ async_work->mdev = macsec->mdev;
+ async_work->obj_id = obj_id;
+
+ INIT_WORK(&async_work->work, macsec_async_event);
+
+ WARN_ON(!queue_work(macsec->wq, &async_work->work));
+
+ return NOTIFY_OK;
+}
+
+static int mlx5e_macsec_aso_init(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev)
+{
+ struct mlx5_aso *maso;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &aso->pdn);
+ if (err) {
+ mlx5_core_err(mdev,
+ "MACsec offload: Failed to alloc pd for MACsec ASO, err=%d\n",
+ err);
+ return err;
+ }
+
+ maso = mlx5_aso_create(mdev, aso->pdn);
+ if (IS_ERR(maso)) {
+ err = PTR_ERR(maso);
+ goto err_aso;
+ }
+
+ err = mlx5e_macsec_aso_reg_mr(mdev, aso);
+ if (err)
+ goto err_aso_reg;
+
+ mutex_init(&aso->aso_lock);
+
+ aso->maso = maso;
+
+ return 0;
+
+err_aso_reg:
+ mlx5_aso_destroy(maso);
+err_aso:
+ mlx5_core_dealloc_pd(mdev, aso->pdn);
+ return err;
+}
+
+static void mlx5e_macsec_aso_cleanup(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev)
+{
+ if (!aso)
+ return;
+
+ mlx5e_macsec_aso_dereg_mr(mdev, aso);
+
+ mlx5_aso_destroy(aso->maso);
+
+ mlx5_core_dealloc_pd(mdev, aso->pdn);
+}
+
+bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev)
+{
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt))
+ return false;
+
+ return true;
+}
+
+void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats)
+{
+ mlx5e_macsec_fs_get_stats_fill(macsec->macsec_fs, macsec_stats);
+}
+
+struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec)
+{
+ if (!macsec)
+ return NULL;
+
+ return &macsec->stats;
+}
+
+static const struct macsec_ops macsec_offload_ops = {
+ .mdo_add_txsa = mlx5e_macsec_add_txsa,
+ .mdo_upd_txsa = mlx5e_macsec_upd_txsa,
+ .mdo_del_txsa = mlx5e_macsec_del_txsa,
+ .mdo_add_rxsc = mlx5e_macsec_add_rxsc,
+ .mdo_upd_rxsc = mlx5e_macsec_upd_rxsc,
+ .mdo_del_rxsc = mlx5e_macsec_del_rxsc,
+ .mdo_add_rxsa = mlx5e_macsec_add_rxsa,
+ .mdo_upd_rxsa = mlx5e_macsec_upd_rxsa,
+ .mdo_del_rxsa = mlx5e_macsec_del_rxsa,
+ .mdo_add_secy = mlx5e_macsec_add_secy,
+ .mdo_upd_secy = mlx5e_macsec_upd_secy,
+ .mdo_del_secy = mlx5e_macsec_del_secy,
+};
+
+bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ u32 fs_id;
+
+ fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci);
+ if (!fs_id)
+ goto err_out;
+
+ return true;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return false;
+}
+
+void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ u32 fs_id;
+
+ fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci);
+ if (!fs_id)
+ return;
+
+ eseg->flow_table_metadata = cpu_to_be32(MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2);
+}
+
+void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec *macsec;
+ u32 fs_id;
+
+ macsec = priv->macsec;
+ if (!macsec)
+ return;
+
+ fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data);
+
+ rcu_read_lock();
+ sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id);
+ rx_sc = sc_xarray_element->rx_sc;
+ if (rx_sc) {
+ dst_hold(&rx_sc->md_dst->dst);
+ skb_dst_set(skb, &rx_sc->md_dst->dst);
+ }
+
+ rcu_read_unlock();
+}
+
+void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return;
+
+ /* Enable MACsec */
+ mlx5_core_dbg(priv->mdev, "mlx5e: MACsec acceleration enabled\n");
+ netdev->macsec_ops = &macsec_offload_ops;
+ netdev->features |= NETIF_F_HW_MACSEC;
+ netif_keep_dst(netdev);
+}
+
+int mlx5e_macsec_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_macsec *macsec = NULL;
+ struct mlx5e_macsec_fs *macsec_fs;
+ int err;
+
+ if (!mlx5e_is_macsec_device(priv->mdev)) {
+ mlx5_core_dbg(mdev, "Not a MACsec offload device\n");
+ return 0;
+ }
+
+ macsec = kzalloc(sizeof(*macsec), GFP_KERNEL);
+ if (!macsec)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&macsec->macsec_device_list_head);
+ mutex_init(&macsec->lock);
+
+ err = rhashtable_init(&macsec->sci_hash, &rhash_sci);
+ if (err) {
+ mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n",
+ err);
+ goto err_hash;
+ }
+
+ err = mlx5e_macsec_aso_init(&macsec->aso, priv->mdev);
+ if (err) {
+ mlx5_core_err(mdev, "MACsec offload: Failed to init aso, err=%d\n", err);
+ goto err_aso;
+ }
+
+ macsec->wq = alloc_ordered_workqueue("mlx5e_macsec_%s", 0, priv->netdev->name);
+ if (!macsec->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ xa_init_flags(&macsec->sc_xarray, XA_FLAGS_ALLOC1);
+
+ priv->macsec = macsec;
+
+ macsec->mdev = mdev;
+
+ macsec_fs = mlx5e_macsec_fs_init(mdev, priv->netdev);
+ if (!macsec_fs) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ macsec->macsec_fs = macsec_fs;
+
+ macsec->nb.notifier_call = macsec_obj_change_event;
+ mlx5_notifier_register(mdev, &macsec->nb);
+
+ mlx5_core_dbg(mdev, "MACsec attached to netdevice\n");
+
+ return 0;
+
+err_out:
+ destroy_workqueue(macsec->wq);
+err_wq:
+ mlx5e_macsec_aso_cleanup(&macsec->aso, priv->mdev);
+err_aso:
+ rhashtable_destroy(&macsec->sci_hash);
+err_hash:
+ kfree(macsec);
+ priv->macsec = NULL;
+ return err;
+}
+
+void mlx5e_macsec_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!macsec)
+ return;
+
+ mlx5_notifier_unregister(mdev, &macsec->nb);
+ mlx5e_macsec_fs_cleanup(macsec->macsec_fs);
+ destroy_workqueue(macsec->wq);
+ mlx5e_macsec_aso_cleanup(&macsec->aso, mdev);
+ rhashtable_destroy(&macsec->sci_hash);
+ mutex_destroy(&macsec->lock);
+ kfree(macsec);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
new file mode 100644
index 000000000000..d580b4a91253
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_ACCEL_MACSEC_H__
+#define __MLX5_EN_ACCEL_MACSEC_H__
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+
+#include <linux/mlx5/driver.h>
+#include <net/macsec.h>
+#include <net/dst_metadata.h>
+
+/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */
+#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1)
+#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0))
+
+struct mlx5e_priv;
+struct mlx5e_macsec;
+
+struct mlx5e_macsec_stats {
+ u64 macsec_rx_pkts;
+ u64 macsec_rx_bytes;
+ u64 macsec_rx_pkts_drop;
+ u64 macsec_rx_bytes_drop;
+ u64 macsec_tx_pkts;
+ u64 macsec_tx_bytes;
+ u64 macsec_tx_pkts_drop;
+ u64 macsec_tx_bytes_drop;
+};
+
+void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_macsec_init(struct mlx5e_priv *priv);
+void mlx5e_macsec_cleanup(struct mlx5e_priv *priv);
+bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb);
+void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg);
+
+static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+ return md_dst && (md_dst->type == METADATA_MACSEC);
+}
+
+static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe)
+{
+ return MLX5_MACSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
+}
+
+void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe);
+bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev);
+void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats);
+struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec);
+
+#else
+
+static inline void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) {}
+static inline int mlx5e_macsec_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) {}
+static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) { return false; }
+static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; }
+static inline void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{}
+static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) { return false; }
+#endif /* CONFIG_MLX5_EN_MACSEC */
+
+#endif /* __MLX5_ACCEL_EN_MACSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
new file mode 100644
index 000000000000..1ac0cf04e811
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -0,0 +1,1384 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/macsec.h>
+#include <linux/netdevice.h>
+#include <linux/mlx5/qp.h>
+#include "fs_core.h"
+#include "en/fs.h"
+#include "en_accel/macsec_fs.h"
+#include "mlx5_core.h"
+
+/* MACsec TX flow steering */
+#define CRYPTO_NUM_MAXSEC_FTE BIT(15)
+#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1
+
+#define TX_CRYPTO_TABLE_LEVEL 0
+#define TX_CRYPTO_TABLE_NUM_GROUPS 3
+#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1
+#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \
+ (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \
+ CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE))
+#define TX_CHECK_TABLE_LEVEL 1
+#define TX_CHECK_TABLE_NUM_FTE 2
+#define RX_CRYPTO_TABLE_LEVEL 0
+#define RX_CHECK_TABLE_LEVEL 1
+#define RX_CHECK_TABLE_NUM_FTE 3
+#define RX_CRYPTO_TABLE_NUM_GROUPS 3
+#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \
+ ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2)
+#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \
+ (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE)
+#define RX_NUM_OF_RULES_PER_SA 2
+
+#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET)
+#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8
+#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN)
+
+/* MACsec RX flow steering */
+#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E
+
+struct mlx5_sectag_header {
+ __be16 ethertype;
+ u8 tci_an;
+ u8 sl;
+ u32 pn;
+ u8 sci[MACSEC_SCI_LEN]; /* optional */
+} __packed;
+
+struct mlx5e_macsec_tx_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ u32 fs_id;
+};
+
+struct mlx5e_macsec_tables {
+ struct mlx5e_flow_table ft_crypto;
+ struct mlx5_flow_handle *crypto_miss_rule;
+
+ struct mlx5_flow_table *ft_check;
+ struct mlx5_flow_group *ft_check_group;
+ struct mlx5_fc *check_miss_rule_counter;
+ struct mlx5_flow_handle *check_miss_rule;
+ struct mlx5_fc *check_rule_counter;
+
+ u32 refcnt;
+};
+
+struct mlx5e_macsec_tx {
+ struct mlx5_flow_handle *crypto_mke_rule;
+ struct mlx5_flow_handle *check_rule;
+
+ struct ida tx_halloc;
+
+ struct mlx5e_macsec_tables tables;
+};
+
+struct mlx5e_macsec_rx_rule {
+ struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA];
+ struct mlx5_modify_hdr *meta_modhdr;
+};
+
+struct mlx5e_macsec_rx {
+ struct mlx5_flow_handle *check_rule[2];
+ struct mlx5_pkt_reformat *check_rule_pkt_reformat[2];
+
+ struct mlx5e_macsec_tables tables;
+};
+
+union mlx5e_macsec_rule {
+ struct mlx5e_macsec_tx_rule tx_rule;
+ struct mlx5e_macsec_rx_rule rx_rule;
+};
+
+struct mlx5e_macsec_fs {
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_macsec_tx *tx_fs;
+ struct mlx5e_macsec_rx *rx_fs;
+};
+
+static void macsec_fs_tx_destroy(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5e_macsec_tables *tx_tables;
+
+ tx_tables = &tx_fs->tables;
+
+ /* Tx check table */
+ if (tx_fs->check_rule) {
+ mlx5_del_flow_rules(tx_fs->check_rule);
+ tx_fs->check_rule = NULL;
+ }
+
+ if (tx_tables->check_miss_rule) {
+ mlx5_del_flow_rules(tx_tables->check_miss_rule);
+ tx_tables->check_miss_rule = NULL;
+ }
+
+ if (tx_tables->ft_check_group) {
+ mlx5_destroy_flow_group(tx_tables->ft_check_group);
+ tx_tables->ft_check_group = NULL;
+ }
+
+ if (tx_tables->ft_check) {
+ mlx5_destroy_flow_table(tx_tables->ft_check);
+ tx_tables->ft_check = NULL;
+ }
+
+ /* Tx crypto table */
+ if (tx_fs->crypto_mke_rule) {
+ mlx5_del_flow_rules(tx_fs->crypto_mke_rule);
+ tx_fs->crypto_mke_rule = NULL;
+ }
+
+ if (tx_tables->crypto_miss_rule) {
+ mlx5_del_flow_rules(tx_tables->crypto_miss_rule);
+ tx_tables->crypto_miss_rule = NULL;
+ }
+
+ mlx5e_destroy_flow_table(&tx_tables->ft_crypto);
+}
+
+static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ /* Flow Group for MKE match */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for SA rules */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+ MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_table
+ *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags,
+ int level, int max_fte)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *fdb = NULL;
+
+ /* reserve entry for the match all miss group and rule */
+ ft_attr.autogroup.num_reserved_entries = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.prio = 0;
+ ft_attr.flags = flags;
+ ft_attr.level = level;
+ ft_attr.max_fte = max_fte;
+
+ fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+
+ return fdb;
+}
+
+static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_table *flow_table;
+ struct mlx5_flow_group *flow_group;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err = 0;
+
+ ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+ if (!ns)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ goto out_spec;
+
+ tx_tables = &tx_fs->tables;
+ ft_crypto = &tx_tables->ft_crypto;
+
+ /* Tx crypto table */
+ ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft_attr.level = TX_CRYPTO_TABLE_LEVEL;
+ ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+ flow_table = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec Tx crypto table err(%d)\n", err);
+ goto out_flow_group;
+ }
+ ft_crypto->t = flow_table;
+
+ /* Tx crypto table groups */
+ err = macsec_fs_tx_create_crypto_table_groups(ft_crypto);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+
+ /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */
+ memset(&flow_act, 0, sizeof(flow_act));
+ memset(spec, 0, sizeof(*spec));
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec TX MKE rule, err=%d\n", err);
+ goto err;
+ }
+ tx_fs->crypto_mke_rule = rule;
+
+ /* Tx crypto table Default miss rule */
+ memset(&flow_act, 0, sizeof(flow_act));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Tx table default miss rule %d\n", err);
+ goto err;
+ }
+ tx_tables->crypto_miss_rule = rule;
+
+ /* Tx check table */
+ flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL,
+ TX_CHECK_TABLE_NUM_FTE);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "fail to create MACsec TX check table, err(%d)\n", err);
+ goto err;
+ }
+ tx_tables->ft_check = flow_table;
+
+ /* Tx check table Default miss group/rule */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+ flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in);
+ if (IS_ERR(flow_group)) {
+ err = PTR_ERR(flow_group);
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+ tx_tables->ft_check_group = flow_group;
+
+ /* Tx check table default drop rule */
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to added MACsec tx check drop rule, err(%d)\n", err);
+ goto err;
+ }
+ tx_tables->check_miss_rule = rule;
+
+ /* Tx check table rule */
+ memset(spec, 0, sizeof(struct mlx5_flow_spec));
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ flow_act.flags = FLOW_ACT_NO_APPEND;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter);
+ rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec check rule, err=%d\n", err);
+ goto err;
+ }
+ tx_fs->check_rule = rule;
+
+ goto out_flow_group;
+
+err:
+ macsec_fs_tx_destroy(macsec_fs);
+out_flow_group:
+ kvfree(flow_group_in);
+out_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int macsec_fs_tx_ft_get(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5e_macsec_tables *tx_tables;
+ int err = 0;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt)
+ goto out;
+
+ err = macsec_fs_tx_create(macsec_fs);
+ if (err)
+ return err;
+
+out:
+ tx_tables->refcnt++;
+ return err;
+}
+
+static void macsec_fs_tx_ft_put(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+
+ if (--tx_tables->refcnt)
+ return;
+
+ macsec_fs_tx_destroy(macsec_fs);
+}
+
+static int macsec_fs_tx_setup_fte(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ u32 macsec_obj_id,
+ u32 *fs_id)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ int err = 0;
+ u32 id;
+
+ err = ida_alloc_range(&tx_fs->tx_halloc, 1,
+ MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES,
+ GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ id = err;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ /* Metadata match */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC | id << 2);
+
+ *fs_id = id;
+ flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+ flow_act->crypto.obj_id = macsec_obj_id;
+
+ mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id);
+ return 0;
+}
+
+static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx,
+ char *reformatbf,
+ size_t *reformat_size)
+{
+ const struct macsec_secy *secy = ctx->secy;
+ bool sci_present = macsec_send_sci(secy);
+ struct mlx5_sectag_header sectag = {};
+ const struct macsec_tx_sc *tx_sc;
+
+ tx_sc = &secy->tx_sc;
+ sectag.ethertype = htons(ETH_P_MACSEC);
+
+ if (sci_present) {
+ sectag.tci_an |= MACSEC_TCI_SC;
+ memcpy(&sectag.sci, &secy->sci,
+ sizeof(sectag.sci));
+ } else {
+ if (tx_sc->end_station)
+ sectag.tci_an |= MACSEC_TCI_ES;
+ if (tx_sc->scb)
+ sectag.tci_an |= MACSEC_TCI_SCB;
+ }
+
+ /* With GCM, C/E clear for !encrypt, both set for encrypt */
+ if (tx_sc->encrypt)
+ sectag.tci_an |= MACSEC_TCI_CONFID;
+ else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN)
+ sectag.tci_an |= MACSEC_TCI_C;
+
+ sectag.tci_an |= tx_sc->encoding_sa;
+
+ *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0);
+
+ memcpy(reformatbf, &sectag, *reformat_size);
+}
+
+static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5e_macsec_tx_rule *tx_rule)
+{
+ if (tx_rule->rule) {
+ mlx5_del_flow_rules(tx_rule->rule);
+ tx_rule->rule = NULL;
+ }
+
+ if (tx_rule->pkt_reformat) {
+ mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat);
+ tx_rule->pkt_reformat = NULL;
+ }
+
+ if (tx_rule->fs_id) {
+ ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id);
+ tx_rule->fs_id = 0;
+ }
+
+ kfree(tx_rule);
+
+ macsec_fs_tx_ft_put(macsec_fs);
+}
+
+static union mlx5e_macsec_rule *
+macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id)
+{
+ char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN];
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ union mlx5e_macsec_rule *macsec_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5e_macsec_tx_rule *tx_rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ size_t reformat_size;
+ int err = 0;
+ u32 fs_id;
+
+ tx_tables = &tx_fs->tables;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ err = macsec_fs_tx_ft_get(macsec_fs);
+ if (err)
+ goto out_spec;
+
+ macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+ if (!macsec_rule) {
+ macsec_fs_tx_ft_put(macsec_fs);
+ goto out_spec;
+ }
+
+ tx_rule = &macsec_rule->tx_rule;
+
+ /* Tx crypto table crypto rule */
+ macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size);
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC;
+ reformat_params.size = reformat_size;
+ reformat_params.data = reformatbf;
+ flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+ if (IS_ERR(flow_act.pkt_reformat)) {
+ err = PTR_ERR(flow_act.pkt_reformat);
+ netdev_err(netdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err);
+ goto err;
+ }
+ tx_rule->pkt_reformat = flow_act.pkt_reformat;
+
+ err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, &fs_id);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to add packet reformat for MACsec TX crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+
+ tx_rule->fs_id = fs_id;
+ *sa_fs_id = fs_id;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = tx_tables->ft_check;
+ rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec TX crypto rule, err=%d\n", err);
+ goto err;
+ }
+ tx_rule->rule = rule;
+
+ goto out_spec;
+
+err:
+ macsec_fs_tx_del_rule(macsec_fs, tx_rule);
+ macsec_rule = NULL;
+out_spec:
+ kvfree(spec);
+
+ return macsec_rule;
+}
+
+static void macsec_fs_tx_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *tx_tables;
+
+ if (!tx_fs)
+ return;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt) {
+ netdev_err(macsec_fs->netdev,
+ "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n",
+ tx_tables->refcnt);
+ return;
+ }
+
+ ida_destroy(&tx_fs->tx_halloc);
+
+ if (tx_tables->check_miss_rule_counter) {
+ mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter);
+ tx_tables->check_miss_rule_counter = NULL;
+ }
+
+ if (tx_tables->check_rule_counter) {
+ mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+ tx_tables->check_rule_counter = NULL;
+ }
+
+ kfree(tx_fs);
+ macsec_fs->tx_fs = NULL;
+}
+
+static int macsec_fs_tx_init(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5e_macsec_tx *tx_fs;
+ struct mlx5_fc *flow_counter;
+ int err;
+
+ tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL);
+ if (!tx_fs)
+ return -ENOMEM;
+
+ tx_tables = &tx_fs->tables;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Tx encrypt flow counter, err(%d)\n",
+ err);
+ goto err_encrypt_counter;
+ }
+ tx_tables->check_rule_counter = flow_counter;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Tx drop flow counter, err(%d)\n",
+ err);
+ goto err_drop_counter;
+ }
+ tx_tables->check_miss_rule_counter = flow_counter;
+
+ ida_init(&tx_fs->tx_halloc);
+
+ macsec_fs->tx_fs = tx_fs;
+
+ return 0;
+
+err_drop_counter:
+ mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+ tx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+ kfree(tx_fs);
+ macsec_fs->tx_fs = NULL;
+
+ return err;
+}
+
+static void macsec_fs_rx_destroy(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct mlx5e_macsec_tables *rx_tables;
+ int i;
+
+ /* Rx check table */
+ for (i = 1; i >= 0; --i) {
+ if (rx_fs->check_rule[i]) {
+ mlx5_del_flow_rules(rx_fs->check_rule[i]);
+ rx_fs->check_rule[i] = NULL;
+ }
+
+ if (rx_fs->check_rule_pkt_reformat[i]) {
+ mlx5_packet_reformat_dealloc(macsec_fs->mdev,
+ rx_fs->check_rule_pkt_reformat[i]);
+ rx_fs->check_rule_pkt_reformat[i] = NULL;
+ }
+ }
+
+ rx_tables = &rx_fs->tables;
+
+ if (rx_tables->check_miss_rule) {
+ mlx5_del_flow_rules(rx_tables->check_miss_rule);
+ rx_tables->check_miss_rule = NULL;
+ }
+
+ if (rx_tables->ft_check_group) {
+ mlx5_destroy_flow_group(rx_tables->ft_check_group);
+ rx_tables->ft_check_group = NULL;
+ }
+
+ if (rx_tables->ft_check) {
+ mlx5_destroy_flow_table(rx_tables->ft_check);
+ rx_tables->ft_check = NULL;
+ }
+
+ /* Rx crypto table */
+ if (rx_tables->crypto_miss_rule) {
+ mlx5_del_flow_rules(rx_tables->crypto_miss_rule);
+ rx_tables->crypto_miss_rule = NULL;
+ }
+
+ mlx5e_destroy_flow_table(&rx_tables->ft_crypto);
+}
+
+static int macsec_fs_rx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ /* Flow group for SA rule with SCI */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS_5);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK <<
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+ MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2);
+ MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow group for SA rule without SCI */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS_5);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static int macsec_fs_rx_create_check_decap_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec,
+ int reformat_param_size)
+{
+ int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 0 : 1;
+ u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI];
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5_flow_handle *rule;
+ int err = 0;
+
+ rx_tables = &rx_fs->tables;
+
+ /* Rx check table decap 16B rule */
+ memset(dest, 0, sizeof(*dest));
+ memset(flow_act, 0, sizeof(*flow_act));
+ memset(spec, 0, sizeof(*spec));
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC;
+ reformat_params.size = reformat_param_size;
+ reformat_params.data = mlx5_reformat_buf;
+ flow_act->pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+ if (IS_ERR(flow_act->pkt_reformat)) {
+ err = PTR_ERR(flow_act->pkt_reformat);
+ netdev_err(netdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err);
+ return err;
+ }
+ rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat;
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ /* MACsec syndrome match */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0);
+ /* ASO return reg syndrome match */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+ /* Sectag TCI SC present bit*/
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI)
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT <<
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ flow_act->flags = FLOW_ACT_NO_APPEND;
+ flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest->type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest->counter_id = mlx5_fc_id(rx_tables->check_rule_counter);
+ rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Rx check rule, err=%d\n", err);
+ return err;
+ }
+
+ rx_fs->check_rule[rule_index] = rule;
+
+ return 0;
+}
+
+static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_table *flow_table;
+ struct mlx5_flow_group *flow_group;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err = 0;
+
+ ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+ if (!ns)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ goto free_spec;
+
+ rx_tables = &rx_fs->tables;
+ ft_crypto = &rx_tables->ft_crypto;
+
+ /* Rx crypto table */
+ ft_attr.level = RX_CRYPTO_TABLE_LEVEL;
+ ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+ flow_table = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec Rx crypto table err(%d)\n", err);
+ goto out_flow_group;
+ }
+ ft_crypto->t = flow_table;
+
+ /* Rx crypto table groups */
+ err = macsec_fs_rx_create_crypto_table_groups(ft_crypto);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add MACsec Rx crypto table default miss rule %d\n",
+ err);
+ goto err;
+ }
+ rx_tables->crypto_miss_rule = rule;
+
+ /* Rx check table */
+ flow_table = macsec_fs_auto_group_table_create(ns,
+ MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT,
+ RX_CHECK_TABLE_LEVEL,
+ RX_CHECK_TABLE_NUM_FTE);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "fail to create MACsec RX check table, err(%d)\n", err);
+ goto err;
+ }
+ rx_tables->ft_check = flow_table;
+
+ /* Rx check table Default miss group/rule */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+ flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in);
+ if (IS_ERR(flow_group)) {
+ err = PTR_ERR(flow_group);
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Rx check table err(%d)\n",
+ err);
+ goto err;
+ }
+ rx_tables->ft_check_group = flow_group;
+
+ /* Rx check table default drop rule */
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to added MACsec Rx check drop rule, err(%d)\n", err);
+ goto err;
+ }
+ rx_tables->check_miss_rule = rule;
+
+ /* Rx check table decap rules */
+ err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+ MLX5_SECTAG_HEADER_SIZE_WITH_SCI);
+ if (err)
+ goto err;
+
+ err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+ MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI);
+ if (err)
+ goto err;
+
+ goto out_flow_group;
+
+err:
+ macsec_fs_rx_destroy(macsec_fs);
+out_flow_group:
+ kvfree(flow_group_in);
+free_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int macsec_fs_rx_ft_get(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+ int err = 0;
+
+ if (rx_tables->refcnt)
+ goto out;
+
+ err = macsec_fs_rx_create(macsec_fs);
+ if (err)
+ return err;
+
+out:
+ rx_tables->refcnt++;
+ return err;
+}
+
+static void macsec_fs_rx_ft_put(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+
+ if (--rx_tables->refcnt)
+ return;
+
+ macsec_fs_rx_destroy(macsec_fs);
+}
+
+static void macsec_fs_rx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5e_macsec_rx_rule *rx_rule)
+{
+ int i;
+
+ for (i = 0; i < RX_NUM_OF_RULES_PER_SA; ++i) {
+ if (rx_rule->rule[i]) {
+ mlx5_del_flow_rules(rx_rule->rule[i]);
+ rx_rule->rule[i] = NULL;
+ }
+ }
+
+ if (rx_rule->meta_modhdr) {
+ mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr);
+ rx_rule->meta_modhdr = NULL;
+ }
+
+ kfree(rx_rule);
+
+ macsec_fs_rx_ft_put(macsec_fs);
+}
+
+static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_macsec_rule_attrs *attrs,
+ bool sci_present)
+{
+ u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num;
+ struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto;
+ __be32 *sci_p = (__be32 *)(&attrs->sci);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ /* MACsec ethertype */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+
+ /* Sectag AN + TCI SC present bit*/
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+ tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ if (sci_present) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters_5.macsec_tag_2);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2,
+ be32_to_cpu(sci_p[0]));
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters_5.macsec_tag_3);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3,
+ be32_to_cpu(sci_p[1]));
+ } else {
+ /* When SCI isn't present in the Sectag, need to match the source */
+ /* MAC address only if the SCI contains the default MACsec PORT */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16),
+ sci_p, ETH_ALEN);
+ }
+
+ crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+ crypto_params->obj_id = attrs->macsec_obj_id;
+}
+
+static union mlx5e_macsec_rule *
+macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 fs_id)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ union mlx5e_macsec_rule *macsec_rule = NULL;
+ struct mlx5_modify_hdr *modify_hdr = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_macsec_rx_rule *rx_rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ err = macsec_fs_rx_ft_get(macsec_fs);
+ if (err)
+ goto out_spec;
+
+ macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+ if (!macsec_rule) {
+ macsec_fs_rx_ft_put(macsec_fs);
+ goto out_spec;
+ }
+
+ rx_rule = &macsec_rule->rx_rule;
+ rx_tables = &rx_fs->tables;
+ ft_crypto = &rx_tables->ft_crypto;
+
+ /* Set bit[31 - 30] macsec marker - 0x01 */
+ /* Set bit[3-0] fs id */
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ MLX5_SET(set_action_in, action, data, fs_id | BIT(30));
+ MLX5_SET(set_action_in, action, offset, 0);
+ MLX5_SET(set_action_in, action, length, 32);
+
+ modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC,
+ 1, action);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ netdev_err(netdev, "fail to alloc MACsec set modify_header_id err=%d\n", err);
+ modify_hdr = NULL;
+ goto err;
+ }
+ rx_rule->meta_modhdr = modify_hdr;
+
+ /* Rx crypto table with SCI rule */
+ macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true);
+
+ flow_act.modify_hdr = modify_hdr;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rx_tables->ft_check;
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+ rx_rule->rule[0] = rule;
+
+ /* Rx crypto table without SCI rule */
+ if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) {
+ memset(spec, 0, sizeof(struct mlx5_flow_spec));
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false);
+
+ flow_act.modify_hdr = modify_hdr;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rx_tables->ft_check;
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+ rx_rule->rule[1] = rule;
+ }
+
+ return macsec_rule;
+
+err:
+ macsec_fs_rx_del_rule(macsec_fs, rx_rule);
+ macsec_rule = NULL;
+out_spec:
+ kvfree(spec);
+ return macsec_rule;
+}
+
+static int macsec_fs_rx_init(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_macsec_rx *rx_fs;
+ struct mlx5_fc *flow_counter;
+ int err;
+
+ rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL);
+ if (!rx_fs)
+ return -ENOMEM;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Rx encrypt flow counter, err(%d)\n",
+ err);
+ goto err_encrypt_counter;
+ }
+
+ rx_tables = &rx_fs->tables;
+ rx_tables->check_rule_counter = flow_counter;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Rx drop flow counter, err(%d)\n",
+ err);
+ goto err_drop_counter;
+ }
+ rx_tables->check_miss_rule_counter = flow_counter;
+
+ macsec_fs->rx_fs = rx_fs;
+
+ return 0;
+
+err_drop_counter:
+ mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+ rx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+ kfree(rx_fs);
+ macsec_fs->rx_fs = NULL;
+
+ return err;
+}
+
+static void macsec_fs_rx_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *rx_tables;
+
+ if (!rx_fs)
+ return;
+
+ rx_tables = &rx_fs->tables;
+
+ if (rx_tables->refcnt) {
+ netdev_err(macsec_fs->netdev,
+ "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n",
+ rx_tables->refcnt);
+ return;
+ }
+
+ if (rx_tables->check_miss_rule_counter) {
+ mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter);
+ rx_tables->check_miss_rule_counter = NULL;
+ }
+
+ if (rx_tables->check_rule_counter) {
+ mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+ rx_tables->check_rule_counter = NULL;
+ }
+
+ kfree(rx_fs);
+ macsec_fs->rx_fs = NULL;
+}
+
+void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats)
+{
+ struct mlx5e_macsec_stats *stats = (struct mlx5e_macsec_stats *)macsec_stats;
+ struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+
+ if (tx_tables->check_rule_counter)
+ mlx5_fc_query(mdev, tx_tables->check_rule_counter,
+ &stats->macsec_tx_pkts, &stats->macsec_tx_bytes);
+
+ if (tx_tables->check_miss_rule_counter)
+ mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter,
+ &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop);
+
+ if (rx_tables->check_rule_counter)
+ mlx5_fc_query(mdev, rx_tables->check_rule_counter,
+ &stats->macsec_rx_pkts, &stats->macsec_rx_bytes);
+
+ if (rx_tables->check_miss_rule_counter)
+ mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter,
+ &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop);
+}
+
+union mlx5e_macsec_rule *
+mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id)
+{
+ return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+ macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) :
+ macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id);
+}
+
+void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ union mlx5e_macsec_rule *macsec_rule,
+ int action)
+{
+ (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+ macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule) :
+ macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule);
+}
+
+void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ macsec_fs_rx_cleanup(macsec_fs);
+ macsec_fs_tx_cleanup(macsec_fs);
+ kfree(macsec_fs);
+}
+
+struct mlx5e_macsec_fs *
+mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ struct mlx5e_macsec_fs *macsec_fs;
+ int err;
+
+ macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL);
+ if (!macsec_fs)
+ return NULL;
+
+ macsec_fs->mdev = mdev;
+ macsec_fs->netdev = netdev;
+
+ err = macsec_fs_tx_init(macsec_fs);
+ if (err) {
+ netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err);
+ goto err;
+ }
+
+ err = macsec_fs_rx_init(macsec_fs);
+ if (err) {
+ netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err);
+ goto tx_cleanup;
+ }
+
+ return macsec_fs;
+
+tx_cleanup:
+ macsec_fs_tx_cleanup(macsec_fs);
+err:
+ kfree(macsec_fs);
+ return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h
new file mode 100644
index 000000000000..b429648d4ee7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_MACSEC_STEERING_H__
+#define __MLX5_MACSEC_STEERING_H__
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+
+#include "en_accel/macsec.h"
+
+#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16
+
+struct mlx5e_macsec_fs;
+union mlx5e_macsec_rule;
+
+struct mlx5_macsec_rule_attrs {
+ sci_t sci;
+ u32 macsec_obj_id;
+ u8 assoc_num;
+ int action;
+};
+
+enum mlx5_macsec_action {
+ MLX5_ACCEL_MACSEC_ACTION_ENCRYPT,
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT,
+};
+
+void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs);
+
+struct mlx5e_macsec_fs *
+mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, struct net_device *netdev);
+
+union mlx5e_macsec_rule *
+mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id);
+
+void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ union mlx5e_macsec_rule *macsec_rule,
+ int action);
+
+void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats);
+
+#endif
+
+#endif /* __MLX5_MACSEC_STEERING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c
new file mode 100644
index 000000000000..e50a2e3f3d18
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "en_accel/macsec.h"
+
+static const struct counter_desc mlx5e_macsec_hw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes_drop) },
+};
+
+#define NUM_MACSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_macsec_hw_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(macsec_hw)
+{
+ if (!priv->macsec)
+ return 0;
+
+ if (mlx5e_is_macsec_device(priv->mdev))
+ return NUM_MACSEC_HW_COUNTERS;
+
+ return 0;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(macsec_hw) {}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw)
+{
+ unsigned int i;
+
+ if (!priv->macsec)
+ return idx;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return idx;
+
+ for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_macsec_hw_stats_desc[i].format);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw)
+{
+ int i;
+
+ if (!priv->macsec)
+ return idx;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return idx;
+
+ mlx5e_macsec_get_stats_fill(priv->macsec, mlx5e_macsec_get_stats(priv->macsec));
+ for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(mlx5e_macsec_get_stats(priv->macsec),
+ mlx5e_macsec_hw_stats_desc,
+ i);
+
+ return idx;
+}
+
+MLX5E_DEFINE_STATS_GRP(macsec_hw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
deleted file mode 100644
index fba561ffe1d4..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include "en_accel/tls.h"
-#include "accel/tls.h"
-
-static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- MLX5_SET(tls_flow, flow, ipv6, 0);
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- MLX5_SET(tls_flow, flow, ipv6, 1);
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-}
-#endif
-
-static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport,
- MLX5_FLD_SZ_BYTES(tls_flow, src_port));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport,
- MLX5_FLD_SZ_BYTES(tls_flow, dst_port));
-}
-
-static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps)
-{
- switch (sk->sk_family) {
- case AF_INET:
- mlx5e_tls_set_ipv4_flow(flow, sk);
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- if (!sk->sk_ipv6only &&
- ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
- mlx5e_tls_set_ipv4_flow(flow, sk);
- break;
- }
- if (!(caps & MLX5_ACCEL_TLS_IPV6))
- goto error_out;
-
- mlx5e_tls_set_ipv6_flow(flow, sk);
- break;
-#endif
- default:
- goto error_out;
- }
-
- mlx5e_tls_set_flow_tcp_ports(flow, sk);
- return 0;
-error_out:
- return -EINVAL;
-}
-
-static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
- enum tls_offload_ctx_dir direction,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct mlx5_core_dev *mdev = priv->mdev;
- u32 caps = mlx5_accel_tls_device_caps(mdev);
- int ret = -ENOMEM;
- void *flow;
- u32 swid;
-
- flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
- if (!flow)
- return ret;
-
- ret = mlx5e_tls_set_flow(flow, sk, caps);
- if (ret)
- goto free_flow;
-
- ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info,
- start_offload_tcp_sn, &swid,
- direction == TLS_OFFLOAD_CTX_DIR_TX);
- if (ret < 0)
- goto free_flow;
-
- if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
- struct mlx5e_tls_offload_context_tx *tx_ctx =
- mlx5e_get_tls_tx_context(tls_ctx);
-
- tx_ctx->swid = htonl(swid);
- tx_ctx->expected_seq = start_offload_tcp_sn;
- } else {
- struct mlx5e_tls_offload_context_rx *rx_ctx =
- mlx5e_get_tls_rx_context(tls_ctx);
-
- rx_ctx->handle = htonl(swid);
- }
-
- return 0;
-free_flow:
- kfree(flow);
- return ret;
-}
-
-static void mlx5e_tls_del(struct net_device *netdev,
- struct tls_context *tls_ctx,
- enum tls_offload_ctx_dir direction)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- unsigned int handle;
-
- handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ?
- mlx5e_get_tls_tx_context(tls_ctx)->swid :
- mlx5e_get_tls_rx_context(tls_ctx)->handle);
-
- mlx5_accel_tls_del_flow(priv->mdev, handle,
- direction == TLS_OFFLOAD_CTX_DIR_TX);
-}
-
-static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk,
- u32 seq, u8 *rcd_sn_data,
- enum tls_offload_ctx_dir direction)
-{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_tls_offload_context_rx *rx_ctx;
- u64 rcd_sn = *(u64 *)rcd_sn_data;
-
- if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX))
- return -EINVAL;
- rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
-
- netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq,
- be64_to_cpu(rcd_sn));
- mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
-
- return 0;
-}
-
-static const struct tlsdev_ops mlx5e_tls_ops = {
- .tls_dev_add = mlx5e_tls_add,
- .tls_dev_del = mlx5e_tls_del,
- .tls_dev_resync = mlx5e_tls_resync,
-};
-
-void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
-{
- struct net_device *netdev = priv->netdev;
- u32 caps;
-
- if (mlx5_accel_is_ktls_device(priv->mdev)) {
- mlx5e_ktls_build_netdev(priv);
- return;
- }
-
- if (!mlx5_accel_is_tls_device(priv->mdev))
- return;
-
- caps = mlx5_accel_tls_device_caps(priv->mdev);
- if (caps & MLX5_ACCEL_TLS_TX) {
- netdev->features |= NETIF_F_HW_TLS_TX;
- netdev->hw_features |= NETIF_F_HW_TLS_TX;
- }
-
- if (caps & MLX5_ACCEL_TLS_RX) {
- netdev->features |= NETIF_F_HW_TLS_RX;
- netdev->hw_features |= NETIF_F_HW_TLS_RX;
- }
-
- if (!(caps & MLX5_ACCEL_TLS_LRO)) {
- netdev->features &= ~NETIF_F_LRO;
- netdev->hw_features &= ~NETIF_F_LRO;
- }
-
- netdev->tlsdev_ops = &mlx5e_tls_ops;
-}
-
-int mlx5e_tls_init(struct mlx5e_priv *priv)
-{
- struct mlx5e_tls *tls = kzalloc(sizeof(*tls), GFP_KERNEL);
-
- if (!tls)
- return -ENOMEM;
-
- priv->tls = tls;
- return 0;
-}
-
-void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
-{
- struct mlx5e_tls *tls = priv->tls;
-
- if (!tls)
- return;
-
- kfree(tls);
- priv->tls = NULL;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
deleted file mode 100644
index 9015f3f7792d..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#ifndef __MLX5E_TLS_H__
-#define __MLX5E_TLS_H__
-
-#include "accel/tls.h"
-#include "en_accel/ktls.h"
-
-#ifdef CONFIG_MLX5_EN_TLS
-#include <net/tls.h>
-#include "en.h"
-
-struct mlx5e_tls_sw_stats {
- atomic64_t tx_tls_drop_metadata;
- atomic64_t tx_tls_drop_resync_alloc;
- atomic64_t tx_tls_drop_no_sync_data;
- atomic64_t tx_tls_drop_bypass_required;
- atomic64_t rx_tls_drop_resync_request;
- atomic64_t rx_tls_resync_request;
- atomic64_t rx_tls_resync_reply;
- atomic64_t rx_tls_auth_fail;
-};
-
-struct mlx5e_tls {
- struct mlx5e_tls_sw_stats sw_stats;
-};
-
-struct mlx5e_tls_offload_context_tx {
- struct tls_offload_context_tx base;
- u32 expected_seq;
- __be32 swid;
-};
-
-static inline struct mlx5e_tls_offload_context_tx *
-mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
-{
- BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) >
- TLS_OFFLOAD_CONTEXT_SIZE_TX);
- return container_of(tls_offload_ctx_tx(tls_ctx),
- struct mlx5e_tls_offload_context_tx,
- base);
-}
-
-struct mlx5e_tls_offload_context_rx {
- struct tls_offload_context_rx base;
- __be32 handle;
-};
-
-static inline struct mlx5e_tls_offload_context_rx *
-mlx5e_get_tls_rx_context(struct tls_context *tls_ctx)
-{
- BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) >
- TLS_OFFLOAD_CONTEXT_SIZE_RX);
- return container_of(tls_offload_ctx_rx(tls_ctx),
- struct mlx5e_tls_offload_context_rx,
- base);
-}
-
-void mlx5e_tls_build_netdev(struct mlx5e_priv *priv);
-int mlx5e_tls_init(struct mlx5e_priv *priv);
-void mlx5e_tls_cleanup(struct mlx5e_priv *priv);
-
-int mlx5e_tls_get_count(struct mlx5e_priv *priv);
-int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
-int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
-
-#else
-
-static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
-{
- if (mlx5_accel_is_ktls_device(priv->mdev))
- mlx5e_ktls_build_netdev(priv);
-}
-
-static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
-static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
-static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
-static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
-
-#endif
-
-#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
deleted file mode 100644
index ef1ed15a53b4..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ /dev/null
@@ -1,387 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include "en_accel/tls.h"
-#include "en_accel/tls_rxtx.h"
-#include "accel/accel.h"
-
-#include <net/inet6_hashtables.h>
-#include <linux/ipv6.h>
-
-#define SYNDROM_DECRYPTED 0x30
-#define SYNDROM_RESYNC_REQUEST 0x31
-#define SYNDROM_AUTH_FAILED 0x32
-
-#define SYNDROME_OFFLOAD_REQUIRED 32
-#define SYNDROME_SYNC 33
-
-struct sync_info {
- u64 rcd_sn;
- s32 sync_len;
- int nr_frags;
- skb_frag_t frags[MAX_SKB_FRAGS];
-};
-
-struct recv_metadata_content {
- u8 syndrome;
- u8 reserved;
- __be32 sync_seq;
-} __packed;
-
-struct send_metadata_content {
- /* One byte of syndrome followed by 3 bytes of swid */
- __be32 syndrome_swid;
- __be16 first_seq;
-} __packed;
-
-struct mlx5e_tls_metadata {
- union {
- /* from fpga to host */
- struct recv_metadata_content recv;
- /* from host to fpga */
- struct send_metadata_content send;
- unsigned char raw[6];
- } __packed content;
- /* packet type ID field */
- __be16 ethertype;
-} __packed;
-
-static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
-{
- struct mlx5e_tls_metadata *pet;
- struct ethhdr *eth;
-
- if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata)))
- return -ENOMEM;
-
- eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata));
- skb->mac_header -= sizeof(struct mlx5e_tls_metadata);
- pet = (struct mlx5e_tls_metadata *)(eth + 1);
-
- memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata),
- 2 * ETH_ALEN);
-
- eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
- pet->content.send.syndrome_swid =
- htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
-
- return 0;
-}
-
-static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context,
- u32 tcp_seq, struct sync_info *info)
-{
- int remaining, i = 0, ret = -EINVAL;
- struct tls_record_info *record;
- unsigned long flags;
- s32 sync_size;
-
- spin_lock_irqsave(&context->base.lock, flags);
- record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn);
-
- if (unlikely(!record))
- goto out;
-
- sync_size = tcp_seq - tls_record_start_seq(record);
- info->sync_len = sync_size;
- if (unlikely(sync_size < 0)) {
- if (tls_record_is_start_marker(record))
- goto done;
-
- goto out;
- }
-
- remaining = sync_size;
- while (remaining > 0) {
- info->frags[i] = record->frags[i];
- __skb_frag_ref(&info->frags[i]);
- remaining -= skb_frag_size(&info->frags[i]);
-
- if (remaining < 0)
- skb_frag_size_add(&info->frags[i], remaining);
-
- i++;
- }
- info->nr_frags = i;
-done:
- ret = 0;
-out:
- spin_unlock_irqrestore(&context->base.lock, flags);
- return ret;
-}
-
-static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
- struct sk_buff *nskb, u32 tcp_seq,
- int headln, __be64 rcd_sn)
-{
- struct mlx5e_tls_metadata *pet;
- u8 syndrome = SYNDROME_SYNC;
- struct iphdr *iph;
- struct tcphdr *th;
- int data_len, mss;
-
- nskb->dev = skb->dev;
- skb_reset_mac_header(nskb);
- skb_set_network_header(nskb, skb_network_offset(skb));
- skb_set_transport_header(nskb, skb_transport_offset(skb));
- memcpy(nskb->data, skb->data, headln);
- memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn));
-
- iph = ip_hdr(nskb);
- iph->tot_len = htons(nskb->len - skb_network_offset(nskb));
- th = tcp_hdr(nskb);
- data_len = nskb->len - headln;
- tcp_seq -= data_len;
- th->seq = htonl(tcp_seq);
-
- mss = nskb->dev->mtu - (headln - skb_network_offset(nskb));
- skb_shinfo(nskb)->gso_size = 0;
- if (data_len > mss) {
- skb_shinfo(nskb)->gso_size = mss;
- skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss);
- }
- skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
-
- pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
- memcpy(pet, &syndrome, sizeof(syndrome));
- pet->content.send.first_seq = htons(tcp_seq);
-
- /* MLX5 devices don't care about the checksum partial start, offset
- * and pseudo header
- */
- nskb->ip_summed = CHECKSUM_PARTIAL;
-
- nskb->queue_mapping = skb->queue_mapping;
-}
-
-static struct sk_buff *
-mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
- struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe **wqe,
- u16 *pi,
- struct mlx5e_tls *tls)
-{
- u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
- struct sync_info info;
- struct sk_buff *nskb;
- int linear_len = 0;
- int headln;
- int i;
-
- sq->stats->tls_ooo++;
-
- if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
- /* We might get here if a retransmission reaches the driver
- * after the relevant record is acked.
- * It should be safe to drop the packet in this case
- */
- atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data);
- goto err_out;
- }
-
- if (unlikely(info.sync_len < 0)) {
- u32 payload;
-
- headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
- payload = skb->len - headln;
- if (likely(payload <= -info.sync_len))
- /* SKB payload doesn't require offload
- */
- return skb;
-
- atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
- goto err_out;
- }
-
- if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
- atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata);
- goto err_out;
- }
-
- headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
- linear_len += headln + sizeof(info.rcd_sn);
- nskb = alloc_skb(linear_len, GFP_ATOMIC);
- if (unlikely(!nskb)) {
- atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc);
- goto err_out;
- }
-
- context->expected_seq = tcp_seq + skb->len - headln;
- skb_put(nskb, linear_len);
- for (i = 0; i < info.nr_frags; i++)
- skb_shinfo(nskb)->frags[i] = info.frags[i];
-
- skb_shinfo(nskb)->nr_frags = info.nr_frags;
- nskb->data_len = info.sync_len;
- nskb->len += info.sync_len;
- sq->stats->tls_resync_bytes += nskb->len;
- mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
- cpu_to_be64(info.rcd_sn));
- mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true);
- *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
- return skb;
-
-err_out:
- dev_kfree_skb_any(skb);
- return NULL;
-}
-
-struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
- struct mlx5e_txqsq *sq,
- struct sk_buff *skb,
- struct mlx5e_tx_wqe **wqe,
- u16 *pi)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_tls_offload_context_tx *context;
- struct tls_context *tls_ctx;
- u32 expected_seq;
- int datalen;
- u32 skb_seq;
-
- if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx)) {
- skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
- goto out;
- }
-
- if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
- goto out;
-
- datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
- if (!datalen)
- goto out;
-
- tls_ctx = tls_get_ctx(skb->sk);
- if (unlikely(tls_ctx->netdev != netdev))
- goto out;
-
- skb_seq = ntohl(tcp_hdr(skb)->seq);
- context = mlx5e_get_tls_tx_context(tls_ctx);
- expected_seq = context->expected_seq;
-
- if (unlikely(expected_seq != skb_seq)) {
- skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
- goto out;
- }
-
- if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
- atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
- dev_kfree_skb_any(skb);
- skb = NULL;
- goto out;
- }
-
- context->expected_seq = skb_seq + datalen;
-out:
- return skb;
-}
-
-static int tls_update_resync_sn(struct net_device *netdev,
- struct sk_buff *skb,
- struct mlx5e_tls_metadata *mdata)
-{
- struct sock *sk = NULL;
- struct iphdr *iph;
- struct tcphdr *th;
- __be32 seq;
-
- if (mdata->ethertype != htons(ETH_P_IP))
- return -EINVAL;
-
- iph = (struct iphdr *)(mdata + 1);
-
- th = ((void *)iph) + iph->ihl * 4;
-
- if (iph->version == 4) {
- sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
- iph->saddr, th->source, iph->daddr,
- th->dest, netdev->ifindex);
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
-
- sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
- &ipv6h->saddr, th->source,
- &ipv6h->daddr, ntohs(th->dest),
- netdev->ifindex, 0);
-#endif
- }
- if (!sk || sk->sk_state == TCP_TIME_WAIT) {
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request);
- goto out;
- }
-
- skb->sk = sk;
- skb->destructor = sock_edemux;
-
- memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq));
- tls_offload_rx_resync_request(sk, seq);
-out:
- return 0;
-}
-
-void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
- u32 *cqe_bcnt)
-{
- struct mlx5e_tls_metadata *mdata;
- struct mlx5e_priv *priv;
-
- if (!is_metadata_hdr_valid(skb))
- return;
-
- /* Use the metadata */
- mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN);
- switch (mdata->content.recv.syndrome) {
- case SYNDROM_DECRYPTED:
- skb->decrypted = 1;
- break;
- case SYNDROM_RESYNC_REQUEST:
- tls_update_resync_sn(netdev, skb, mdata);
- priv = netdev_priv(netdev);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request);
- break;
- case SYNDROM_AUTH_FAILED:
- /* Authentication failure will be observed and verified by kTLS */
- priv = netdev_priv(netdev);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail);
- break;
- default:
- /* Bypass the metadata header to others */
- return;
- }
-
- remove_metadata_hdr(skb);
- *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
deleted file mode 100644
index 90bc1f2384c8..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5E_TLS_RXTX_H__
-#define __MLX5E_TLS_RXTX_H__
-
-#ifdef CONFIG_MLX5_EN_TLS
-
-#include <linux/skbuff.h>
-#include "en.h"
-#include "en/txrx.h"
-
-struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
- struct mlx5e_txqsq *sq,
- struct sk_buff *skb,
- struct mlx5e_tx_wqe **wqe,
- u16 *pi);
-
-void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
- u32 *cqe_bcnt);
-
-#endif /* CONFIG_MLX5_EN_TLS */
-
-#endif /* __MLX5E_TLS_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 2c75b2752f58..0ae1865086ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -36,6 +36,32 @@
#include <linux/ipv6.h>
#include "en.h"
+#define ARFS_HASH_SHIFT BITS_PER_BYTE
+#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct arfs_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *default_rule;
+ struct hlist_head rules_hash[ARFS_HASH_SIZE];
+};
+
+enum arfs_type {
+ ARFS_IPV4_TCP,
+ ARFS_IPV6_TCP,
+ ARFS_IPV4_UDP,
+ ARFS_IPV6_UDP,
+ ARFS_NUM_TYPES,
+};
+
+struct mlx5e_arfs_tables {
+ struct arfs_table arfs_tables[ARFS_NUM_TYPES];
+ /* Protect aRFS rules list */
+ spinlock_t arfs_lock;
+ struct list_head rules;
+ int last_filter_id;
+ struct workqueue_struct *wq;
+};
+
struct arfs_tuple {
__be16 etype;
u8 ip_proto;
@@ -72,75 +98,65 @@ struct arfs_rule {
for (j = 0; j < ARFS_HASH_SIZE; j++) \
hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
-static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
{
switch (type) {
case ARFS_IPV4_TCP:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
case ARFS_IPV4_UDP:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
case ARFS_IPV6_TCP:
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
case ARFS_IPV6_UDP:
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
default:
return -EINVAL;
}
}
-static int arfs_disable(struct mlx5e_priv *priv)
+static int arfs_disable(struct mlx5e_flow_steering *fs)
{
- struct mlx5_flow_destination dest = {};
- struct mlx5e_tir *tir = priv->indir_tir;
- int err = 0;
- int tt;
- int i;
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err, i;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- dest.tir_num = tir[i].tirn;
- tt = arfs_get_tt(i);
- /* Modify ttc rules destination to bypass the aRFS tables*/
- err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
- &dest, NULL);
+ /* Modify ttc rules destination back to their default */
+ err = mlx5_ttc_fwd_default_dest(ttc, arfs_get_tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc destination failed\n",
- __func__);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, arfs_get_tt(i), err);
return err;
}
}
return 0;
}
-static void arfs_del_rules(struct mlx5e_priv *priv);
+static void arfs_del_rules(struct mlx5e_flow_steering *fs);
-int mlx5e_arfs_disable(struct mlx5e_priv *priv)
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
{
- arfs_del_rules(priv);
+ arfs_del_rules(fs);
- return arfs_disable(priv);
+ return arfs_disable(fs);
}
-int mlx5e_arfs_enable(struct mlx5e_priv *priv)
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct mlx5_flow_destination dest = {};
- int err = 0;
- int tt;
- int i;
+ int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- dest.ft = priv->fs.arfs.arfs_tables[i].ft.t;
- tt = arfs_get_tt(i);
+ dest.ft = arfs->arfs_tables[i].ft.t;
/* Modify ttc rules destination to point on the aRFS FTs */
- err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
- &dest, NULL);
+ err = mlx5_ttc_fwd_dest(ttc, arfs_get_tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc destination failed err=%d\n",
- __func__, err);
- arfs_disable(priv);
+ fs_err(fs, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
+ __func__, arfs_get_tt(i), err);
+ arfs_disable(fs);
return err;
}
}
@@ -153,60 +169,62 @@ static void arfs_destroy_table(struct arfs_table *arfs_t)
mlx5e_destroy_flow_table(&arfs_t->ft);
}
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
+static void _mlx5e_cleanup_tables(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
int i;
- if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
- return;
-
- arfs_del_rules(priv);
- destroy_workqueue(priv->fs.arfs.wq);
+ arfs_del_rules(fs);
+ destroy_workqueue(arfs->wq);
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
- arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
+ if (!IS_ERR_OR_NULL(arfs->arfs_tables[i].ft.t))
+ arfs_destroy_table(&arfs->arfs_tables[i]);
}
}
-static int arfs_add_default_rule(struct mlx5e_priv *priv,
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+
+ if (!ntuple)
+ return;
+
+ _mlx5e_cleanup_tables(fs);
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
+}
+
+static int arfs_add_default_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
enum arfs_type type)
{
- struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
- struct mlx5e_tir *tir = priv->indir_tir;
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ struct arfs_table *arfs_t = &arfs->arfs_tables[type];
struct mlx5_flow_destination dest = {};
MLX5_DECLARE_FLOW_ACT(flow_act);
- struct mlx5_flow_spec *spec;
- enum mlx5e_traffic_types tt;
+ enum mlx5_traffic_types tt;
int err = 0;
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec) {
- err = -ENOMEM;
- goto out;
- }
-
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
tt = arfs_get_tt(type);
if (tt == -EINVAL) {
- netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
- __func__, type);
- err = -EINVAL;
- goto out;
+ fs_err(fs, "%s: bad arfs_type: %d\n", __func__, type);
+ return -EINVAL;
}
- dest.tir_num = tir[tt].tirn;
-
- arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
+ /* FIXME: Must use mlx5_ttc_get_default_dest(),
+ * but can't since TTC default is not setup yet !
+ */
+ dest.tir_num = mlx5e_rx_res_get_tirn_rss(rx_res, tt);
+ arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
&flow_act,
&dest, 1);
if (IS_ERR(arfs_t->default_rule)) {
err = PTR_ERR(arfs_t->default_rule);
arfs_t->default_rule = NULL;
- netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
- __func__, type);
+ fs_err(fs, "%s: add rule failed, arfs type=%d\n", __func__, type);
}
-out:
- kvfree(spec);
+
return err;
}
@@ -229,7 +247,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
sizeof(*ft->g), GFP_KERNEL);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in || !ft->g) {
- kvfree(ft->g);
+ kfree(ft->g);
kvfree(in);
return -ENOMEM;
}
@@ -306,10 +324,12 @@ out:
return err;
}
-static int arfs_create_table(struct mlx5e_priv *priv,
+static int arfs_create_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
enum arfs_type type)
{
- struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -320,7 +340,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
ft_attr.level = MLX5E_ARFS_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
@@ -331,7 +351,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
if (err)
goto err;
- err = arfs_add_default_rule(priv, type);
+ err = arfs_add_default_rule(fs, rx_res, type);
if (err)
goto err;
@@ -341,28 +361,40 @@ err:
return err;
}
-int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
{
- int err = 0;
+ struct mlx5e_arfs_tables *arfs;
+ int err = -ENOMEM;
int i;
- if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ if (!ntuple)
return 0;
- spin_lock_init(&priv->fs.arfs.arfs_lock);
- INIT_LIST_HEAD(&priv->fs.arfs.rules);
- priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
- if (!priv->fs.arfs.wq)
+ arfs = kvzalloc(sizeof(*arfs), GFP_KERNEL);
+ if (!arfs)
return -ENOMEM;
+ spin_lock_init(&arfs->arfs_lock);
+ INIT_LIST_HEAD(&arfs->rules);
+ arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
+ if (!arfs->wq)
+ goto err;
+
+ mlx5e_fs_set_arfs(fs, arfs);
+
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- err = arfs_create_table(priv, i);
+ err = arfs_create_table(fs, rx_res, i);
if (err)
- goto err;
+ goto err_des;
}
return 0;
+
+err_des:
+ _mlx5e_cleanup_tables(fs);
err:
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
return err;
}
@@ -370,15 +402,16 @@ err:
static void arfs_may_expire_flow(struct mlx5e_priv *priv)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_rule *arfs_rule;
struct hlist_node *htmp;
+ HLIST_HEAD(del_list);
int quota = 0;
int i;
int j;
- HLIST_HEAD(del_list);
- spin_lock_bh(&priv->fs.arfs.arfs_lock);
- mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(arfs_rule, htmp, arfs->arfs_tables, i, j) {
if (!work_pending(&arfs_rule->arfs_work) &&
rps_may_expire_flow(priv->netdev,
arfs_rule->rxq, arfs_rule->flow_id,
@@ -389,7 +422,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
break;
}
}
- spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
if (arfs_rule->rule)
mlx5_del_flow_rules(arfs_rule->rule);
@@ -398,20 +431,21 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
}
}
-static void arfs_del_rules(struct mlx5e_priv *priv)
+static void arfs_del_rules(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct hlist_node *htmp;
struct arfs_rule *rule;
+ HLIST_HEAD(del_list);
int i;
int j;
- HLIST_HEAD(del_list);
- spin_lock_bh(&priv->fs.arfs.arfs_lock);
- mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
hlist_del_init(&rule->hlist);
hlist_add_head(&rule->hlist, &del_list);
}
- spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
cancel_work_sync(&rule->arfs_work);
@@ -455,7 +489,7 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
struct arfs_rule *arfs_rule)
{
- struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_tuple *tuple = &arfs_rule->tuple;
struct mlx5_flow_handle *rule = NULL;
struct mlx5_flow_destination dest = {};
@@ -533,11 +567,11 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
16);
}
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
+ dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq);
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- priv->channel_stats[arfs_rule->rxq].rq.arfs_err++;
+ priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++;
mlx5e_dbg(HW, priv,
"%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
__func__, arfs_rule->filter_id, arfs_rule->rxq,
@@ -556,7 +590,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
int err = 0;
dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst.tir_num = priv->direct_tir[rxq].tirn;
+ dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq);
err = mlx5_modify_rule_destination(rule, &dst, NULL);
if (err)
netdev_warn(priv->netdev,
@@ -569,13 +603,15 @@ static void arfs_handle_work(struct work_struct *work)
struct arfs_rule,
arfs_work);
struct mlx5e_priv *priv = arfs_rule->priv;
+ struct mlx5e_arfs_tables *arfs;
struct mlx5_flow_handle *rule;
+ arfs = mlx5e_fs_get_arfs(priv->fs);
mutex_lock(&priv->state_lock);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- spin_lock_bh(&priv->fs.arfs.arfs_lock);
+ spin_lock_bh(&arfs->arfs_lock);
hlist_del(&arfs_rule->hlist);
- spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
mutex_unlock(&priv->state_lock);
kfree(arfs_rule);
@@ -601,6 +637,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
const struct flow_keys *fk,
u16 rxq, u32 flow_id)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_rule *rule;
struct arfs_tuple *tuple;
@@ -628,7 +665,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
tuple->dst_port = fk->ports.dst;
rule->flow_id = flow_id;
- rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
+ rule->filter_id = arfs->last_filter_id++ % RPS_NO_FILTER;
hlist_add_head(&rule->hlist,
arfs_hash_bucket(arfs_t, tuple->src_port,
@@ -672,11 +709,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
- struct arfs_table *arfs_t;
+ struct mlx5e_arfs_tables *arfs;
struct arfs_rule *arfs_rule;
+ struct arfs_table *arfs_t;
struct flow_keys fk;
+ arfs = mlx5e_fs_get_arfs(priv->fs);
if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
return -EPROTONOSUPPORT;
@@ -706,7 +744,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
return -ENOMEM;
}
}
- queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
+ queue_work(arfs->wq, &arfs_rule->arfs_work);
spin_unlock_bh(&arfs->arfs_lock);
return arfs_rule->filter_id;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index f7890e0ce96c..68f19324db93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -33,36 +33,20 @@
#include "en.h"
/* mlx5e global resources should be placed in this file.
- * Global resources are common to all the netdevices crated on the same nic.
+ * Global resources are common to all the netdevices created on the same nic.
*/
-int mlx5e_create_tir(struct mlx5_core_dev *mdev,
- struct mlx5e_tir *tir, u32 *in, int inlen)
+void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
{
- int err;
-
- err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn);
- if (err)
- return err;
-
- mutex_lock(&mdev->mlx5e_res.td.list_lock);
- list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
- mutex_unlock(&mdev->mlx5e_res.td.list_lock);
-
- return 0;
-}
+ bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev);
+ bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+ bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read);
-void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
- struct mlx5e_tir *tir)
-{
- mutex_lock(&mdev->mlx5e_res.td.list_lock);
- mlx5_core_destroy_tir(mdev, tir->tirn);
- list_del(&tir->list);
- mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read);
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write);
}
-static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
- struct mlx5_core_mkey *mkey)
+int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
void *mkc;
@@ -77,7 +61,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
-
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
MLX5_SET(mkc, mkc, pd, pdn);
MLX5_SET(mkc, mkc, length64, 1);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
@@ -90,7 +74,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
{
- struct mlx5e_resources *res = &mdev->mlx5e_res;
+ struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
int err;
err = mlx5_core_alloc_pd(mdev, &res->pdn);
@@ -117,13 +101,13 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
goto err_destroy_mkey;
}
- INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
- mutex_init(&mdev->mlx5e_res.td.list_lock);
+ INIT_LIST_HEAD(&res->td.tirs_list);
+ mutex_init(&res->td.list_lock);
return 0;
err_destroy_mkey:
- mlx5_core_destroy_mkey(mdev, &res->mkey);
+ mlx5_core_destroy_mkey(mdev, res->mkey);
err_dealloc_transport_domain:
mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
err_dealloc_pd:
@@ -133,19 +117,21 @@ err_dealloc_pd:
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
{
- struct mlx5e_resources *res = &mdev->mlx5e_res;
+ struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
mlx5_free_bfreg(mdev, &res->bfreg);
- mlx5_core_destroy_mkey(mdev, &res->mkey);
+ mlx5_core_destroy_mkey(mdev, res->mkey);
mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
mlx5_core_dealloc_pd(mdev, res->pdn);
memset(res, 0, sizeof(*res));
}
-int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+ bool enable_mc_lb)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_tir *tir;
+ u8 lb_flags = 0;
int err = 0;
u32 tirn = 0;
int inlen;
@@ -159,15 +145,20 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
}
if (enable_uc_lb)
- MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
+ lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+
+ if (enable_mc_lb)
+ lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+
+ if (lb_flags)
+ MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags);
MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
- mutex_lock(&mdev->mlx5e_res.td.list_lock);
- list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
+ mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock);
+ list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) {
tirn = tir->tirn;
- err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+ err = mlx5_core_modify_tir(mdev, tirn, in);
if (err)
goto out;
}
@@ -176,7 +167,7 @@ out:
kvfree(in);
if (err)
netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
- mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+ mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 01f2918063af..2449731b7d79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -35,6 +35,8 @@
#include "en/port.h"
#include "en/port_buffer.h"
+#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
+
#define MLX5E_100MB (100000)
#define MLX5E_1GB (1000000)
@@ -49,6 +51,12 @@ enum {
MLX5E_LOWEST_PRIO_GROUP = 0,
};
+enum {
+ MLX5_DCB_CHG_RESET,
+ MLX5_DCB_NO_CHG,
+ MLX5_DCB_CHG_NO_RESET,
+};
+
#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \
MLX5_CAP_QCAM_REG(mdev, qpts) && \
MLX5_CAP_QCAM_REG(mdev, qpdpm))
@@ -238,7 +246,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
* Report both group #0 and #1 as ETS type.
* All the tcs in group #0 will be reported with 0% BW.
*/
-int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
+static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
{
struct mlx5_core_dev *mdev = priv->mdev;
u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
@@ -977,7 +985,7 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
return err;
}
-const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
+static const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
.ieee_getets = mlx5e_dcbnl_ieee_getets,
.ieee_setets = mlx5e_dcbnl_ieee_setets,
.ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate,
@@ -1009,6 +1017,15 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
.setpfcstate = mlx5e_dcbnl_setpfcstate,
};
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+}
+
static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
enum mlx5_dcbx_oper_mode *mode)
{
@@ -1098,49 +1115,50 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv)
mlx5e_dcbnl_dscp_app(priv, DELETE);
}
-static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv,
- struct mlx5e_params *params)
+static void mlx5e_params_calc_trust_tx_min_inline_mode(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u8 trust_state)
{
- mlx5_query_min_inline(priv->mdev, &params->tx_min_inline_mode);
- if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP &&
+ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+ if (trust_state == MLX5_QPTS_TRUST_DSCP &&
params->tx_min_inline_mode == MLX5_INLINE_MODE_L2)
params->tx_min_inline_mode = MLX5_INLINE_MODE_IP;
}
-static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv)
+static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
{
- struct mlx5e_channels new_channels = {};
-
- mutex_lock(&priv->state_lock);
-
- new_channels.params = priv->channels.params;
- mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params);
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- goto out;
- }
-
- /* Skip if tx_min_inline is the same */
- if (new_channels.params.tx_min_inline_mode ==
- priv->channels.params.tx_min_inline_mode)
- goto out;
+ u8 *trust_state = context;
+ int err;
- mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ err = mlx5_set_trust_state(priv->mdev, *trust_state);
+ if (err)
+ return err;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state);
-out:
- mutex_unlock(&priv->state_lock);
+ return 0;
}
static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
{
+ struct mlx5e_params new_params;
+ bool reset = true;
int err;
- err = mlx5_set_trust_state(priv->mdev, trust_state);
- if (err)
- return err;
- priv->dcbx_dp.trust_state = trust_state;
- mlx5e_trust_update_sq_inline_mode(priv);
+ mutex_lock(&priv->state_lock);
+
+ new_params = priv->channels.params;
+ mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_params,
+ trust_state);
+
+ /* Skip if tx_min_inline is the same */
+ if (new_params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode)
+ reset = false;
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_update_trust_state_hw,
+ &trust_state, reset);
+
+ mutex_unlock(&priv->state_lock);
return err;
}
@@ -1160,18 +1178,31 @@ static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ u8 trust_state;
int err;
- priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
-
- if (!MLX5_DSCP_SUPPORTED(mdev))
+ if (!MLX5_DSCP_SUPPORTED(mdev)) {
+ WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP);
return 0;
+ }
- err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state);
+ err = mlx5_query_trust_state(priv->mdev, &trust_state);
if (err)
return err;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state);
+
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) {
+ /*
+ * Align the driver state with the register state.
+ * Temporary state change is required to enable the app list reset.
+ */
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP;
+ mlx5e_dcbnl_delete_app(priv);
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
+ }
- mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params);
+ mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
+ priv->dcbx_dp.trust_state);
err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio);
if (err)
@@ -1180,6 +1211,24 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
return 0;
}
+#define MLX5E_BUFFER_CELL_SHIFT 7
+
+static u16 mlx5e_query_port_buffers_cell_size(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(sbcam_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(sbcam_reg)] = {};
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return (1 << MLX5E_BUFFER_CELL_SHIFT);
+
+ if (mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_SBCAM, 0, 0))
+ return (1 << MLX5E_BUFFER_CELL_SHIFT);
+
+ return MLX5_GET(sbcam_reg, out, cap_cell_size);
+}
+
void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
{
struct mlx5e_dcbx *dcbx = &priv->dcbx;
@@ -1197,6 +1246,7 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
priv->dcbx.cap |= DCB_CAP_DCBX_HOST;
+ priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv);
priv->dcbx.manual_buffer = false;
priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d674cb679895..24aa25da482b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -30,24 +30,27 @@
* SOFTWARE.
*/
+#include <linux/ethtool_netlink.h>
+
#include "en.h"
#include "en/port.h"
-#include "en/xsk/umem.h"
+#include "en/params.h"
+#include "en/xsk/pool.h"
+#include "en/ptp.h"
#include "lib/clock.h"
+#include "en/fs_ethtool.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
struct ethtool_drvinfo *drvinfo)
{
struct mlx5_core_dev *mdev = priv->mdev;
- strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver));
- strlcpy(drvinfo->version, DRIVER_VERSION,
- sizeof(drvinfo->version));
+ strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
mdev->board_id);
- strlcpy(drvinfo->bus_info, dev_name(mdev->device),
+ strscpy(drvinfo->bus_info, dev_name(mdev->device),
sizeof(drvinfo->bus_info));
}
@@ -194,13 +197,31 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext,
+ ETHTOOL_LINK_MODE_100000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseDR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseCR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_2_200GBASE_CR2_KR2, ext,
+ ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_4_400GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT);
}
static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
struct ptys2ethtool_config **arr,
u32 *size)
{
- bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ bool ext = mlx5e_ptys_ext_supported(mdev);
*arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
*size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
@@ -225,7 +246,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
return MLX5E_NUM_PFLAGS;
case ETH_SS_TEST:
return mlx5e_self_test_num(priv);
- /* fallthrough */
default:
return -EOPNOTSUPP;
}
@@ -250,9 +270,7 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
break;
case ETH_SS_TEST:
- for (i = 0; i < mlx5e_self_test_num(priv); i++)
- strcpy(data + i * ETH_GSTRING_LEN,
- mlx5e_self_tests[i]);
+ mlx5e_self_test_fill_strings(priv, data);
break;
case ETH_SS_STATS:
@@ -290,26 +308,42 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
}
void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param)
{
- param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
+ /* Limitation for regular RQ. XSK RQ may clamp the queue length in
+ * mlx5e_mpwqe_get_log_rq_size.
+ */
+ u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev,
+ PAGE_SHIFT,
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED);
+
+ param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ max_log_mpwrq_pkts);
param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames;
param->tx_pending = 1 << priv->channels.params.log_sq_size;
+
+ kernel_param->tcp_data_split =
+ (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ?
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED :
+ ETHTOOL_TCP_DATA_SPLIT_DISABLED;
}
static void mlx5e_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- mlx5e_ethtool_get_ringparam(priv, param);
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
}
int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
struct ethtool_ringparam *param)
{
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
u8 log_rq_size;
u8 log_sq_size;
int err = 0;
@@ -348,16 +382,15 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
mutex_lock(&priv->state_lock);
- new_channels.params = priv->channels.params;
- new_channels.params.log_rq_mtu_frames = log_rq_size;
- new_channels.params.log_sq_size = log_sq_size;
+ new_params = priv->channels.params;
+ new_params.log_rq_mtu_frames = log_rq_size;
+ new_params.log_sq_size = log_sq_size;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
+ err = mlx5e_validate_params(priv->mdev, &new_params);
+ if (err)
goto unlock;
- }
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
unlock:
mutex_unlock(&priv->state_lock);
@@ -366,7 +399,9 @@ unlock:
}
static int mlx5e_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -402,8 +437,10 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
{
struct mlx5e_params *cur_params = &priv->channels.params;
unsigned int count = ch->combined_count;
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
bool arfs_enabled;
+ int rss_cnt;
+ bool opened;
int err = 0;
if (!count) {
@@ -427,30 +464,53 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
goto out;
}
- new_channels.params = priv->channels.params;
- new_channels.params.num_channels = count;
+ /* Don't allow changing the number of channels if HTB offload is active,
+ * because the numeration of the QoS SQs will change, while per-queue
+ * qdiscs are attached.
+ */
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n",
+ __func__);
+ goto out;
+ }
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- *cur_params = new_channels.params;
- if (!netif_is_rxfh_configured(priv->netdev))
- mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, count);
+ /* Don't allow changing the number of channels if non-default RSS contexts exist,
+ * the kernel doesn't protect against set_channels operations that break them.
+ */
+ rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1;
+ if (rss_cnt) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n",
+ __func__, rss_cnt);
goto out;
}
- arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE;
- if (arfs_enabled)
- mlx5e_arfs_disable(priv);
+ /* Don't allow changing the number of channels if MQPRIO mode channel offload is active,
+ * because it defines a partition over the channels queues.
+ */
+ if (cur_params->mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: MQPRIO mode channel offload is active, cannot change the number of channels\n",
+ __func__);
+ goto out;
+ }
- if (!netif_is_rxfh_configured(priv->netdev))
- mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, count);
+ new_params = *cur_params;
+ new_params.num_channels = count;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE);
+ if (arfs_enabled)
+ mlx5e_arfs_disable(priv->fs);
/* Switch to new channels, set new parameters and close old ones */
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_num_channels_changed_ctx, NULL, true);
if (arfs_enabled) {
- int err2 = mlx5e_arfs_enable(priv);
+ int err2 = mlx5e_arfs_enable(priv->fs);
if (err2)
netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
@@ -472,7 +532,8 @@ static int mlx5e_set_channels(struct net_device *dev,
}
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal)
{
struct dim_cq_moder *rx_moder, *tx_moder;
@@ -489,22 +550,29 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
coal->tx_max_coalesced_frames = tx_moder->pkts;
coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled;
+ kernel_coal->use_cqe_mode_rx =
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER);
+ kernel_coal->use_cqe_mode_tx =
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER);
+
return 0;
}
static int mlx5e_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_get_coalesce(priv, coal);
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
}
#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD
#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT
static void
-mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
struct mlx5_core_dev *mdev = priv->mdev;
int tc;
@@ -519,6 +587,17 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
coal->tx_coalesce_usecs,
coal->tx_max_coalesced_frames);
}
+ }
+}
+
+static void
+mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int i;
+
+ for (i = 0; i < priv->channels.num; ++i) {
+ struct mlx5e_channel *c = priv->channels.c[i];
mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
coal->rx_coalesce_usecs,
@@ -526,14 +605,27 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
}
}
+/* convert a boolean value of cq_mode to mlx5 period mode
+ * true : MLX5_CQ_PERIOD_MODE_START_FROM_CQE
+ * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE
+ */
+static int cqe_mode_to_period_mode(bool val)
+{
+ return val ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct dim_cq_moder *rx_moder, *tx_moder;
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
+ bool reset_rx, reset_tx;
+ bool reset = true;
+ u8 cq_period_mode;
int err = 0;
- bool reset;
if (!MLX5_CAP_GEN(mdev, cq_moderation))
return -EOPNOTSUPP;
@@ -552,47 +644,78 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
return -ERANGE;
}
+ if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) &&
+ !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) {
+ NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device");
+ return -EOPNOTSUPP;
+ }
+
mutex_lock(&priv->state_lock);
- new_channels.params = priv->channels.params;
+ new_params = priv->channels.params;
- rx_moder = &new_channels.params.rx_cq_moderation;
+ rx_moder = &new_params.rx_cq_moderation;
rx_moder->usec = coal->rx_coalesce_usecs;
rx_moder->pkts = coal->rx_max_coalesced_frames;
- new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+ new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
- tx_moder = &new_channels.params.tx_cq_moderation;
+ tx_moder = &new_params.tx_cq_moderation;
tx_moder->usec = coal->tx_coalesce_usecs;
tx_moder->pkts = coal->tx_max_coalesced_frames;
- new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
+ new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- goto out;
+ reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
+ reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled;
+
+ cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx);
+ if (cq_period_mode != rx_moder->cq_period_mode) {
+ mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode);
+ reset_rx = true;
}
- /* we are opened */
- reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) ||
- (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled);
+ cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx);
+ if (cq_period_mode != tx_moder->cq_period_mode) {
+ mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode);
+ reset_tx = true;
+ }
- if (!reset) {
- mlx5e_set_priv_channels_coalesce(priv, coal);
- priv->channels.params = new_channels.params;
- goto out;
+ if (reset_rx) {
+ u8 mode = MLX5E_GET_PFLAG(&new_params,
+ MLX5E_PFLAG_RX_CQE_BASED_MODER);
+
+ mlx5e_reset_rx_moderation(&new_params, mode);
}
+ if (reset_tx) {
+ u8 mode = MLX5E_GET_PFLAG(&new_params,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER);
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ mlx5e_reset_tx_moderation(&new_params, mode);
+ }
+
+ /* If DIM state hasn't changed, it's possible to modify interrupt
+ * moderation parameters on the fly, even if the channels are open.
+ */
+ if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ if (!coal->use_adaptive_rx_coalesce)
+ mlx5e_set_priv_channels_rx_coalesce(priv, coal);
+ if (!coal->use_adaptive_tx_coalesce)
+ mlx5e_set_priv_channels_tx_coalesce(priv, coal);
+ reset = false;
+ }
+
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
-out:
mutex_unlock(&priv->state_lock);
return err;
}
static int mlx5e_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_set_coalesce(priv, coal);
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
}
static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev,
@@ -633,6 +756,8 @@ static const u32 pplm_fec_2_ethtool[] = {
[MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF,
[MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER,
[MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
+ [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS,
+ [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS,
};
static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
@@ -650,54 +775,59 @@ static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
return 0;
}
-/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */
-static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code)
-{
- u32 offset;
+#define MLX5E_ADVERTISE_SUPPORTED_FEC(mlx5_fec, ethtool_fec) \
+ do { \
+ if (mlx5e_fec_in_caps(dev, 1 << (mlx5_fec))) \
+ __set_bit(ethtool_fec, \
+ link_ksettings->link_modes.supported);\
+ } while (0)
- offset = find_first_bit(&ethtool_fec_code, sizeof(u32));
- offset -= ETHTOOL_FEC_OFF_BIT;
- offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT;
-
- return offset;
-}
+static const u32 pplm_fec_2_ethtool_linkmodes[] = {
+ [MLX5E_FEC_NOFEC] = ETHTOOL_LINK_MODE_FEC_NONE_BIT,
+ [MLX5E_FEC_FIRECODE] = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
+ [MLX5E_FEC_RS_528_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT,
+ [MLX5E_FEC_RS_544_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT,
+ [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_LINK_MODE_FEC_LLRS_BIT,
+};
static int get_fec_supported_advertised(struct mlx5_core_dev *dev,
struct ethtool_link_ksettings *link_ksettings)
{
- u_long fec_caps = 0;
- u32 active_fec = 0;
- u32 offset;
+ unsigned long active_fec_long;
+ u32 active_fec;
u32 bitn;
int err;
- err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps);
- if (err)
- return (err == -EOPNOTSUPP) ? 0 : err;
-
err = mlx5e_get_fec_mode(dev, &active_fec, NULL);
if (err)
- return err;
-
- for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) {
- u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn];
-
- offset = ethtool_fec2ethtool_caps(ethtool_bitmask);
- __set_bit(offset, link_ksettings->link_modes.supported);
- }
+ return (err == -EOPNOTSUPP) ? 0 : err;
- active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE);
- offset = ethtool_fec2ethtool_caps(active_fec);
- __set_bit(offset, link_ksettings->link_modes.advertising);
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_NOFEC,
+ ETHTOOL_LINK_MODE_FEC_NONE_BIT);
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_FIRECODE,
+ ETHTOOL_LINK_MODE_FEC_BASER_BIT);
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_RS_528_514,
+ ETHTOOL_LINK_MODE_FEC_RS_BIT);
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_LLRS_272_257_1,
+ ETHTOOL_LINK_MODE_FEC_LLRS_BIT);
+
+ active_fec_long = active_fec;
+ /* active fec is a bit set, find out which bit is set and
+ * advertise the corresponding ethtool bit
+ */
+ bitn = find_first_bit(&active_fec_long, sizeof(active_fec_long) * BITS_PER_BYTE);
+ if (bitn < ARRAY_SIZE(pplm_fec_2_ethtool_linkmodes))
+ __set_bit(pplm_fec_2_ethtool_linkmodes[bitn],
+ link_ksettings->link_modes.advertising);
return 0;
}
-static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
- u32 eth_proto_cap,
- u8 connector_type, bool ext)
+static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev,
+ struct ethtool_link_ksettings *link_ksettings,
+ u32 eth_proto_cap, u8 connector_type)
{
- if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) {
if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
| MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
| MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
@@ -773,6 +903,7 @@ static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings
static void get_speed_duplex(struct net_device *netdev,
u32 eth_proto_oper, bool force_legacy,
+ u16 data_rate_oper,
struct ethtool_link_ksettings *link_ksettings)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -784,7 +915,10 @@ static void get_speed_duplex(struct net_device *netdev,
speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
if (!speed) {
- speed = SPEED_UNKNOWN;
+ if (data_rate_oper)
+ speed = 100 * data_rate_oper;
+ else
+ speed = SPEED_UNKNOWN;
goto out;
}
@@ -829,9 +963,9 @@ static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = {
[MLX5E_PORT_OTHER] = PORT_OTHER,
};
-static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext)
+static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type)
{
- if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
+ if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type))
return ptys2connector_type[connector_type];
if (eth_proto &
@@ -864,7 +998,7 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp,
struct ethtool_link_ksettings *link_ksettings)
{
unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
- bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ bool ext = mlx5e_ptys_ext_supported(mdev);
ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext);
}
@@ -873,17 +1007,18 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
struct ethtool_link_ksettings *link_ksettings)
{
struct mlx5_core_dev *mdev = priv->mdev;
- u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
- u32 rx_pause = 0;
- u32 tx_pause = 0;
- u32 eth_proto_cap;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
u32 eth_proto_admin;
- u32 eth_proto_lp;
- u32 eth_proto_oper;
u8 an_disable_admin;
- u8 an_status;
+ u16 data_rate_oper;
+ u32 eth_proto_oper;
+ u32 eth_proto_cap;
u8 connector_type;
+ u32 rx_pause = 0;
+ u32 tx_pause = 0;
+ u32 eth_proto_lp;
bool admin_ext;
+ u8 an_status;
bool ext;
int err;
@@ -893,7 +1028,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
__func__, err);
goto err_query_regs;
}
- ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
eth_proto_capability);
eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
@@ -917,6 +1052,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
an_status = MLX5_GET(ptys_reg, out, an_status);
connector_type = MLX5_GET(ptys_reg, out, connector_type);
+ data_rate_oper = MLX5_GET(ptys_reg, out, data_rate_oper);
mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
@@ -927,14 +1063,14 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings,
admin_ext);
get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext,
- link_ksettings);
+ data_rate_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
-
- link_ksettings->base.port = get_connector_port(eth_proto_oper,
- connector_type, ext);
- ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin,
- connector_type, ext);
+ connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ?
+ connector_type : MLX5E_PORT_UNKNOWN;
+ link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type);
+ ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin,
+ connector_type);
get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
if (an_status == MLX5_AN_COMPLETE)
@@ -969,6 +1105,22 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
}
+static int mlx5e_speed_validate(struct net_device *netdev, bool ext,
+ const unsigned long link_modes, u8 autoneg)
+{
+ /* Extended link-mode has no speed limitations. */
+ if (ext)
+ return 0;
+
+ if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
+ autoneg != AUTONEG_ENABLE) {
+ netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n",
+ __func__);
+ return -EINVAL;
+ }
+ return 0;
+}
+
static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
{
u32 i, ptys_modes = 0;
@@ -991,7 +1143,8 @@ static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes)
unsigned long modes[2];
for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) {
- if (*ptys2ext_ethtool_table[i].advertised == 0)
+ if (ptys2ext_ethtool_table[i].advertised[0] == 0 &&
+ ptys2ext_ethtool_table[i].advertised[1] == 0)
continue;
memset(modes, 0, sizeof(modes));
bitmap_and(modes, ptys2ext_ethtool_table[i].advertised,
@@ -1045,7 +1198,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
autoneg = link_ksettings->base.autoneg;
speed = link_ksettings->base.speed;
- ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ ext_supported = mlx5e_ptys_ext_supported(mdev);
ext = ext_requested(autoneg, adver, ext_supported);
if (!ext_supported && ext)
return -EOPNOTSUPP;
@@ -1061,13 +1214,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
mlx5e_port_speed2linkmodes(mdev, speed, !ext);
- if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
- autoneg != AUTONEG_ENABLE) {
- netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n",
- __func__);
- err = -EINVAL;
+ err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg);
+ if (err)
goto out;
- }
link_modes = link_modes & eproto.cap;
if (!link_modes) {
@@ -1104,7 +1253,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
{
- return sizeof(priv->rss_params.toeplitz_hash_key);
+ return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key);
}
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -1126,85 +1275,64 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
return mlx5e_ethtool_get_rxfh_indir_size(priv);
}
-static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
- u8 *hfunc)
+static int mlx5e_get_rxfh_context(struct net_device *dev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rss_params *rss = &priv->rss_params;
-
- if (indir)
- memcpy(indir, rss->indirection_rqt,
- sizeof(rss->indirection_rqt));
-
- if (key)
- memcpy(key, rss->toeplitz_hash_key,
- sizeof(rss->toeplitz_hash_key));
-
- if (hfunc)
- *hfunc = rss->hfunc;
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
- return 0;
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, indir, key, hfunc);
+ mutex_unlock(&priv->state_lock);
+ return err;
}
-static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
- const u8 *key, const u8 hfunc)
+static int mlx5e_set_rxfh_context(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc,
+ u32 *rss_context, bool delete)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_rss_params *rss = &priv->rss_params;
- int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
- bool hash_changed = false;
- void *in;
-
- if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
- (hfunc != ETH_RSS_HASH_XOR) &&
- (hfunc != ETH_RSS_HASH_TOP))
- return -EINVAL;
-
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ int err;
mutex_lock(&priv->state_lock);
-
- if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) {
- rss->hfunc = hfunc;
- hash_changed = true;
+ if (delete) {
+ err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
+ goto unlock;
}
- if (indir) {
- memcpy(rss->indirection_rqt, indir,
- sizeof(rss->indirection_rqt));
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- u32 rqtn = priv->indir_rqt.rqtn;
- struct mlx5e_redirect_rqt_param rrp = {
- .is_rss = true,
- {
- .rss = {
- .hfunc = rss->hfunc,
- .channels = &priv->channels,
- },
- },
- };
-
- mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
- }
- }
+ if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+ unsigned int count = priv->channels.params.num_channels;
- if (key) {
- memcpy(rss->toeplitz_hash_key, key,
- sizeof(rss->toeplitz_hash_key));
- hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP;
+ err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
+ if (err)
+ goto unlock;
}
- if (hash_changed)
- mlx5e_modify_tirs_hash(priv, in, inlen);
+ err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, indir, key,
+ hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
+unlock:
mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ return mlx5e_get_rxfh_context(netdev, indir, key, hfunc, 0);
+}
- kvfree(in);
+int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
- return 0;
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, 0, indir, key,
+ hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
+ mutex_unlock(&priv->state_lock);
+ return err;
}
#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100
@@ -1298,6 +1426,14 @@ static int mlx5e_set_tunable(struct net_device *dev,
return err;
}
+static void mlx5e_get_pause_stats(struct net_device *netdev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_pause_get(priv, pause_stats);
+}
+
void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
struct ethtool_pauseparam *pauseparam)
{
@@ -1506,13 +1642,21 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
return mlx5_set_port_wol(mdev, mlx5_wol_mode);
}
+static void mlx5e_get_fec_stats(struct net_device *netdev,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_fec_get(priv, fec_stats);
+}
+
static int mlx5e_get_fecparam(struct net_device *netdev,
struct ethtool_fecparam *fecparam)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
- u8 fec_configured = 0;
- u32 fec_active = 0;
+ u16 fec_configured;
+ u32 fec_active;
int err;
err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured);
@@ -1520,14 +1664,14 @@ static int mlx5e_get_fecparam(struct net_device *netdev,
if (err)
return err;
- fecparam->active_fec = pplm2ethtool_fec((u_long)fec_active,
- sizeof(u32) * BITS_PER_BYTE);
+ fecparam->active_fec = pplm2ethtool_fec((unsigned long)fec_active,
+ sizeof(unsigned long) * BITS_PER_BYTE);
if (!fecparam->active_fec)
return -EOPNOTSUPP;
- fecparam->fec = pplm2ethtool_fec((u_long)fec_configured,
- sizeof(u8) * BITS_PER_BYTE);
+ fecparam->fec = pplm2ethtool_fec((unsigned long)fec_configured,
+ sizeof(unsigned long) * BITS_PER_BYTE);
return 0;
}
@@ -1537,10 +1681,15 @@ static int mlx5e_set_fecparam(struct net_device *netdev,
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
- u8 fec_policy = 0;
+ unsigned long fec_bitmap;
+ u16 fec_policy = 0;
int mode;
int err;
+ bitmap_from_arr32(&fec_bitmap, &fecparam->fec, sizeof(fecparam->fec) * BITS_PER_BYTE);
+ if (bitmap_weight(&fec_bitmap, ETHTOOL_FEC_LLRS_BIT + 1) > 1)
+ return -EOPNOTSUPP;
+
for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) {
if (!(pplm_fec_2_ethtool[mode] & fecparam->fec))
continue;
@@ -1660,7 +1809,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
if (size_read < 0) {
netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
__func__, size_read);
- return 0;
+ return size_read;
}
i += size_read;
@@ -1670,6 +1819,49 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
return 0;
}
+static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev,
+ const struct ethtool_module_eeprom *page_data,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_module_eeprom_query_params query;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 *data = page_data->data;
+ int size_read;
+ int i = 0;
+
+ if (!page_data->length)
+ return -EINVAL;
+
+ memset(data, 0, page_data->length);
+
+ query.offset = page_data->offset;
+ query.i2c_address = page_data->i2c_address;
+ query.bank = page_data->bank;
+ query.page = page_data->page;
+ while (i < page_data->length) {
+ query.size = page_data->length - i;
+ size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i);
+
+ /* Done reading, return how many bytes was read */
+ if (!size_read)
+ return i;
+
+ if (size_read == -EINVAL)
+ return -EINVAL;
+ if (size_read < 0) {
+ netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n",
+ __func__, size_read);
+ return i;
+ }
+
+ i += size_read;
+ query.offset += size_read;
+ }
+
+ return i;
+}
+
int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
struct ethtool_flash *flash)
{
@@ -1708,38 +1900,28 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_channels new_channels = {};
- bool mode_changed;
u8 cq_period_mode, current_cq_period_mode;
+ struct mlx5e_params new_params;
+
+ if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ return -EOPNOTSUPP;
+
+ cq_period_mode = cqe_mode_to_period_mode(enable);
- cq_period_mode = enable ?
- MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
- MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
current_cq_period_mode = is_rx_cq ?
priv->channels.params.rx_cq_moderation.cq_period_mode :
priv->channels.params.tx_cq_moderation.cq_period_mode;
- mode_changed = cq_period_mode != current_cq_period_mode;
-
- if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
- !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
- return -EOPNOTSUPP;
- if (!mode_changed)
+ if (cq_period_mode == current_cq_period_mode)
return 0;
- new_channels.params = priv->channels.params;
+ new_params = priv->channels.params;
if (is_rx_cq)
- mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode);
+ mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode);
else
- mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode);
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- return 0;
- }
+ mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode);
- return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
}
static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
@@ -1752,10 +1934,10 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
return set_pflag_cqe_based_moder(netdev, enable, true);
}
-int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val)
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val, bool rx_filter)
{
bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS);
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
int err = 0;
if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
@@ -1764,15 +1946,27 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
if (curr_val == new_val)
return 0;
- new_channels.params = priv->channels.params;
- MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+ if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) {
+ netdev_err(priv->netdev,
+ "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n");
+ return -EINVAL;
+ }
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- return 0;
+ if (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ netdev_warn(priv->netdev, "Can't set CQE compression with HW-GRO, disable it first.\n");
+ return -EINVAL;
}
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ new_params = priv->channels.params;
+ MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+ if (rx_filter)
+ new_params.ptp_rx = new_val;
+
+ if (new_params.ptp_rx == priv->channels.params.ptp_rx)
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+ else
+ err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+ &new_params.ptp_rx, true);
if (err)
return err;
@@ -1788,16 +1982,17 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
+ bool rx_filter;
+ int err;
if (!MLX5_CAP_GEN(mdev, cqe_compression))
return -EOPNOTSUPP;
- if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
- netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
- return -EINVAL;
- }
+ rx_filter = priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE;
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, enable, rx_filter);
+ if (err)
+ return err;
- mlx5e_modify_rx_cqe_compression_locked(priv, enable);
priv->channels.params.rx_cqe_compress_def = enable;
return 0;
@@ -1807,29 +2002,28 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
if (enable) {
- if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
- return -EOPNOTSUPP;
- if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
- return -EINVAL;
- } else if (priv->channels.params.lro_en) {
- netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
+ /* Checking the regular RQ here; mlx5e_validate_xsk_param called
+ * from mlx5e_open_xsk will check for each XSK queue, and
+ * mlx5e_safe_switch_params will be reverted if any check fails.
+ */
+ int err = mlx5e_mpwrq_validate_regular(mdev, &priv->channels.params);
+
+ if (err)
+ return err;
+ } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
return -EINVAL;
}
- new_channels.params = priv->channels.params;
-
- MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
- mlx5e_set_rq_type(mdev, &new_channels.params);
+ new_params = priv->channels.params;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- return 0;
- }
+ MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
+ mlx5e_set_rq_type(mdev, &new_params);
- return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
}
static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
@@ -1854,26 +2048,75 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
return 0;
}
+static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params new_params;
+
+ if (enable && !mlx5e_tx_mpwqe_supported(mdev))
+ return -EOPNOTSUPP;
+
+ new_params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_params, flag, enable);
+
+ return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+}
+
static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+}
+
+static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable);
+}
+
+static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
+{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
int err;
- if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+ if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
return -EOPNOTSUPP;
- new_channels.params = priv->channels.params;
-
- MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+ /* Don't allow changing the PTP state if HTB offload is active, because
+ * the numeration of the QoS SQs will change, while per-queue qdiscs are
+ * attached.
+ */
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n",
+ __func__);
+ return -EINVAL;
+ }
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- return 0;
+ new_params = priv->channels.params;
+ /* Don't allow enabling TX-port-TS if MQPRIO mode channel offload is
+ * active, since it defines explicitly which TC accepts the packet.
+ * This conflicts with TX-port-TS hijacking the PTP traffic to a specific
+ * HW TX-queue.
+ */
+ if (enable && new_params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+ netdev_err(priv->netdev,
+ "%s: MQPRIO mode channel offload is active, cannot set the TX-port-TS\n",
+ __func__);
+ return -EINVAL;
}
+ MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable);
+ /* No need to verify SQ stop room as
+ * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both
+ * has the same log_sq_size.
+ */
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_num_channels_changed_ctx, NULL, true);
+ if (!err)
+ priv->tx_ptp_opened = true;
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
return err;
}
@@ -1884,6 +2127,8 @@ static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
{ "rx_striding_rq", set_pflag_rx_striding_rq },
{ "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
{ "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
+ { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe },
+ { "tx_port_ts", set_pflag_tx_port_ts },
};
static int mlx5e_handle_pflag(struct net_device *netdev,
@@ -1938,7 +2183,8 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev)
return priv->channels.params.pflags;
}
-static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
+int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+ u32 *rule_locs)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -1952,17 +2198,227 @@ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u
return 0;
}
- return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs);
+ return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs);
}
-static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
{
- return mlx5e_ethtool_set_rxnfc(dev, cmd);
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_rxnfc(priv, cmd);
+}
+
+static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode)
+{
+ struct mlx5_ifc_pddr_troubleshooting_page_bits *pddr_troubleshooting_page;
+ u32 in[MLX5_ST_SZ_DW(pddr_reg)] = {};
+ u32 out[MLX5_ST_SZ_DW(pddr_reg)];
+ int err;
+
+ MLX5_SET(pddr_reg, in, local_port, 1);
+ MLX5_SET(pddr_reg, in, page_select,
+ MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE);
+
+ pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, in, page_data);
+ MLX5_SET(pddr_troubleshooting_page, pddr_troubleshooting_page,
+ group_opcode, MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR);
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PDDR, 0, 0);
+ if (err)
+ return err;
+
+ pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, out, page_data);
+ *status_opcode = MLX5_GET(pddr_troubleshooting_page, pddr_troubleshooting_page,
+ status_opcode);
+ return 0;
+}
+
+struct mlx5e_ethtool_link_ext_state_opcode_mapping {
+ u32 status_opcode;
+ enum ethtool_link_ext_state link_ext_state;
+ u8 link_ext_substate;
+};
+
+static const struct mlx5e_ethtool_link_ext_state_opcode_mapping
+mlx5e_link_ext_state_opcode_map[] = {
+ /* States relating to the autonegotiation or issues therein */
+ {2, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED},
+ {3, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED},
+ {4, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED},
+ {36, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE},
+ {38, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE},
+ {39, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD},
+
+ /* Failure during link training */
+ {5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED},
+ {6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT},
+ {7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY},
+ {8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0},
+ {14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT},
+
+ /* Logical mismatch in physical coding sublayer or forward error correction sublayer */
+ {9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK},
+ {10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK},
+ {11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS},
+ {12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED},
+ {13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED},
+
+ /* Signal integrity issues */
+ {15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0},
+ {17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS},
+ {42, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE},
+
+ /* No cable connected */
+ {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0},
+
+ /* Failure is related to cable, e.g., unsupported cable */
+ {16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {1031, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0},
+
+ /* Failure is related to EEPROM, e.g., failure during reading or parsing the data */
+ {1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0},
+
+ /* Failure during calibration algorithm */
+ {23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0},
+
+ /* The hardware is not able to provide the power required from cable or module */
+ {1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0},
+
+ /* The module is overheated */
+ {1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0},
+};
+
+static void
+mlx5e_set_link_ext_state(struct mlx5e_ethtool_link_ext_state_opcode_mapping
+ link_ext_state_mapping,
+ struct ethtool_link_ext_state_info *link_ext_state_info)
+{
+ switch (link_ext_state_mapping.link_ext_state) {
+ case ETHTOOL_LINK_EXT_STATE_AUTONEG:
+ link_ext_state_info->autoneg =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE:
+ link_ext_state_info->link_training =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH:
+ link_ext_state_info->link_logical_mismatch =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY:
+ link_ext_state_info->bad_signal_integrity =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE:
+ link_ext_state_info->cable_issue =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ default:
+ break;
+ }
+
+ link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state;
+}
+
+static int
+mlx5e_get_link_ext_state(struct net_device *dev,
+ struct ethtool_link_ext_state_info *link_ext_state_info)
+{
+ struct mlx5e_ethtool_link_ext_state_opcode_mapping link_ext_state_mapping;
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ u32 status_opcode = 0;
+ int i;
+
+ /* Exit without data if the interface state is OK, since no extended data is
+ * available in such case
+ */
+ if (netif_carrier_ok(dev))
+ return -ENODATA;
+
+ if (query_port_status_opcode(priv->mdev, &status_opcode) ||
+ !status_opcode)
+ return -ENODATA;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5e_link_ext_state_opcode_map); i++) {
+ link_ext_state_mapping = mlx5e_link_ext_state_opcode_map[i];
+ if (link_ext_state_mapping.status_opcode == status_opcode) {
+ mlx5e_set_link_ext_state(link_ext_state_mapping,
+ link_ext_state_info);
+ return 0;
+ }
+ }
+
+ return -ENODATA;
+}
+
+static void mlx5e_get_eth_phy_stats(struct net_device *netdev,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_eth_phy_get(priv, phy_stats);
+}
+
+static void mlx5e_get_eth_mac_stats(struct net_device *netdev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_eth_mac_get(priv, mac_stats);
+}
+
+static void mlx5e_get_eth_ctrl_stats(struct net_device *netdev,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_eth_ctrl_get(priv, ctrl_stats);
+}
+
+static void mlx5e_get_rmon_stats(struct net_device *netdev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_rmon_get(priv, rmon_stats, ranges);
}
const struct ethtool_ops mlx5e_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE |
+ ETHTOOL_COALESCE_USE_CQE,
.get_drvinfo = mlx5e_get_drvinfo,
.get_link = ethtool_op_get_link,
+ .get_link_ext_state = mlx5e_get_link_ext_state,
.get_strings = mlx5e_get_strings,
.get_sset_count = mlx5e_get_sset_count,
.get_ethtool_stats = mlx5e_get_ethtool_stats,
@@ -1978,10 +2434,13 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.get_rxfh_indir_size = mlx5e_get_rxfh_indir_size,
.get_rxfh = mlx5e_get_rxfh,
.set_rxfh = mlx5e_set_rxfh,
+ .get_rxfh_context = mlx5e_get_rxfh_context,
+ .set_rxfh_context = mlx5e_set_rxfh_context,
.get_rxnfc = mlx5e_get_rxnfc,
.set_rxnfc = mlx5e_set_rxnfc,
.get_tunable = mlx5e_get_tunable,
.set_tunable = mlx5e_set_tunable,
+ .get_pause_stats = mlx5e_get_pause_stats,
.get_pauseparam = mlx5e_get_pauseparam,
.set_pauseparam = mlx5e_set_pauseparam,
.get_ts_info = mlx5e_get_ts_info,
@@ -1990,12 +2449,18 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.set_wol = mlx5e_set_wol,
.get_module_info = mlx5e_get_module_info,
.get_module_eeprom = mlx5e_get_module_eeprom,
+ .get_module_eeprom_by_page = mlx5e_get_module_eeprom_by_page,
.flash_device = mlx5e_flash_device,
.get_priv_flags = mlx5e_get_priv_flags,
.set_priv_flags = mlx5e_set_priv_flags,
.self_test = mlx5e_self_test,
.get_msglevel = mlx5e_get_msglevel,
.set_msglevel = mlx5e_set_msglevel,
+ .get_fec_stats = mlx5e_get_fec_stats,
.get_fecparam = mlx5e_get_fecparam,
.set_fecparam = mlx5e_set_fecparam,
+ .get_eth_phy_stats = mlx5e_get_eth_phy_stats,
+ .get_eth_mac_stats = mlx5e_get_eth_mac_stats,
+ .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats,
+ .get_rmon_stats = mlx5e_get_rmon_stats,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 73d3dc07331f..1892ccb889b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -35,18 +35,48 @@
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/mlx5/fs.h>
-#include "en.h"
+#include <linux/mlx5/mpfs.h>
+#include "en_tc.h"
#include "lib/mpfs.h"
+#include "en/ptp.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_flow_steering {
+ struct work_struct set_rx_mode_work;
+ bool state_destroy;
+ bool vlan_strip_disable;
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_namespace *egress_ns;
+#ifdef CONFIG_MLX5_EN_RXNFC
+ struct mlx5e_ethtool_steering *ethtool;
+#endif
+ struct mlx5e_tc_table *tc;
+ struct mlx5e_promisc_table promisc;
+ struct mlx5e_vlan_table *vlan;
+ struct mlx5e_l2_table l2;
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_ttc_table *inner_ttc;
+#ifdef CONFIG_MLX5_EN_ARFS
+ struct mlx5e_arfs_tables *arfs;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_accel_fs_tcp *accel_tcp;
+#endif
+ struct mlx5e_fs_udp *udp;
+ struct mlx5e_fs_any *any;
+ struct mlx5e_ptp_fs *ptp_fs;
+};
-static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai, int type);
-static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai);
enum {
MLX5E_FULLMATCH = 0,
MLX5E_ALLMULTI = 1,
- MLX5E_PROMISC = 2,
};
enum {
@@ -69,12 +99,12 @@ struct mlx5e_l2_hash_node {
bool mpfs;
};
-static inline int mlx5e_hash_l2(u8 *addr)
+static inline int mlx5e_hash_l2(const u8 *addr)
{
return addr[5];
}
-static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr)
+static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr)
{
struct mlx5e_l2_hash_node *hn;
int ix = mlx5e_hash_l2(addr);
@@ -107,9 +137,31 @@ static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn)
kfree(hn);
}
-static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
+struct mlx5e_vlan_table {
+ struct mlx5e_flow_table ft;
+ DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
+ DECLARE_BITMAP(active_svlans, VLAN_N_VID);
+ struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *untagged_rule;
+ struct mlx5_flow_handle *any_cvlan_rule;
+ struct mlx5_flow_handle *any_svlan_rule;
+ struct mlx5_flow_handle *trap_rule;
+ bool cvlan_filter_disabled;
+};
+
+unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan)
+{
+ return vlan->active_svlans;
+}
+
+struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan)
+{
+ return vlan->ft.t;
+}
+
+static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs)
{
- struct net_device *ndev = priv->netdev;
int max_list_size;
int list_size;
u16 *vlans;
@@ -118,35 +170,34 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
int i;
list_size = 0;
- for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID)
+ for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID)
list_size++;
- max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
+ max_list_size = 1 << MLX5_CAP_GEN(fs->mdev, log_max_vlan_list);
if (list_size > max_list_size) {
- netdev_warn(ndev,
- "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
- list_size, max_list_size);
+ fs_warn(fs, "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
list_size = max_list_size;
}
- vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+ vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
if (!vlans)
return -ENOMEM;
i = 0;
- for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+ for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) {
if (i >= list_size)
break;
vlans[i++] = vlan;
}
- err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size);
+ err = mlx5_modify_nic_vport_vlans(fs->mdev, vlans, list_size);
if (err)
- netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
- err);
+ fs_err(fs, "Failed to modify vport vlans list err(%d)\n",
+ err);
- kfree(vlans);
+ kvfree(vlans);
return err;
}
@@ -158,18 +209,18 @@ enum mlx5e_vlan_rule_type {
MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID,
};
-static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
enum mlx5e_vlan_rule_type rule_type,
u16 vid, struct mlx5_flow_spec *spec)
{
- struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
+ struct mlx5_flow_table *ft = fs->vlan->ft.t;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle **rule_p;
MLX5_DECLARE_FLOW_ACT(flow_act);
int err = 0;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.l2.ft.t;
+ dest.ft = fs->l2.ft.t;
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
@@ -179,24 +230,24 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
* disabled in match value means both S & C tags
* don't exist (untagged of both)
*/
- rule_p = &priv->fs.vlan.untagged_rule;
+ rule_p = &fs->vlan->untagged_rule;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.cvlan_tag);
break;
case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
- rule_p = &priv->fs.vlan.any_cvlan_rule;
+ rule_p = &fs->vlan->any_cvlan_rule;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.cvlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
break;
case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
- rule_p = &priv->fs.vlan.any_svlan_rule;
+ rule_p = &fs->vlan->any_svlan_rule;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.svlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
break;
case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
- rule_p = &priv->fs.vlan.active_svlans_rule[vid];
+ rule_p = &fs->vlan->active_svlans_rule[vid];
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.svlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
@@ -206,7 +257,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
vid);
break;
default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */
- rule_p = &priv->fs.vlan.active_cvlans_rule[vid];
+ rule_p = &fs->vlan->active_cvlans_rule[vid];
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.cvlan_tag);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
@@ -217,18 +268,21 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
break;
}
+ if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
+ return 0;
+
*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(*rule_p)) {
err = PTR_ERR(*rule_p);
*rule_p = NULL;
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ fs_err(fs, "%s: add rule failed\n", __func__);
}
return err;
}
-static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+static int mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
enum mlx5e_vlan_rule_type rule_type, u16 vid)
{
struct mlx5_flow_spec *spec;
@@ -239,115 +293,188 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
return -ENOMEM;
if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID)
- mlx5e_vport_context_update_vlans(priv);
+ mlx5e_vport_context_update_vlans(fs);
- err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec);
+ err = __mlx5e_add_vlan_rule(fs, rule_type, vid, spec);
kvfree(spec);
return err;
}
-static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
- enum mlx5e_vlan_rule_type rule_type, u16 vid)
+static void mlx5e_fs_del_vlan_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
{
switch (rule_type) {
case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
- if (priv->fs.vlan.untagged_rule) {
- mlx5_del_flow_rules(priv->fs.vlan.untagged_rule);
- priv->fs.vlan.untagged_rule = NULL;
+ if (fs->vlan->untagged_rule) {
+ mlx5_del_flow_rules(fs->vlan->untagged_rule);
+ fs->vlan->untagged_rule = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
- if (priv->fs.vlan.any_cvlan_rule) {
- mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule);
- priv->fs.vlan.any_cvlan_rule = NULL;
+ if (fs->vlan->any_cvlan_rule) {
+ mlx5_del_flow_rules(fs->vlan->any_cvlan_rule);
+ fs->vlan->any_cvlan_rule = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
- if (priv->fs.vlan.any_svlan_rule) {
- mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule);
- priv->fs.vlan.any_svlan_rule = NULL;
+ if (fs->vlan->any_svlan_rule) {
+ mlx5_del_flow_rules(fs->vlan->any_svlan_rule);
+ fs->vlan->any_svlan_rule = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
- if (priv->fs.vlan.active_svlans_rule[vid]) {
- mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]);
- priv->fs.vlan.active_svlans_rule[vid] = NULL;
+ if (fs->vlan->active_svlans_rule[vid]) {
+ mlx5_del_flow_rules(fs->vlan->active_svlans_rule[vid]);
+ fs->vlan->active_svlans_rule[vid] = NULL;
}
break;
case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
- if (priv->fs.vlan.active_cvlans_rule[vid]) {
- mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]);
- priv->fs.vlan.active_cvlans_rule[vid] = NULL;
+ if (fs->vlan->active_cvlans_rule[vid]) {
+ mlx5_del_flow_rules(fs->vlan->active_cvlans_rule[vid]);
+ fs->vlan->active_cvlans_rule[vid] = NULL;
}
- mlx5e_vport_context_update_vlans(priv);
+ mlx5e_vport_context_update_vlans(fs);
break;
}
}
-static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv)
+static void mlx5e_fs_del_any_vid_rules(struct mlx5e_flow_steering *fs)
{
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
}
-static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv)
+static int mlx5e_fs_add_any_vid_rules(struct mlx5e_flow_steering *fs)
{
int err;
- err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
if (err)
return err;
- return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+ return mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+static struct mlx5_flow_handle *
+mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num)
+{
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+ spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+ spec->flow_context.flow_tag = trap_id;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+ return rule;
+}
+
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
+{
+ struct mlx5_flow_table *ft = fs->vlan->ft.t;
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs->vlan->trap_rule = NULL;
+ fs_err(fs, "%s: add VLAN trap rule failed, err %d\n",
+ __func__, err);
+ return err;
+ }
+ fs->vlan->trap_rule = rule;
+ return 0;
+}
+
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs)
+{
+ if (fs->vlan->trap_rule) {
+ mlx5_del_flow_rules(fs->vlan->trap_rule);
+ fs->vlan->trap_rule = NULL;
+ }
+}
+
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
+{
+ struct mlx5_flow_table *ft = fs->l2.ft.t;
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs->l2.trap_rule = NULL;
+ fs_err(fs, "%s: add MAC trap rule failed, err %d\n",
+ __func__, err);
+ return err;
+ }
+ fs->l2.trap_rule = rule;
+ return 0;
+}
+
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs)
+{
+ if (fs->l2.trap_rule) {
+ mlx5_del_flow_rules(fs->l2.trap_rule);
+ fs->l2.trap_rule = NULL;
+ }
}
-void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
{
- if (!priv->fs.vlan.cvlan_filter_disabled)
+ if (!fs->vlan->cvlan_filter_disabled)
return;
- priv->fs.vlan.cvlan_filter_disabled = false;
- if (priv->netdev->flags & IFF_PROMISC)
+ fs->vlan->cvlan_filter_disabled = false;
+ if (promisc)
return;
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
}
-void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv)
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
{
- if (priv->fs.vlan.cvlan_filter_disabled)
+ if (fs->vlan->cvlan_filter_disabled)
return;
- priv->fs.vlan.cvlan_filter_disabled = true;
- if (priv->netdev->flags & IFF_PROMISC)
+ fs->vlan->cvlan_filter_disabled = true;
+ if (promisc)
return;
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
}
-static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid)
+static int mlx5e_vlan_rx_add_cvid(struct mlx5e_flow_steering *fs, u16 vid)
{
int err;
- set_bit(vid, priv->fs.vlan.active_cvlans);
+ set_bit(vid, fs->vlan->active_cvlans);
- err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
if (err)
- clear_bit(vid, priv->fs.vlan.active_cvlans);
+ clear_bit(vid, fs->vlan->active_cvlans);
return err;
}
-static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
+static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev, u16 vid)
{
- struct net_device *netdev = priv->netdev;
int err;
- set_bit(vid, priv->fs.vlan.active_svlans);
+ set_bit(vid, fs->vlan->active_svlans);
- err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
if (err) {
- clear_bit(vid, priv->fs.vlan.active_svlans);
+ clear_bit(vid, fs->vlan->active_svlans);
return err;
}
@@ -356,75 +483,91 @@ static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
return err;
}
-int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (!fs->vlan) {
+ fs_err(fs, "Vlan doesn't exist\n");
+ return -EINVAL;
+ }
if (be16_to_cpu(proto) == ETH_P_8021Q)
- return mlx5e_vlan_rx_add_cvid(priv, vid);
+ return mlx5e_vlan_rx_add_cvid(fs, vid);
else if (be16_to_cpu(proto) == ETH_P_8021AD)
- return mlx5e_vlan_rx_add_svid(priv, vid);
+ return mlx5e_vlan_rx_add_svid(fs, netdev, vid);
return -EOPNOTSUPP;
}
-int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
+ if (!fs->vlan) {
+ fs_err(fs, "Vlan doesn't exist\n");
+ return -EINVAL;
+ }
if (be16_to_cpu(proto) == ETH_P_8021Q) {
- clear_bit(vid, priv->fs.vlan.active_cvlans);
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ clear_bit(vid, fs->vlan->active_cvlans);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
} else if (be16_to_cpu(proto) == ETH_P_8021AD) {
- clear_bit(vid, priv->fs.vlan.active_svlans);
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
- netdev_update_features(dev);
+ clear_bit(vid, fs->vlan->active_svlans);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ netdev_update_features(netdev);
}
return 0;
}
-static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
+static void mlx5e_fs_add_vlan_rules(struct mlx5e_flow_steering *fs)
{
int i;
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
- for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) {
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
}
- for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
- mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID)
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
- if (priv->fs.vlan.cvlan_filter_disabled &&
- !(priv->netdev->flags & IFF_PROMISC))
- mlx5e_add_any_vid_rules(priv);
+ if (fs->vlan->cvlan_filter_disabled)
+ mlx5e_fs_add_any_vid_rules(fs);
}
-static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+static void mlx5e_del_vlan_rules(struct mlx5e_flow_steering *fs)
{
int i;
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
- for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) {
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
}
- for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
- mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID)
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+ WARN_ON_ONCE(fs->state_destroy);
- if (priv->fs.vlan.cvlan_filter_disabled &&
- !(priv->netdev->flags & IFF_PROMISC))
- mlx5e_del_any_vid_rules(priv);
+ mlx5e_remove_vlan_trap(fs);
+
+ /* must be called after DESTROY bit is set and
+ * set_rx_mode is called and flushed
+ */
+ if (fs->vlan->cvlan_filter_disabled)
+ mlx5e_fs_del_any_vid_rules(fs);
}
#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
-static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
+static void mlx5e_execute_l2_action(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_hash_node *hn)
{
u8 action = hn->action;
@@ -435,9 +578,9 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
switch (action) {
case MLX5E_ACTION_ADD:
- mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
+ mlx5e_add_l2_flow_rule(fs, &hn->ai, MLX5E_FULLMATCH);
if (!is_multicast_ether_addr(mac_addr)) {
- l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr);
+ l2_err = mlx5_mpfs_add_mac(fs->mdev, mac_addr);
hn->mpfs = !l2_err;
}
hn->action = MLX5E_ACTION_NONE;
@@ -445,52 +588,51 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
case MLX5E_ACTION_DEL:
if (!is_multicast_ether_addr(mac_addr) && hn->mpfs)
- l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr);
- mlx5e_del_l2_flow_rule(priv, &hn->ai);
+ l2_err = mlx5_mpfs_del_mac(fs->mdev, mac_addr);
+ mlx5e_del_l2_flow_rule(fs, &hn->ai);
mlx5e_del_l2_from_hash(hn);
break;
}
if (l2_err)
- netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
- action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err);
+ fs_warn(fs, "MPFS, failed to %s mac %pM, err(%d)\n",
+ action == MLX5E_ACTION_ADD ? "add" : "del",
+ mac_addr, l2_err);
}
-static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
+static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
{
- struct net_device *netdev = priv->netdev;
struct netdev_hw_addr *ha;
netif_addr_lock_bh(netdev);
- mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc,
- priv->netdev->dev_addr);
-
+ mlx5e_add_l2_to_hash(fs->l2.netdev_uc, netdev->dev_addr);
netdev_for_each_uc_addr(ha, netdev)
- mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr);
+ mlx5e_add_l2_to_hash(fs->l2.netdev_uc, ha->addr);
netdev_for_each_mc_addr(ha, netdev)
- mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr);
+ mlx5e_add_l2_to_hash(fs->l2.netdev_mc, ha->addr);
netif_addr_unlock_bh(netdev);
}
-static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
+static void mlx5e_fill_addr_array(struct mlx5e_flow_steering *fs, int list_type,
+ struct net_device *ndev,
u8 addr_array[][ETH_ALEN], int size)
{
bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
- struct net_device *ndev = priv->netdev;
struct mlx5e_l2_hash_node *hn;
struct hlist_head *addr_list;
struct hlist_node *tmp;
int i = 0;
int hi;
- addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+ addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc;
if (is_uc) /* Make sure our own address is pushed first */
ether_addr_copy(addr_array[i++], ndev->dev_addr);
- else if (priv->fs.l2.broadcast_enabled)
+ else if (fs->l2.broadcast_enabled)
ether_addr_copy(addr_array[i++], ndev->broadcast);
mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
@@ -502,7 +644,8 @@ static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
}
}
-static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
int list_type)
{
bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
@@ -515,19 +658,18 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
int err;
int hi;
- size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0);
+ size = is_uc ? 0 : (fs->l2.broadcast_enabled ? 1 : 0);
max_size = is_uc ?
- 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) :
- 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list);
+ 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_mc_list);
- addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+ addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc;
mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
size++;
if (size > max_size) {
- netdev_warn(priv->netdev,
- "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
- is_uc ? "UC" : "MC", size, max_size);
+ fs_warn(fs, "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
size = max_size;
}
@@ -537,70 +679,145 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
err = -ENOMEM;
goto out;
}
- mlx5e_fill_addr_array(priv, list_type, addr_array, size);
+ mlx5e_fill_addr_array(fs, list_type, netdev, addr_array, size);
}
- err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size);
+ err = mlx5_modify_nic_vport_mac_list(fs->mdev, list_type, addr_array, size);
out:
if (err)
- netdev_err(priv->netdev,
- "Failed to modify vport %s list err(%d)\n",
- is_uc ? "UC" : "MC", err);
+ fs_err(fs, "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
kfree(addr_array);
}
-static void mlx5e_vport_context_update(struct mlx5e_priv *priv)
+static void mlx5e_vport_context_update(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
{
- struct mlx5e_l2_table *ea = &priv->fs.l2;
+ struct mlx5e_l2_table *ea = &fs->l2;
- mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC);
- mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC);
- mlx5_modify_nic_vport_promisc(priv->mdev, 0,
+ mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_UC);
+ mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_MC);
+ mlx5_modify_nic_vport_promisc(fs->mdev, 0,
ea->allmulti_enabled,
ea->promisc_enabled);
}
-static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv)
+static void mlx5e_apply_netdev_addr(struct mlx5e_flow_steering *fs)
{
struct mlx5e_l2_hash_node *hn;
struct hlist_node *tmp;
int i;
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
- mlx5e_execute_l2_action(priv, hn);
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i)
+ mlx5e_execute_l2_action(fs, hn);
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
- mlx5e_execute_l2_action(priv, hn);
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i)
+ mlx5e_execute_l2_action(fs, hn);
}
-static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv)
+static void mlx5e_handle_netdev_addr(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
{
struct mlx5e_l2_hash_node *hn;
struct hlist_node *tmp;
int i;
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i)
hn->action = MLX5E_ACTION_DEL;
- mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i)
hn->action = MLX5E_ACTION_DEL;
- if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state))
- mlx5e_sync_netdev_addr(priv);
+ if (fs->state_destroy)
+ mlx5e_sync_netdev_addr(fs, netdev);
- mlx5e_apply_netdev_addr(priv);
+ mlx5e_apply_netdev_addr(fs);
}
-void mlx5e_set_rx_mode_work(struct work_struct *work)
+#define MLX5E_PROMISC_GROUP0_SIZE BIT(0)
+#define MLX5E_PROMISC_TABLE_SIZE MLX5E_PROMISC_GROUP0_SIZE
+
+static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
- set_rx_mode_work);
+ struct mlx5_flow_table *ft = fs->promisc.ft.t;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rule_p;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ int err = 0;
- struct mlx5e_l2_table *ea = &priv->fs.l2;
- struct net_device *ndev = priv->netdev;
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = mlx5_get_ttc_flow_table(fs->ttc);
+
+ rule_p = &fs->promisc.rule;
+ *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(*rule_p)) {
+ err = PTR_ERR(*rule_p);
+ *rule_p = NULL;
+ fs_err(fs, "%s: add promiscuous rule failed\n", __func__);
+ }
+ kvfree(spec);
+ return err;
+}
+
+static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_flow_table *ft = &fs->promisc.ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_PROMISC_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ fs_err(fs, "fail to create promisc table err=%d\n", err);
+ return err;
+ }
+
+ err = mlx5e_add_promisc_rule(fs);
+ if (err)
+ goto err_destroy_promisc_table;
+
+ return 0;
+
+err_destroy_promisc_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return err;
+}
- bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
- bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC);
- bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI);
+static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs)
+{
+ if (WARN(!fs->promisc.rule, "Trying to remove non-existing promiscuous rule"))
+ return;
+ mlx5_del_flow_rules(fs->promisc.rule);
+ fs->promisc.rule = NULL;
+}
+
+static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs)
+{
+ if (WARN(!fs->promisc.ft.t, "Trying to remove non-existing promiscuous table"))
+ return;
+ mlx5e_del_promisc_rule(fs);
+ mlx5_destroy_flow_table(fs->promisc.ft.t);
+ fs->promisc.ft.t = NULL;
+}
+
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
+{
+ struct mlx5e_l2_table *ea = &fs->l2;
+
+ bool rx_mode_enable = fs->state_destroy;
+ bool promisc_enabled = rx_mode_enable && (netdev->flags & IFF_PROMISC);
+ bool allmulti_enabled = rx_mode_enable && (netdev->flags & IFF_ALLMULTI);
bool broadcast_enabled = rx_mode_enable;
bool enable_promisc = !ea->promisc_enabled && promisc_enabled;
@@ -609,37 +826,35 @@ void mlx5e_set_rx_mode_work(struct work_struct *work)
bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled;
bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled;
bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled;
+ int err;
if (enable_promisc) {
- if (!priv->channels.params.vlan_strip_disable)
- netdev_warn_once(ndev,
- "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
- mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC);
- if (!priv->fs.vlan.cvlan_filter_disabled)
- mlx5e_add_any_vid_rules(priv);
+ err = mlx5e_create_promisc_table(fs);
+ if (err)
+ enable_promisc = false;
+ if (!fs->vlan_strip_disable && !err)
+ fs_warn_once(fs,
+ "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
}
if (enable_allmulti)
- mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
+ mlx5e_add_l2_flow_rule(fs, &ea->allmulti, MLX5E_ALLMULTI);
if (enable_broadcast)
- mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
+ mlx5e_add_l2_flow_rule(fs, &ea->broadcast, MLX5E_FULLMATCH);
- mlx5e_handle_netdev_addr(priv);
+ mlx5e_handle_netdev_addr(fs, netdev);
if (disable_broadcast)
- mlx5e_del_l2_flow_rule(priv, &ea->broadcast);
+ mlx5e_del_l2_flow_rule(fs, &ea->broadcast);
if (disable_allmulti)
- mlx5e_del_l2_flow_rule(priv, &ea->allmulti);
- if (disable_promisc) {
- if (!priv->fs.vlan.cvlan_filter_disabled)
- mlx5e_del_any_vid_rules(priv);
- mlx5e_del_l2_flow_rule(priv, &ea->promisc);
- }
+ mlx5e_del_l2_flow_rule(fs, &ea->allmulti);
+ if (disable_promisc)
+ mlx5e_destroy_promisc_table(fs);
ea->promisc_enabled = promisc_enabled;
ea->allmulti_enabled = allmulti_enabled;
ea->broadcast_enabled = broadcast_enabled;
- mlx5e_vport_context_update(priv);
+ mlx5e_vport_context_update(fs, netdev);
}
static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
@@ -654,9 +869,9 @@ static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
ft->num_groups = 0;
}
-void mlx5e_init_l2_addr(struct mlx5e_priv *priv)
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev)
{
- ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast);
+ ether_addr_copy(fs->l2.broadcast.addr, netdev->broadcast);
}
void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
@@ -667,550 +882,64 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
ft->t = NULL;
}
-static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
-{
- int i;
-
- for (i = 0; i < MLX5E_NUM_TT; i++) {
- if (!IS_ERR_OR_NULL(ttc->rules[i])) {
- mlx5_del_flow_rules(ttc->rules[i]);
- ttc->rules[i] = NULL;
- }
- }
-
- for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
- if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
- mlx5_del_flow_rules(ttc->tunnel_rules[i]);
- ttc->tunnel_rules[i] = NULL;
- }
- }
-}
-
-struct mlx5e_etype_proto {
- u16 etype;
- u8 proto;
-};
-
-static struct mlx5e_etype_proto ttc_rules[] = {
- [MLX5E_TT_IPV4_TCP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_TCP,
- },
- [MLX5E_TT_IPV6_TCP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_TCP,
- },
- [MLX5E_TT_IPV4_UDP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_UDP,
- },
- [MLX5E_TT_IPV6_UDP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_UDP,
- },
- [MLX5E_TT_IPV4_IPSEC_AH] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_AH,
- },
- [MLX5E_TT_IPV6_IPSEC_AH] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_AH,
- },
- [MLX5E_TT_IPV4_IPSEC_ESP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_ESP,
- },
- [MLX5E_TT_IPV6_IPSEC_ESP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_ESP,
- },
- [MLX5E_TT_IPV4] = {
- .etype = ETH_P_IP,
- .proto = 0,
- },
- [MLX5E_TT_IPV6] = {
- .etype = ETH_P_IPV6,
- .proto = 0,
- },
- [MLX5E_TT_ANY] = {
- .etype = 0,
- .proto = 0,
- },
-};
-
-static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
- [MLX5E_TT_IPV4_GRE] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_GRE,
- },
- [MLX5E_TT_IPV6_GRE] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_GRE,
- },
- [MLX5E_TT_IPV4_IPIP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_IPIP,
- },
- [MLX5E_TT_IPV6_IPIP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_IPIP,
- },
- [MLX5E_TT_IPV4_IPV6] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_IPV6,
- },
- [MLX5E_TT_IPV6_IPV6] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_IPV6,
- },
-
-};
-
-bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type)
-{
- switch (proto_type) {
- case IPPROTO_GRE:
- return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
- case IPPROTO_IPIP:
- case IPPROTO_IPV6:
- return MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip);
- default:
- return false;
- }
-}
-
-bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev)
-{
- int tt;
-
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (mlx5e_tunnel_proto_supported(mdev, ttc_tunnel_rules[tt].proto))
- return true;
- }
- return false;
-}
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
-{
- return (mlx5e_any_tunnel_proto_supported(mdev) &&
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
-static u8 mlx5e_etype_to_ipv(u16 ethertype)
-{
- if (ethertype == ETH_P_IP)
- return 4;
-
- if (ethertype == ETH_P_IPV6)
- return 6;
-
- return 0;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_destination *dest,
- u16 etype,
- u8 proto)
-{
- int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
- MLX5_DECLARE_FLOW_ACT(flow_act);
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- int err = 0;
- u8 ipv;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return ERR_PTR(-ENOMEM);
-
- if (proto) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
- }
-
- ipv = mlx5e_etype_to_ipv(etype);
- if (match_ipv_outer && ipv) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
- } else if (etype) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
- }
-
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
- }
-
- kvfree(spec);
- return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
- struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5_flow_destination dest = {};
- struct mlx5_flow_handle **rules;
- struct mlx5_flow_table *ft;
- int tt;
- int err;
-
- ft = ttc->ft.t;
- rules = ttc->rules;
-
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
- if (tt == MLX5E_TT_ANY)
- dest.tir_num = params->any_tt_tirn;
- else
- dest.tir_num = params->indir_tirn[tt];
- rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
- ttc_rules[tt].etype,
- ttc_rules[tt].proto);
- if (IS_ERR(rules[tt]))
- goto del_rules;
- }
-
- if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return 0;
-
- rules = ttc->tunnel_rules;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = params->inner_ttc->ft.t;
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (!mlx5e_tunnel_proto_supported(priv->mdev,
- ttc_tunnel_rules[tt].proto))
- continue;
- rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
- ttc_tunnel_rules[tt].etype,
- ttc_tunnel_rules[tt].proto);
- if (IS_ERR(rules[tt]))
- goto del_rules;
- }
-
- return 0;
-
-del_rules:
- err = PTR_ERR(rules[tt]);
- rules[tt] = NULL;
- mlx5e_cleanup_ttc_rules(ttc);
- return err;
-}
-
-static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
- bool use_ipv)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int ix = 0;
- u32 *in;
- int err;
- u8 *mc;
-
- ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
- sizeof(*ft->g), GFP_KERNEL);
- if (!ft->g)
- return -ENOMEM;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- kfree(ft->g);
- return -ENOMEM;
- }
-
- /* L4 Group */
- mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
- if (use_ipv)
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
- else
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
- MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP1_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* L3 Group */
- MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP2_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* Any Group */
- memset(in, 0, inlen);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP3_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- kvfree(in);
- return 0;
-
-err:
- err = PTR_ERR(ft->g[ft->num_groups]);
- ft->g[ft->num_groups] = NULL;
- kvfree(in);
-
- return err;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_destination *dest,
- u16 etype, u8 proto)
-{
- MLX5_DECLARE_FLOW_ACT(flow_act);
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- int err = 0;
- u8 ipv;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return ERR_PTR(-ENOMEM);
-
- ipv = mlx5e_etype_to_ipv(etype);
- if (etype && ipv) {
- spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
- MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
- }
-
- if (proto) {
- spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
- MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
- }
-
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
- }
-
- kvfree(spec);
- return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
- struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5_flow_destination dest = {};
- struct mlx5_flow_handle **rules;
- struct mlx5_flow_table *ft;
- int err;
- int tt;
-
- ft = ttc->ft.t;
- rules = ttc->rules;
-
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
- if (tt == MLX5E_TT_ANY)
- dest.tir_num = params->any_tt_tirn;
- else
- dest.tir_num = params->indir_tirn[tt];
-
- rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
- ttc_rules[tt].etype,
- ttc_rules[tt].proto);
- if (IS_ERR(rules[tt]))
- goto del_rules;
- }
-
- return 0;
-
-del_rules:
- err = PTR_ERR(rules[tt]);
- rules[tt] = NULL;
- mlx5e_cleanup_ttc_rules(ttc);
- return err;
-}
-
-static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int ix = 0;
- u32 *in;
- int err;
- u8 *mc;
-
- ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
- if (!ft->g)
- return -ENOMEM;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- kfree(ft->g);
- return -ENOMEM;
- }
-
- /* L4 Group */
- mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
- MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
- MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP1_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* L3 Group */
- MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP2_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* Any Group */
- memset(in, 0, inlen);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP3_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- kvfree(in);
- return 0;
-
-err:
- err = PTR_ERR(ft->g[ft->num_groups]);
- ft->g[ft->num_groups] = NULL;
- kvfree(in);
-
- return err;
-}
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
- struct ttc_params *ttc_params)
-{
- ttc_params->any_tt_tirn = priv->direct_tir[0].tirn;
- ttc_params->inner_ttc = &priv->fs.inner_ttc;
-}
-
-void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
- ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss_inner(rx_res,
+ tt);
+ }
}
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params, bool tunnel)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
- ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
-}
-int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5e_flow_table *ft = &ttc->ft;
- int err;
-
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return 0;
-
- ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
- if (IS_ERR(ft->t)) {
- err = PTR_ERR(ft->t);
- ft->t = NULL;
- return err;
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss(rx_res, tt);
}
- err = mlx5e_create_inner_ttc_table_groups(ttc);
- if (err)
- goto err;
-
- err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
- if (err)
- goto err;
-
- return 0;
-
-err:
- mlx5e_destroy_flow_table(ft);
- return err;
-}
-
-void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc)
-{
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ ttc_params->inner_ttc = tunnel;
+ if (!tunnel || !mlx5_tunnel_inner_ft_supported(fs->mdev))
return;
- mlx5e_cleanup_ttc_rules(ttc);
- mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc)
-{
- mlx5e_cleanup_ttc_rules(ttc);
- mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int err;
-
- ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
- if (IS_ERR(ft->t)) {
- err = PTR_ERR(ft->t);
- ft->t = NULL;
- return err;
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ ttc_params->tunnel_dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->tunnel_dests[tt].ft =
+ mlx5_get_ttc_flow_table(fs->inner_ttc);
}
-
- err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
- if (err)
- goto err;
-
- err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
- if (err)
- goto err;
-
- return 0;
-err:
- mlx5e_destroy_flow_table(ft);
- return err;
}
-static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai)
{
if (!IS_ERR_OR_NULL(ai->rule)) {
@@ -1219,10 +948,10 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
}
}
-static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai, int type)
{
- struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
+ struct mlx5_flow_table *ft = fs->l2.ft.t;
struct mlx5_flow_destination dest = {};
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_spec *spec;
@@ -1240,7 +969,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
outer_headers.dmac_47_16);
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.ttc.ft.t;
+ dest.ft = mlx5_get_ttc_flow_table(fs->ttc);
switch (type) {
case MLX5E_FULLMATCH:
@@ -1254,15 +983,11 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
mc_dmac[0] = 0x01;
mv_dmac[0] = 0x01;
break;
-
- case MLX5E_PROMISC:
- break;
}
ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(ai->rule)) {
- netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
- __func__, mv_dmac);
+ fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac);
err = PTR_ERR(ai->rule);
ai->rule = NULL;
}
@@ -1273,12 +998,12 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
}
#define MLX5E_NUM_L2_GROUPS 3
-#define MLX5E_L2_GROUP1_SIZE BIT(0)
-#define MLX5E_L2_GROUP2_SIZE BIT(15)
-#define MLX5E_L2_GROUP3_SIZE BIT(0)
+#define MLX5E_L2_GROUP1_SIZE BIT(15)
+#define MLX5E_L2_GROUP2_SIZE BIT(0)
+#define MLX5E_L2_GROUP_TRAP_SIZE BIT(0) /* must be last */
#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\
MLX5E_L2_GROUP2_SIZE +\
- MLX5E_L2_GROUP3_SIZE)
+ MLX5E_L2_GROUP_TRAP_SIZE)
static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -1301,7 +1026,9 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
mc_dmac = MLX5_ADDR_OF(fte_match_param, mc,
outer_headers.dmac_47_16);
- /* Flow Group for promiscuous */
+ /* Flow Group for full match */
+ eth_broadcast_addr(mc_dmac);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
MLX5_SET_CFG(in, start_flow_index, ix);
ix += MLX5E_L2_GROUP1_SIZE;
MLX5_SET_CFG(in, end_flow_index, ix - 1);
@@ -1310,9 +1037,9 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
goto err_destroy_groups;
ft->num_groups++;
- /* Flow Group for full match */
- eth_broadcast_addr(mc_dmac);
- MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ /* Flow Group for allmulti */
+ eth_zero_addr(mc_dmac);
+ mc_dmac[0] = 0x01;
MLX5_SET_CFG(in, start_flow_index, ix);
ix += MLX5E_L2_GROUP2_SIZE;
MLX5_SET_CFG(in, end_flow_index, ix - 1);
@@ -1321,11 +1048,10 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
goto err_destroy_groups;
ft->num_groups++;
- /* Flow Group for allmulti */
- eth_zero_addr(mc_dmac);
- mc_dmac[0] = 0x01;
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_L2_GROUP3_SIZE;
+ ix += MLX5E_L2_GROUP_TRAP_SIZE;
MLX5_SET_CFG(in, end_flow_index, ix - 1);
ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
if (IS_ERR(ft->g[ft->num_groups]))
@@ -1340,18 +1066,19 @@ err_destroy_groups:
ft->g[ft->num_groups] = NULL;
mlx5e_destroy_groups(ft);
kvfree(in);
+ kfree(ft->g);
return err;
}
-static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_l2_table(struct mlx5e_flow_steering *fs)
{
- mlx5e_destroy_flow_table(&priv->fs.l2.ft);
+ mlx5e_destroy_flow_table(&fs->l2.ft);
}
-static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
+static int mlx5e_create_l2_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_l2_table *l2_table = &priv->fs.l2;
+ struct mlx5e_l2_table *l2_table = &fs->l2;
struct mlx5e_flow_table *ft = &l2_table->ft;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -1362,7 +1089,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
ft_attr.level = MLX5E_L2_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
@@ -1382,15 +1109,17 @@ err_destroy_flow_table:
return err;
}
-#define MLX5E_NUM_VLAN_GROUPS 4
+#define MLX5E_NUM_VLAN_GROUPS 5
#define MLX5E_VLAN_GROUP0_SIZE BIT(12)
#define MLX5E_VLAN_GROUP1_SIZE BIT(12)
#define MLX5E_VLAN_GROUP2_SIZE BIT(1)
#define MLX5E_VLAN_GROUP3_SIZE BIT(0)
+#define MLX5E_VLAN_GROUP_TRAP_SIZE BIT(0) /* must be last */
#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\
MLX5E_VLAN_GROUP1_SIZE +\
MLX5E_VLAN_GROUP2_SIZE +\
- MLX5E_VLAN_GROUP3_SIZE)
+ MLX5E_VLAN_GROUP3_SIZE +\
+ MLX5E_VLAN_GROUP_TRAP_SIZE)
static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in,
int inlen)
@@ -1445,6 +1174,15 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in
goto err_destroy_groups;
ft->num_groups++;
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP_TRAP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
return 0;
err_destroy_groups:
@@ -1471,25 +1209,23 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft)
return err;
}
-static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
+static int mlx5e_fs_create_vlan_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_flow_table *ft = &priv->fs.vlan.ft;
struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
int err;
+ ft = &fs->vlan->ft;
ft->num_groups = 0;
ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
ft_attr.level = MLX5E_VLAN_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(fs->ns, &ft_attr);
+ if (IS_ERR(ft->t))
+ return PTR_ERR(ft->t);
- if (IS_ERR(ft->t)) {
- err = PTR_ERR(ft->t);
- ft->t = NULL;
- return err;
- }
ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
if (!ft->g) {
err = -ENOMEM;
@@ -1500,7 +1236,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
if (err)
goto err_free_g;
- mlx5e_add_vlan_rules(priv);
+ mlx5e_fs_add_vlan_rules(fs);
return 0;
@@ -1508,94 +1244,336 @@ err_free_g:
kfree(ft->g);
err_destroy_vlan_table:
mlx5_destroy_flow_table(ft->t);
- ft->t = NULL;
return err;
}
-static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_vlan_table(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_del_vlan_rules(fs);
+ mlx5e_destroy_flow_table(&fs->vlan->ft);
+}
+
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_flow_steering *fs)
+{
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
+ return;
+ mlx5_destroy_ttc_table(fs->inner_ttc);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs)
+{
+ mlx5_destroy_ttc_table(fs->ttc);
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
{
- mlx5e_del_vlan_rules(priv);
- mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
+ struct ttc_params ttc_params = {};
+
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
+ return 0;
+
+ mlx5e_set_inner_ttc_params(fs, rx_res, &ttc_params);
+ fs->inner_ttc = mlx5_create_inner_ttc_table(fs->mdev,
+ &ttc_params);
+ if (IS_ERR(fs->inner_ttc))
+ return PTR_ERR(fs->inner_ttc);
+ return 0;
}
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
{
struct ttc_params ttc_params = {};
- int tt, err;
- priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
+ mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true);
+ fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params);
+ if (IS_ERR(fs->ttc))
+ return PTR_ERR(fs->ttc);
+ return 0;
+}
- if (!priv->fs.ns)
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev)
+{
+ struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ int err;
+
+ if (!ns)
return -EOPNOTSUPP;
- err = mlx5e_arfs_create_tables(priv);
+ mlx5e_fs_set_ns(fs, ns, false);
+ err = mlx5e_arfs_create_tables(fs, rx_res,
+ !!(netdev->hw_features & NETIF_F_NTUPLE));
if (err) {
- netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
- err);
- priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ fs_err(fs, "Failed to create arfs tables, err=%d\n", err);
+ netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- mlx5e_set_ttc_basic_params(priv, &ttc_params);
- mlx5e_set_inner_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
-
- err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+ err = mlx5e_create_inner_ttc_table(fs, rx_res);
if (err) {
- netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create inner ttc table, err=%d\n", err);
goto err_destroy_arfs_tables;
}
- mlx5e_set_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ err = mlx5e_create_ttc_table(fs, rx_res);
if (err) {
- netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create ttc table, err=%d\n", err);
goto err_destroy_inner_ttc_table;
}
- err = mlx5e_create_l2_table(priv);
+ err = mlx5e_create_l2_table(fs);
if (err) {
- netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create l2 table, err=%d\n", err);
goto err_destroy_ttc_table;
}
- err = mlx5e_create_vlan_table(priv);
+ err = mlx5e_fs_create_vlan_table(fs);
if (err) {
- netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create vlan table, err=%d\n", err);
goto err_destroy_l2_table;
}
- mlx5e_ethtool_init_steering(priv);
+ err = mlx5e_ptp_alloc_rx_fs(fs, profile);
+ if (err)
+ goto err_destory_vlan_table;
+
+ mlx5e_ethtool_init_steering(fs);
return 0;
+err_destory_vlan_table:
+ mlx5e_destroy_vlan_table(fs);
err_destroy_l2_table:
- mlx5e_destroy_l2_table(priv);
+ mlx5e_destroy_l2_table(fs);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_ttc_table(fs);
err_destroy_inner_ttc_table:
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_destroy_inner_ttc_table(fs);
err_destroy_arfs_tables:
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_arfs_destroy_tables(fs, !!(netdev->hw_features & NETIF_F_NTUPLE));
return err;
}
-void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile)
+{
+ mlx5e_ptp_free_rx_fs(fs, profile);
+ mlx5e_destroy_vlan_table(fs);
+ mlx5e_destroy_l2_table(fs);
+ mlx5e_destroy_ttc_table(fs);
+ mlx5e_destroy_inner_ttc_table(fs);
+ mlx5e_arfs_destroy_tables(fs, ntuple);
+ mlx5e_ethtool_cleanup_steering(fs);
+}
+
+static int mlx5e_fs_vlan_alloc(struct mlx5e_flow_steering *fs)
+{
+ fs->vlan = kvzalloc(sizeof(*fs->vlan), GFP_KERNEL);
+ if (!fs->vlan)
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_fs_vlan_free(struct mlx5e_flow_steering *fs)
+{
+ kvfree(fs->vlan);
+}
+
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs)
+{
+ return fs->vlan;
+}
+
+static int mlx5e_fs_tc_alloc(struct mlx5e_flow_steering *fs)
+{
+ fs->tc = mlx5e_tc_table_alloc();
+ if (IS_ERR(fs->tc))
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_fs_tc_free(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_tc_table_free(fs->tc);
+}
+
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs)
+{
+ return fs->tc;
+}
+
+#ifdef CONFIG_MLX5_EN_RXNFC
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{
+ return mlx5e_ethtool_alloc(&fs->ethtool);
+}
+
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_ethtool_free(fs->ethtool);
+}
+
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs)
+{
+ return fs->ethtool;
+}
+#else
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{ return 0; }
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { }
+#endif
+
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy)
+{
+ struct mlx5e_flow_steering *fs;
+ int err;
+
+ fs = kvzalloc(sizeof(*fs), GFP_KERNEL);
+ if (!fs)
+ goto err;
+
+ fs->mdev = mdev;
+ fs->state_destroy = state_destroy;
+ if (mlx5e_profile_feature_cap(profile, FS_VLAN)) {
+ err = mlx5e_fs_vlan_alloc(fs);
+ if (err)
+ goto err_free_fs;
+ }
+
+ if (mlx5e_profile_feature_cap(profile, FS_TC)) {
+ err = mlx5e_fs_tc_alloc(fs);
+ if (err)
+ goto err_free_vlan;
+ }
+
+ err = mlx5e_fs_ethtool_alloc(fs);
+ if (err)
+ goto err_free_tc;
+
+ return fs;
+err_free_tc:
+ mlx5e_fs_tc_free(fs);
+err_free_vlan:
+ mlx5e_fs_vlan_free(fs);
+err_free_fs:
+ kvfree(fs);
+err:
+ return NULL;
+}
+
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_fs_ethtool_free(fs);
+ mlx5e_fs_tc_free(fs);
+ mlx5e_fs_vlan_free(fs);
+ kvfree(fs);
+}
+
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs)
+{
+ return &fs->l2;
+}
+
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress)
+{
+ return egress ? fs->egress_ns : fs->ns;
+}
+
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress)
+{
+ if (!egress)
+ fs->ns = ns;
+ else
+ fs->egress_ns = ns;
+}
+
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner)
+{
+ return inner ? fs->inner_ttc : fs->ttc;
+}
+
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner)
+{
+ if (!inner)
+ fs->ttc = ttc;
+ else
+ fs->inner_ttc = ttc;
+}
+
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs)
+{
+ return fs->arfs;
+}
+
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs)
+{
+ fs->arfs = arfs;
+}
+#endif
+
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs)
+{
+ return fs->ptp_fs;
+}
+
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs)
+{
+ fs->ptp_fs = ptp_fs;
+}
+
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs)
+{
+ return fs->any;
+}
+
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any)
+{
+ fs->any = any;
+}
+
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs)
+{
+ return fs->accel_tcp;
+}
+
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp)
+{
+ fs->accel_tcp = accel_tcp;
+}
+#endif
+
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy)
+{
+ fs->state_destroy = state_destroy;
+}
+
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs,
+ bool vlan_strip_disable)
+{
+ fs->vlan_strip_disable = vlan_strip_disable;
+}
+
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs)
+{
+ return fs->udp;
+}
+
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp)
+{
+ fs->udp = udp;
+}
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs)
{
- mlx5e_destroy_vlan_table(priv);
- mlx5e_destroy_l2_table(priv);
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
- mlx5e_arfs_destroy_tables(priv);
- mlx5e_ethtool_cleanup_steering(priv);
+ return fs->mdev;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 3bc2ac3d53fc..aac32e505c14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -33,13 +33,37 @@
#include <linux/mlx5/fs.h>
#include "en.h"
#include "en/params.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_ethtool_table {
+ struct mlx5_flow_table *ft;
+ int num_rules;
+};
+
+#define ETHTOOL_NUM_L3_L4_FTS 7
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+ struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
+ struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
+ struct list_head rules;
+ int tot_num_rules;
+};
+
+static int flow_type_to_traffic_type(u32 flow_type);
+
+static u32 flow_type_mask(u32 flow_type)
+{
+ return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+}
struct mlx5e_ethtool_rule {
struct list_head list;
struct ethtool_rx_flow_spec flow_spec;
struct mlx5_flow_handle *rule;
struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5e_rss *rss;
};
static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
@@ -58,6 +82,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
struct ethtool_rx_flow_spec *fs,
int num_tuples)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_ethtool_table *eth_ft;
struct mlx5_flow_namespace *ns;
@@ -66,25 +91,25 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
int table_size;
int prio;
- switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ switch (flow_type_mask(fs->flow_type)) {
case TCP_V4_FLOW:
case UDP_V4_FLOW:
case TCP_V6_FLOW:
case UDP_V6_FLOW:
max_tuples = ETHTOOL_NUM_L3_L4_FTS;
prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
- eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ eth_ft = &ethtool->l3_l4_ft[prio];
break;
case IP_USER_FLOW:
case IPV6_USER_FLOW:
max_tuples = ETHTOOL_NUM_L3_L4_FTS;
prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
- eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ eth_ft = &ethtool->l3_l4_ft[prio];
break;
case ETHER_FLOW:
max_tuples = ETHTOOL_NUM_L2_FTS;
prio = max_tuples - num_tuples;
- eth_ft = &priv->fs.ethtool.l2_ft[prio];
+ eth_ft = &ethtool->l2_ft[prio];
prio += MLX5E_ETHTOOL_L2_PRIO;
break;
default:
@@ -329,7 +354,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
outer_headers);
void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
outer_headers);
- u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+ u32 flow_type = flow_type_mask(fs->flow_type);
switch (flow_type) {
case TCP_V4_FLOW:
@@ -374,15 +399,16 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
static void add_rule_to_list(struct mlx5e_priv *priv,
struct mlx5e_ethtool_rule *rule)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ struct list_head *head = &ethtool->rules;
struct mlx5e_ethtool_rule *iter;
- struct list_head *head = &priv->fs.ethtool.rules;
- list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ list_for_each_entry(iter, &ethtool->rules, list) {
if (iter->flow_spec.location > rule->flow_spec.location)
break;
head = &iter->list;
}
- priv->fs.ethtool.tot_num_rules++;
+ ethtool->tot_num_rules++;
list_add(&rule->list, head);
}
@@ -397,10 +423,45 @@ static bool outer_header_zero(u32 *match_criteria)
size - 1);
}
+static int flow_get_tirn(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule,
+ struct ethtool_rx_flow_spec *fs,
+ u32 rss_context, u32 *tirn)
+{
+ if (fs->flow_type & FLOW_RSS) {
+ struct mlx5e_packet_merge_param pkt_merge_param;
+ struct mlx5e_rss *rss;
+ u32 flow_type;
+ int err;
+ int tt;
+
+ rss = mlx5e_rx_res_rss_get(priv->rx_res, rss_context);
+ if (!rss)
+ return -ENOENT;
+
+ flow_type = flow_type_mask(fs->flow_type);
+ tt = flow_type_to_traffic_type(flow_type);
+ if (tt < 0)
+ return -EINVAL;
+
+ pkt_merge_param = priv->channels.params.packet_merge;
+ err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn);
+ if (err)
+ return err;
+ eth_rule->rss = rss;
+ mlx5e_rss_refcnt_inc(eth_rule->rss);
+ } else {
+ *tirn = mlx5e_rx_res_get_tirn_direct(priv->rx_res, fs->ring_cookie);
+ }
+
+ return 0;
+}
+
static struct mlx5_flow_handle *
add_ethtool_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule,
struct mlx5_flow_table *ft,
- struct ethtool_rx_flow_spec *fs)
+ struct ethtool_rx_flow_spec *fs, u32 rss_context)
{
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND };
struct mlx5_flow_destination *dst = NULL;
@@ -419,22 +480,17 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
} else {
- struct mlx5e_params *params = &priv->channels.params;
- enum mlx5e_rq_group group;
- struct mlx5e_tir *tir;
- u16 ix;
-
- mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
- tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir;
-
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (!dst) {
err = -ENOMEM;
goto free;
}
+ err = flow_get_tirn(priv, eth_rule, fs, rss_context, &dst->tir_num);
+ if (err)
+ goto free;
+
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst->tir_num = tir[ix].tirn;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
@@ -453,13 +509,16 @@ free:
return err ? ERR_PTR(err) : rule;
}
-static void del_ethtool_rule(struct mlx5e_priv *priv,
+static void del_ethtool_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_ethtool_rule *eth_rule)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
if (eth_rule->rule)
mlx5_del_flow_rules(eth_rule->rule);
+ if (eth_rule->rss)
+ mlx5e_rss_refcnt_dec(eth_rule->rss);
list_del(&eth_rule->list);
- priv->fs.ethtool.tot_num_rules--;
+ ethtool->tot_num_rules--;
put_flow_table(eth_rule->eth_ft);
kfree(eth_rule);
}
@@ -467,9 +526,10 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv,
int location)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5e_ethtool_rule *iter;
- list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ list_for_each_entry(iter, &ethtool->rules, list) {
if (iter->flow_spec.location == location)
return iter;
}
@@ -483,7 +543,7 @@ static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv,
eth_rule = find_ethtool_rule(priv, location);
if (eth_rule)
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
if (!eth_rule)
@@ -614,11 +674,10 @@ static int validate_flow(struct mlx5e_priv *priv,
return -ENOSPC;
if (fs->ring_cookie != RX_CLS_FLOW_DISC)
- if (!mlx5e_qid_validate(priv->profile, &priv->channels.params,
- fs->ring_cookie))
+ if (fs->ring_cookie >= priv->channels.params.num_channels)
return -EINVAL;
- switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ switch (flow_type_mask(fs->flow_type)) {
case ETHER_FLOW:
num_tuples += validate_ethter(fs);
break;
@@ -667,7 +726,7 @@ static int validate_flow(struct mlx5e_priv *priv,
static int
mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
- struct ethtool_rx_flow_spec *fs)
+ struct ethtool_rx_flow_spec *fs, u32 rss_context)
{
struct mlx5e_ethtool_table *eth_ft;
struct mlx5e_ethtool_rule *eth_rule;
@@ -694,11 +753,8 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
eth_rule->flow_spec = *fs;
eth_rule->eth_ft = eth_ft;
- if (!eth_ft->ft) {
- err = -EINVAL;
- goto del_ethtool_rule;
- }
- rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs);
+
+ rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
goto del_ethtool_rule;
@@ -709,7 +765,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
return 0;
del_ethtool_rule:
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
return err;
}
@@ -729,7 +785,7 @@ mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location)
goto out;
}
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
out:
return err;
}
@@ -738,16 +794,27 @@ static int
mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
struct ethtool_rxnfc *info, int location)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5e_ethtool_rule *eth_rule;
if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
return -EINVAL;
- list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
- if (eth_rule->flow_spec.location == location) {
- info->fs = eth_rule->flow_spec;
+ list_for_each_entry(eth_rule, &ethtool->rules, list) {
+ int index;
+
+ if (eth_rule->flow_spec.location != location)
+ continue;
+ if (!info)
return 0;
- }
+ info->fs = eth_rule->flow_spec;
+ if (!eth_rule->rss)
+ return 0;
+ index = mlx5e_rx_res_rss_index(priv->rx_res, eth_rule->rss);
+ if (index < 0)
+ return index;
+ info->rss_context = index;
+ return 0;
}
return -ENOENT;
@@ -763,7 +830,7 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
info->data = MAX_NUM_OF_ETHTOOL_RULES;
while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
- err = mlx5e_ethtool_get_flow(priv, info, location);
+ err = mlx5e_ethtool_get_flow(priv, NULL, location);
if (!err)
rule_locs[idx++] = location;
location++;
@@ -771,59 +838,74 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
return err;
}
-void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv)
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{
+ *ethtool = kvzalloc(sizeof(**ethtool), GFP_KERNEL);
+ if (!*ethtool)
+ return -ENOMEM;
+ return 0;
+}
+
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool)
{
+ kvfree(ethtool);
+}
+
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
struct mlx5e_ethtool_rule *iter;
struct mlx5e_ethtool_rule *temp;
- list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list)
- del_ethtool_rule(priv, iter);
+ list_for_each_entry_safe(iter, temp, &ethtool->rules, list)
+ del_ethtool_rule(fs, iter);
}
-void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs)
{
- INIT_LIST_HEAD(&priv->fs.ethtool.rules);
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
+
+ INIT_LIST_HEAD(&ethtool->rules);
}
-static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+static int flow_type_to_traffic_type(u32 flow_type)
{
switch (flow_type) {
case TCP_V4_FLOW:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
case TCP_V6_FLOW:
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
case UDP_V4_FLOW:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
case UDP_V6_FLOW:
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
case AH_V4_FLOW:
- return MLX5E_TT_IPV4_IPSEC_AH;
+ return MLX5_TT_IPV4_IPSEC_AH;
case AH_V6_FLOW:
- return MLX5E_TT_IPV6_IPSEC_AH;
+ return MLX5_TT_IPV6_IPSEC_AH;
case ESP_V4_FLOW:
- return MLX5E_TT_IPV4_IPSEC_ESP;
+ return MLX5_TT_IPV4_IPSEC_ESP;
case ESP_V6_FLOW:
- return MLX5E_TT_IPV6_IPSEC_ESP;
+ return MLX5_TT_IPV6_IPSEC_ESP;
case IPV4_FLOW:
- return MLX5E_TT_IPV4;
+ return MLX5_TT_IPV4;
case IPV6_FLOW:
- return MLX5E_TT_IPV6;
+ return MLX5_TT_IPV6;
default:
- return MLX5E_NUM_INDIR_TIRS;
+ return -EINVAL;
}
}
static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
struct ethtool_rxnfc *nfc)
{
- int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
- enum mlx5e_traffic_types tt;
u8 rx_hash_field = 0;
- void *in;
+ int err;
+ int tt;
tt = flow_type_to_traffic_type(nfc->flow_type);
- if (tt == MLX5E_NUM_INDIR_TIRS)
- return -EINVAL;
+ if (tt < 0)
+ return tt;
/* RSS does not support anything other than hashing to queues
* on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
@@ -848,35 +930,24 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
if (nfc->data & RXH_L4_B_2_3)
rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
mutex_lock(&priv->state_lock);
-
- if (rx_hash_field == priv->rss_params.rx_hash_fields[tt])
- goto out;
-
- priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
- mlx5e_modify_tirs_hash(priv, in, inlen);
-
-out:
+ err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field);
mutex_unlock(&priv->state_lock);
- kvfree(in);
- return 0;
+
+ return err;
}
static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
struct ethtool_rxnfc *nfc)
{
- enum mlx5e_traffic_types tt;
u32 hash_field = 0;
+ int tt;
tt = flow_type_to_traffic_type(nfc->flow_type);
- if (tt == MLX5E_NUM_INDIR_TIRS)
- return -EINVAL;
+ if (tt < 0)
+ return tt;
- hash_field = priv->rss_params.rx_hash_fields[tt];
+ hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt);
nfc->data = 0;
if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
@@ -891,14 +962,13 @@ static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
return 0;
}
-int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
int err = 0;
switch (cmd->cmd) {
case ETHTOOL_SRXCLSRLINS:
- err = mlx5e_ethtool_flow_replace(priv, &cmd->fs);
+ err = mlx5e_ethtool_flow_replace(priv, &cmd->fs, cmd->rss_context);
break;
case ETHTOOL_SRXCLSRLDEL:
err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
@@ -914,15 +984,15 @@ int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
return err;
}
-int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
struct ethtool_rxnfc *info, u32 *rule_locs)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
int err = 0;
switch (info->cmd) {
case ETHTOOL_GRXCLSRLCNT:
- info->rule_cnt = priv->fs.ethtool.tot_num_rules;
+ info->rule_cnt = ethtool->tot_num_rules;
break;
case ETHTOOL_GRXCLSRULE:
err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 21de4764d4c0..e3a4f01bcceb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -31,25 +31,23 @@
*/
#include <net/tc_act/tc_gact.h>
-#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <linux/bpf.h>
#include <linux/if_bridge.h>
+#include <linux/filter.h>
#include <net/page_pool.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "eswitch.h"
#include "en.h"
#include "en/txrx.h"
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
-#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/macsec.h"
#include "en_accel/en_accel.h"
-#include "en_accel/tls.h"
-#include "accel/ipsec.h"
-#include "accel/tls.h"
+#include "en_accel/ktls.h"
#include "lib/vxlan.h"
#include "lib/clock.h"
#include "en/port.h"
@@ -58,87 +56,54 @@
#include "en/monitor_stats.h"
#include "en/health.h"
#include "en/params.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
#include "en/hv_vhca_stats.h"
+#include "en/devlink.h"
#include "lib/mlx5.h"
+#include "en/ptp.h"
+#include "en/htb.h"
+#include "qos.h"
+#include "en/trap.h"
-
-bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
{
- bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
- MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
- MLX5_CAP_ETH(mdev, reg_umr_sq);
- u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
- bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
+ u16 umr_wqebbs, max_wqebbs;
+ bool striding_rq_umr;
+ striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+ MLX5_CAP_ETH(mdev, reg_umr_sq);
if (!striding_rq_umr)
return false;
- if (!inline_umr) {
- mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
- (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
- return false;
- }
- return true;
-}
-
-void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- params->log_rq_mtu_frames = is_kdump_kernel() ?
- MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
- MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
- mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
- BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
- BIT(params->log_rq_mtu_frames),
- BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
- MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
-}
-
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
- return false;
-
- if (MLX5_IPSEC_DEV(mdev))
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+ max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+ /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is
+ * calculated from mlx5e_get_max_sq_aligned_wqebbs.
+ */
+ if (WARN_ON(umr_wqebbs > max_wqebbs))
return false;
- if (params->xdp_prog) {
- /* XSK params are not considered here. If striding RQ is in use,
- * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
- * be called with the known XSK params.
- */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- return false;
- }
-
return true;
}
-void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
-{
- params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
- MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
- MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
- MLX5_WQ_TYPE_CYCLIC;
-}
-
void mlx5e_update_carrier(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u8 port_state;
+ bool up;
port_state = mlx5_query_vport_state(mdev,
MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
0);
- if (port_state == VPORT_STATE_UP) {
+ up = port_state == VPORT_STATE_UP;
+ if (up == netif_carrier_ok(priv->netdev))
+ netif_carrier_event(priv->netdev);
+ if (up) {
netdev_info(priv->netdev, "Link up\n");
netif_carrier_on(priv->netdev);
} else {
@@ -159,16 +124,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
mutex_unlock(&priv->state_lock);
}
-void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
-{
- int i;
-
- for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--)
- if (mlx5e_nic_stats_grps[i]->update_stats_mask &
- MLX5E_NDO_UPDATE_STATS)
- mlx5e_nic_stats_grps[i]->update_stats(priv);
-}
-
static void mlx5e_update_stats_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
@@ -221,49 +176,167 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
}
+static int blocking_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb);
+ int err;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_TYPE_TRAP:
+ err = mlx5e_handle_trap_event(priv, data);
+ break;
+ default:
+ netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event);
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv)
+{
+ priv->blocking_events_nb.notifier_call = blocking_event;
+ mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb);
+}
+
+static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
+{
+ mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb);
+}
+
+static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+
+ WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD);
+
+ return entries * umr_entry_size / MLX5_OCTWORD;
+}
+
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
struct mlx5e_icosq *sq,
struct mlx5e_umr_wqe *wqe)
{
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
- u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
+ u16 octowords;
+ u8 ds_cnt;
+
+ ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode),
+ MLX5_SEND_WQE_DS);
cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
ds_cnt);
- cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
- cseg->imm = rq->mkey_be;
+ cseg->umr_mkey = rq->mpwqe.umr_mkey_be;
ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
- ucseg->xlt_octowords =
- cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
+ octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode);
+ ucseg->xlt_octowords = cpu_to_be16(octowords);
ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
}
-static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
- struct mlx5e_channel *c)
+static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node)
+{
+ rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
+ GFP_KERNEL, node);
+ if (!rq->mpwqe.shampo)
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq)
+{
+ kvfree(rq->mpwqe.shampo);
+}
+
+static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+
+ shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
+ node);
+ if (!shampo->bitmap)
+ return -ENOMEM;
+
+ shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
+ sizeof(*shampo->info)),
+ GFP_KERNEL, node);
+ if (!shampo->info) {
+ kvfree(shampo->bitmap);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
+{
+ kvfree(rq->mpwqe.shampo->bitmap);
+ kvfree(rq->mpwqe.shampo->info);
+}
+
+static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
{
int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ size_t alloc_size;
+
+ alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units,
+ rq->mpwqe.pages_per_wqe));
- rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
- sizeof(*rq->mpwqe.info)),
- GFP_KERNEL, cpu_to_node(c->cpu));
+ rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
if (!rq->mpwqe.info)
return -ENOMEM;
- mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
+ mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
return 0;
}
-static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
- u64 npages, u8 page_shift,
- struct mlx5_core_mkey *umr_mkey)
+
+static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode)
{
- int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5_MKC_ACCESS_MODE_MTT;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return MLX5_MKC_ACCESS_MODE_KSM;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return MLX5_MKC_ACCESS_MODE_KLMS;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return MLX5_MKC_ACCESS_MODE_KSM;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
+ u32 npages, u8 page_shift, u32 *umr_mkey,
+ dma_addr_t filler_addr,
+ enum mlx5e_mpwrq_umr_mode umr_mode,
+ u32 xsk_chunk_size)
+{
+ struct mlx5_mtt *mtt;
+ struct mlx5_ksm *ksm;
+ struct mlx5_klm *klm;
+ u32 octwords;
+ int inlen;
void *mkc;
u32 *in;
int err;
+ int i;
+
+ if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED ||
+ umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) &&
+ !MLX5_CAP_GEN(mdev, fixed_buffer_size)) {
+ mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n");
+ return -EINVAL;
+ }
+
+ octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode);
+
+ inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in),
+ MLX5_OCTWORD, octwords);
+ if (inlen < 0)
+ return inlen;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
@@ -275,15 +348,99 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
- MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
-
+ MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode));
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+ MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
MLX5_SET64(mkc, mkc, len, npages << page_shift);
- MLX5_SET(mkc, mkc, translations_octword_size,
- MLX5_MTT_OCTW(npages));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ MLX5_SET(mkc, mkc, translations_octword_size, octwords);
+ if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)
+ MLX5_SET(mkc, mkc, log_page_size, page_shift - 2);
+ else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED)
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords);
+
+ /* Initialize the mkey with all MTTs pointing to a default
+ * page (filler_addr). When the channels are activated, UMR
+ * WQEs will redirect the RX WQEs to the actual memory from
+ * the RQ's pool, while the gaps (wqe_overflow) remain mapped
+ * to the default page.
+ */
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++) {
+ klm[i << 1] = (struct mlx5_klm) {
+ .va = cpu_to_be64(filler_addr),
+ .bcount = cpu_to_be32(xsk_chunk_size),
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ };
+ klm[(i << 1) + 1] = (struct mlx5_klm) {
+ .va = cpu_to_be64(filler_addr),
+ .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size),
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ };
+ }
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++)
+ ksm[i] = (struct mlx5_ksm) {
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ .va = cpu_to_be64(filler_addr),
+ };
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++)
+ mtt[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(filler_addr),
+ };
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages * 4; i++) {
+ ksm[i] = (struct mlx5_ksm) {
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ .va = cpu_to_be64(filler_addr),
+ };
+ }
+ break;
+ }
+
+ err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
+ u64 nentries,
+ u32 *umr_mkey)
+{
+ int inlen;
+ void *mkc;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(mkc, mkc, translations_octword_size, nentries);
+ MLX5_SET(mkc, mkc, length64, 1);
err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
kvfree(in);
@@ -292,14 +449,41 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
{
- u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
+ u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0;
+ u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ u32 num_entries, max_num_entries;
+ u32 umr_mkey;
+ int err;
+
+ max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode);
- return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+ /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. */
+ if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe,
+ &num_entries) ||
+ num_entries > max_num_entries))
+ mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n",
+ __func__, wq_size, rq->mpwqe.mtts_per_wqe,
+ max_num_entries);
+
+ err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift,
+ &umr_mkey, rq->wqe_overflow.addr,
+ rq->mpwqe.umr_mode, xsk_chunk_size);
+ rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey);
+ return err;
}
-static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
+static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq *rq)
{
- return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
+ u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
+ mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
+ max_klm_size, rq->mpwqe.shampo->hd_per_wq);
+ return -EINVAL;
+ }
+ return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+ &rq->mpwqe.shampo->mkey);
}
static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
@@ -308,7 +492,14 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
struct mlx5e_wqe_frag_info *prev = NULL;
int i;
- next_frag.di = &rq->wqe.di[0];
+ if (rq->xsk_pool) {
+ /* Assumptions used by XSK batched allocator. */
+ WARN_ON(rq->wqe.info.num_frags != 1);
+ WARN_ON(rq->wqe.info.log_num_frags != 0);
+ WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
+ }
+
+ next_frag.au = &rq->wqe.alloc_units[0];
for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
@@ -318,7 +509,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
- next_frag.di++;
+ next_frag.au++;
next_frag.offset = 0;
if (prev)
prev->last_in_page = true;
@@ -335,14 +526,13 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
prev->last_in_page = true;
}
-static int mlx5e_init_di_list(struct mlx5e_rq *rq,
- int wq_sz, int cpu)
+static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node)
{
int len = wq_sz << rq->wqe.info.log_num_frags;
- rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)),
- GFP_KERNEL, cpu_to_node(cpu));
- if (!rq->wqe.di)
+ rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)),
+ GFP_KERNEL, node);
+ if (!rq->wqe.alloc_units)
return -ENOMEM;
mlx5e_init_frags_partition(rq);
@@ -350,9 +540,9 @@ static int mlx5e_init_di_list(struct mlx5e_rq *rq,
return 0;
}
-static void mlx5e_free_di_list(struct mlx5e_rq *rq)
+static void mlx5e_free_au_list(struct mlx5e_rq *rq)
{
- kvfree(rq->wqe.di);
+ kvfree(rq->wqe.alloc_units);
}
static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
@@ -362,183 +552,229 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_rq_cqe_err(rq);
}
-static int mlx5e_alloc_rq(struct mlx5e_channel *c,
- struct mlx5e_params *params,
+static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+ rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
+ if (!rq->wqe_overflow.page)
+ return -ENOMEM;
+
+ rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
+ PAGE_SIZE, rq->buff.map_dir);
+ if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
+ __free_page(rq->wqe_overflow.page);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+ dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
+ rq->buff.map_dir);
+ __free_page(rq->wqe_overflow.page);
+}
+
+static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ int err;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = c->netdev;
+ rq->priv = c->priv;
+ rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
+ rq->icosq = &c->icosq;
+ rq->ix = c->ix;
+ rq->channel = c;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->stats = &c->priv->channel_stats[c->ix]->rq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ err = mlx5e_rq_set_handlers(rq, params, NULL);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
+}
+
+static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq *rq,
+ u32 *pool_size,
+ int node)
+{
+ void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
+ int wq_size;
+ int err;
+
+ if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ return 0;
+ err = mlx5e_rq_shampo_hd_alloc(rq, node);
+ if (err)
+ goto out;
+ rq->mpwqe.shampo->hd_per_wq =
+ mlx5e_shampo_hd_per_wq(mdev, params, rqp);
+ err = mlx5e_create_rq_hd_umr_mkey(mdev, rq);
+ if (err)
+ goto err_shampo_hd;
+ err = mlx5e_rq_shampo_hd_info_alloc(rq, node);
+ if (err)
+ goto err_shampo_info;
+ rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
+ if (!rq->hw_gro_data) {
+ err = -ENOMEM;
+ goto err_hw_gro_data;
+ }
+ rq->mpwqe.shampo->key =
+ cpu_to_be32(rq->mpwqe.shampo->mkey);
+ rq->mpwqe.shampo->hd_per_wqe =
+ mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
+ wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
+ MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
+ return 0;
+
+err_hw_gro_data:
+ mlx5e_rq_shampo_hd_info_free(rq);
+err_shampo_info:
+ mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey);
+err_shampo_hd:
+ mlx5e_rq_shampo_hd_free(rq);
+out:
+ return err;
+}
+
+static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
+{
+ if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ return;
+
+ kvfree(rq->hw_gro_data);
+ mlx5e_rq_shampo_hd_info_free(rq);
+ mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey);
+ mlx5e_rq_shampo_hd_free(rq);
+}
+
+static int mlx5e_alloc_rq(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem,
struct mlx5e_rq_param *rqp,
- struct mlx5e_rq *rq)
+ int node, struct mlx5e_rq *rq)
{
struct page_pool_params pp_params = { 0 };
- struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_core_dev *mdev = rq->mdev;
void *rqc = rqp->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
- u32 num_xsk_frames = 0;
- u32 rq_xdp_ix;
u32 pool_size;
int wq_sz;
int err;
int i;
- rqp->wq.db_numa_node = cpu_to_node(c->cpu);
-
- rq->wq_type = params->rq_wq_type;
- rq->pdev = c->pdev;
- rq->netdev = c->netdev;
- rq->tstamp = c->tstamp;
- rq->clock = &mdev->clock;
- rq->channel = c;
- rq->ix = c->ix;
- rq->mdev = mdev;
- rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- rq->xdpsq = &c->rq_xdpsq;
- rq->umem = umem;
-
- if (rq->umem)
- rq->stats = &c->priv->channel_stats[c->ix].xskrq;
- else
- rq->stats = &c->priv->channel_stats[c->ix].rq;
+ rqp->wq.db_numa_node = node;
INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
if (params->xdp_prog)
bpf_prog_inc(params->xdp_prog);
- rq->xdp_prog = params->xdp_prog;
-
- rq_xdp_ix = rq->ix;
- if (xsk)
- rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
- err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
- if (err < 0)
- goto err_rq_wq_destroy;
+ RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
- rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+ rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
- rq->buff.umem_headroom = xsk ? xsk->headroom : 0;
pool_size = 1 << params->log_rq_mtu_frames;
+ rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
+
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
&rq->wq_ctrl);
if (err)
- return err;
+ goto err_rq_xdp_prog;
+
+ err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
+ if (err)
+ goto err_rq_wq_destroy;
rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
- if (xsk)
- num_xsk_frames = wq_sz <<
- mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
-
- pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
- mlx5e_mpwqe_get_log_rq_size(params, xsk);
-
- rq->post_wqes = mlx5e_post_rx_mpwqes;
- rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
-
- rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
-#ifdef CONFIG_MLX5_EN_IPSEC
- if (MLX5_IPSEC_DEV(mdev)) {
- err = -EINVAL;
- netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
- goto err_rq_wq_destroy;
- }
-#endif
- if (!rq->handle_rx_cqe) {
- err = -EINVAL;
- netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
- goto err_rq_wq_destroy;
- }
-
- rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
- mlx5e_xsk_skb_from_cqe_mpwrq_linear :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
- mlx5e_skb_from_cqe_mpwrq_linear :
- mlx5e_skb_from_cqe_mpwrq_nonlinear;
+ rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ rq->mpwqe.pages_per_wqe =
+ mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+ rq->mpwqe.umr_wqebbs =
+ mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+ rq->mpwqe.mtts_per_wqe =
+ mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+
+ pool_size = rq->mpwqe.pages_per_wqe <<
+ mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz);
+
+ rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
- goto err_rq_wq_destroy;
- rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+ goto err_rq_drop_page;
+
+ err = mlx5e_rq_alloc_mpwqe_info(rq, node);
+ if (err)
+ goto err_rq_mkey;
- err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+ err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node);
if (err)
- goto err_free;
+ goto err_free_mpwqe_info;
+
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
&rq->wq_ctrl);
if (err)
- return err;
+ goto err_rq_xdp_prog;
rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
- if (xsk)
- num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
-
rq->wqe.info = rqp->frags_info;
+ rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
+
rq->wqe.frags =
kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
(wq_sz << rq->wqe.info.log_num_frags)),
- GFP_KERNEL, cpu_to_node(c->cpu));
+ GFP_KERNEL, node);
if (!rq->wqe.frags) {
err = -ENOMEM;
- goto err_free;
+ goto err_rq_wq_destroy;
}
- err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
+ err = mlx5e_init_au_list(rq, wq_sz, node);
if (err)
- goto err_free;
-
- rq->post_wqes = mlx5e_post_rx_wqes;
- rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
-
-#ifdef CONFIG_MLX5_EN_IPSEC
- if (c->priv->ipsec)
- rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
- else
-#endif
- rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
- if (!rq->handle_rx_cqe) {
- err = -EINVAL;
- netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err);
- goto err_free;
- }
-
- rq->wqe.skb_from_cqe = xsk ?
- mlx5e_xsk_skb_from_cqe_linear :
- mlx5e_rx_is_linear_skb(params, NULL) ?
- mlx5e_skb_from_cqe_linear :
- mlx5e_skb_from_cqe_nonlinear;
- rq->mkey_be = c->mkey_be;
+ goto err_rq_frags;
}
if (xsk) {
- err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
- if (unlikely(err)) {
- mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",
- num_xsk_frames);
- goto err_free;
- }
-
- rq->zca.free = mlx5e_xsk_zca_free;
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_ZERO_COPY,
- &rq->zca);
+ MEM_TYPE_XSK_BUFF_POOL, NULL);
+ xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
} else {
/* Create a page_pool and register it with rxq */
pp_params.order = 0;
pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
pp_params.pool_size = pool_size;
- pp_params.nid = cpu_to_node(c->cpu);
- pp_params.dev = c->pdev;
+ pp_params.nid = node;
+ pp_params.dev = rq->pdev;
pp_params.dma_dir = rq->buff.map_dir;
/* page_pool can be used even when there is no rq->xdp_prog,
@@ -550,13 +786,14 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
if (IS_ERR(rq->page_pool)) {
err = PTR_ERR(rq->page_pool);
rq->page_pool = NULL;
- goto err_free;
+ goto err_free_by_rq_type;
}
- err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_PAGE_POOL, rq->page_pool);
+ if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, rq->page_pool);
}
if (err)
- goto err_free;
+ goto err_destroy_page_pool;
for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -564,11 +801,14 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
u32 byte_count =
rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
- u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
+ u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) <<
+ rq->mpwqe.page_shift;
+ u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ?
+ 0 : rq->buff.headroom;
- wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
+ wqe->data[0].addr = cpu_to_be64(dma_offset + headroom);
wqe->data[0].byte_count = cpu_to_be32(byte_count);
- wqe->data[0].lkey = rq->mkey_be;
+ wqe->data[0].lkey = rq->mpwqe.umr_mkey_be;
} else {
struct mlx5e_rx_wqe_cyc *wqe =
mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
@@ -606,53 +846,64 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
return 0;
-err_free:
+err_destroy_page_pool:
+ page_pool_destroy(rq->page_pool);
+err_free_by_rq_type:
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ mlx5e_rq_free_shampo(rq);
+err_free_mpwqe_info:
kvfree(rq->mpwqe.info);
- mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+err_rq_mkey:
+ mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
+err_rq_drop_page:
+ mlx5e_free_mpwqe_rq_drop_page(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
+ mlx5e_free_au_list(rq);
+err_rq_frags:
kvfree(rq->wqe.frags);
- mlx5e_free_di_list(rq);
}
-
err_rq_wq_destroy:
- if (rq->xdp_prog)
- bpf_prog_put(rq->xdp_prog);
- xdp_rxq_info_unreg(&rq->xdp_rxq);
- page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
+err_rq_xdp_prog:
+ if (params->xdp_prog)
+ bpf_prog_put(params->xdp_prog);
return err;
}
static void mlx5e_free_rq(struct mlx5e_rq *rq)
{
+ struct bpf_prog *old_prog;
int i;
- if (rq->xdp_prog)
- bpf_prog_put(rq->xdp_prog);
+ if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
+ old_prog = rcu_dereference_protected(rq->xdp_prog,
+ lockdep_is_held(&rq->priv->state_lock));
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
- mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+ mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
+ mlx5e_free_mpwqe_rq_drop_page(rq);
+ mlx5e_rq_free_shampo(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
kvfree(rq->wqe.frags);
- mlx5e_free_di_list(rq);
+ mlx5e_free_au_list(rq);
}
for (i = rq->page_cache.head; i != rq->page_cache.tail;
i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
- struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
-
/* With AF_XDP, page_cache is not used, so this loop is not
* entered, and it's safe to call mlx5e_page_release_dynamic
* directly.
*/
- mlx5e_page_release_dynamic(rq, dma_info, false);
+ mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false);
}
xdp_rxq_info_unreg(&rq->xdp_rxq);
@@ -660,11 +911,10 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
mlx5_wq_destroy(&rq->wq_ctrl);
}
-static int mlx5e_create_rq(struct mlx5e_rq *rq,
- struct mlx5e_rq_param *param)
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
struct mlx5_core_dev *mdev = rq->mdev;
-
+ u8 ts_format;
void *in;
void *rqc;
void *wq;
@@ -677,6 +927,9 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
if (!in)
return -ENOMEM;
+ ts_format = mlx5_is_real_time_rq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
@@ -684,10 +937,17 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ MLX5_SET(wq, wq, log_headers_buffer_entry_num,
+ order_base_2(rq->mpwqe.shampo->hd_per_wq));
+ MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey);
+ }
+
mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
(__be64 *)MLX5_ADDR_OF(wq, wq, pas));
@@ -698,7 +958,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
return err;
}
-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
+static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
{
struct mlx5_core_dev *mdev = rq->mdev;
@@ -720,18 +980,42 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
MLX5_SET(modify_rq_in, in, rq_state, curr_state);
MLX5_SET(rqc, rqc, state, next_state);
- err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in);
kvfree(in);
return err;
}
+static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+{
+ struct net_device *dev = rq->netdev;
+ int err;
+
+ err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
+ if (err) {
+ netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
+ return err;
+ }
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err) {
+ netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
+{
+ mlx5e_free_rx_descs(rq);
+
+ return mlx5e_rq_to_ready(rq, curr_state);
+}
+
static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
{
- struct mlx5e_channel *c = rq->channel;
- struct mlx5e_priv *priv = c->priv;
- struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_core_dev *mdev = rq->mdev;
void *in;
void *rqc;
@@ -751,7 +1035,7 @@ static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
MLX5_SET(rqc, rqc, scatter_fcs, enable);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
- err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in);
kvfree(in);
@@ -760,8 +1044,7 @@ static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
{
- struct mlx5e_channel *c = rq->channel;
- struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_core_dev *mdev = rq->mdev;
void *in;
void *rqc;
int inlen;
@@ -780,14 +1063,14 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
MLX5_SET(rqc, rqc, vsd, vsd);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
- err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in);
kvfree(in);
return err;
}
-static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
+void mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
mlx5_core_destroy_rq(rq->mdev, rq->rqn);
}
@@ -795,7 +1078,6 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
{
unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
- struct mlx5e_channel *c = rq->channel;
u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
@@ -806,13 +1088,45 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
msleep(20);
} while (time_before(jiffies, exp_time));
- netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
- c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
+ netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
+ rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
mlx5e_reporter_rx_timeout(rq);
return -ETIMEDOUT;
}
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq;
+ u16 head;
+ int i;
+
+ if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return;
+
+ wq = &rq->mpwqe.wq;
+ head = wq->head;
+
+ /* Outstanding UMR WQEs (in progress) start at wq->head */
+ for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
+ rq->dealloc_wqe(rq, head);
+ head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+ }
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ u16 len;
+
+ len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) &
+ (rq->mpwqe.shampo->hd_per_wq - 1);
+ mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false);
+ rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci;
+ }
+
+ rq->mpwqe.actual_wq_head = wq->head;
+ rq->mpwqe.umr_in_progress = 0;
+ rq->mpwqe.umr_completed = 0;
+}
+
void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
{
__be16 wqe_ix_be;
@@ -820,14 +1134,8 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
- u16 head = wq->head;
- int i;
- /* Outstanding UMR WQEs (in progress) start at wq->head */
- for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
- rq->dealloc_wqe(rq, head);
- head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
- }
+ mlx5e_free_rx_in_progress_descs(rq);
while (!mlx5_wq_ll_is_empty(wq)) {
struct mlx5e_rx_wqe_ll *wqe;
@@ -839,6 +1147,10 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
mlx5_wq_ll_pop(wq, wqe_ix_be,
&wqe->next.next_wqe_index);
}
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq,
+ 0, true);
} else {
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -851,13 +1163,17 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
}
-int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem, struct mlx5e_rq *rq)
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+ struct mlx5e_xsk_param *xsk, int node,
+ struct mlx5e_rq *rq)
{
+ struct mlx5_core_dev *mdev = rq->mdev;
int err;
- err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq);
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
+
+ err = mlx5e_alloc_rq(params, xsk, param, node, rq);
if (err)
return err;
@@ -869,18 +1185,25 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
if (err)
goto err_destroy_rq;
- if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full))
- __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state);
+ if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
+ __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
if (params->rx_dim_enabled)
- __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+ __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
/* We disable csum_complete when XDP is enabled since
* XDP programs might manipulate packets which will render
* skb->checksum incorrect.
*/
- if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
- __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
+
+ /* For CQE compression on striding RQ, use stride index provided by
+ * HW if capability is supported.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
+ MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
+ __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
return 0;
@@ -895,19 +1218,17 @@ err_free_rq:
void mlx5e_activate_rq(struct mlx5e_rq *rq)
{
set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
- mlx5e_trigger_irq(&rq->channel->icosq);
}
void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
{
clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
- napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
}
void mlx5e_close_rq(struct mlx5e_rq *rq)
{
cancel_work_sync(&rq->dim.work);
- cancel_work_sync(&rq->channel->icosq.recover_work);
cancel_work_sync(&rq->recover_work);
mlx5e_destroy_rq(rq);
mlx5e_free_rx_descs(rq);
@@ -925,9 +1246,10 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+ size_t size;
- xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq,
- GFP_KERNEL, numa);
+ size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
+ xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
if (!xdpi_fifo->xi)
return -ENOMEM;
@@ -941,10 +1263,11 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ size_t size;
int err;
- sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz,
- GFP_KERNEL, numa);
+ size = array_size(sizeof(*sq->db.wqe_info), wq_sz);
+ sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
if (!sq->db.wqe_info)
return -ENOMEM;
@@ -959,7 +1282,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
struct mlx5e_params *params,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_sq_param *param,
struct mlx5e_xdpsq *sq,
bool is_redirect)
@@ -972,16 +1295,19 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->pdev = c->pdev;
sq->mkey_be = c->mkey_be;
sq->channel = c;
- sq->uar_map = mdev->mlx5e_res.bfreg.map;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- sq->umem = umem;
+ sq->xsk_pool = xsk_pool;
- sq->stats = sq->umem ?
- &c->priv->channel_stats[c->ix].xsksq :
+ sq->stats = sq->xsk_pool ?
+ &c->priv->channel_stats[c->ix]->xsksq :
is_redirect ?
- &c->priv->channel_stats[c->ix].xdpsq :
- &c->priv->channel_stats[c->ix].rq_xdpsq;
+ &c->priv->channel_stats[c->ix]->xdpsq :
+ &c->priv->channel_stats[c->ix]->rq_xdpsq;
+ sq->stop_room = param->is_mpw ? mlx5e_stop_room_for_mpwqe(mdev) :
+ mlx5e_stop_room_for_max_wqe(mdev);
+ sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1009,17 +1335,17 @@ static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
{
- kvfree(sq->db.ico_wqe);
+ kvfree(sq->db.wqe_info);
}
static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ size_t size;
- sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
- sizeof(*sq->db.ico_wqe)),
- GFP_KERNEL, numa);
- if (!sq->db.ico_wqe)
+ size = array_size(wq_sz, sizeof(*sq->db.wqe_info));
+ sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
+ if (!sq->db.wqe_info)
return -ENOMEM;
return 0;
@@ -1033,9 +1359,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_icosq_cqe_err(sq);
}
+static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+ recover_work);
+
+ /* Not implemented yet. */
+
+ netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
+}
+
static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
struct mlx5e_sq_param *param,
- struct mlx5e_icosq *sq)
+ struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
struct mlx5_core_dev *mdev = c->mdev;
@@ -1043,7 +1380,8 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
int err;
sq->channel = c;
- sq->uar_map = mdev->mlx5e_res.bfreg.map;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->reserved_room = param->stop_room;
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1055,7 +1393,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
if (err)
goto err_sq_wq_destroy;
- INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
+ INIT_WORK(&sq->recover_work, recover_work_func);
return 0;
@@ -1071,13 +1409,14 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
mlx5_wq_destroy(&sq->wq_ctrl);
}
-static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
+void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{
kvfree(sq->db.wqe_info);
+ kvfree(sq->db.skb_fifo.fifo);
kvfree(sq->db.dma_fifo);
}
-static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
+int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
@@ -1085,20 +1424,26 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.dma_fifo)),
GFP_KERNEL, numa);
+ sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.skb_fifo.fifo)),
+ GFP_KERNEL, numa);
sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.wqe_info)),
GFP_KERNEL, numa);
- if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+ if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) {
mlx5e_free_txqsq_db(sq);
return -ENOMEM;
}
sq->dma_fifo_mask = df_sz - 1;
+ sq->db.skb_fifo.pc = &sq->skb_fifo_pc;
+ sq->db.skb_fifo.cc = &sq->skb_fifo_cc;
+ sq->db.skb_fifo.mask = df_sz - 1;
+
return 0;
}
-static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int txq_ix,
struct mlx5e_params *params,
@@ -1112,30 +1457,26 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int err;
sq->pdev = c->pdev;
- sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
- sq->channel = c;
+ sq->netdev = c->netdev;
+ sq->mdev = c->mdev;
+ sq->priv = c->priv;
sq->ch_ix = c->ix;
sq->txq_ix = txq_ix;
- sq->uar_map = mdev->mlx5e_res.bfreg.map;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
- sq->stop_room = MLX5E_SQ_STOP_ROOM;
+ sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
- if (MLX5_IPSEC_DEV(c->priv->mdev))
+ if (mlx5_ipsec_device_caps(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
-#ifdef CONFIG_MLX5_EN_TLS
- if (mlx5_accel_is_tls_device(c->priv->mdev)) {
- set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
- sq->stop_room += MLX5E_SQ_TLS_ROOM +
- mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
- TLS_MAX_PAYLOAD_SIZE);
- }
-#endif
+ if (param->is_mpw)
+ set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
+ sq->stop_room = param->stop_room;
+ sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1158,25 +1499,18 @@ err_sq_wq_destroy:
return err;
}
-static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
+void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
{
mlx5e_free_txqsq_db(sq);
mlx5_wq_destroy(&sq->wq_ctrl);
}
-struct mlx5e_create_sq_param {
- struct mlx5_wq_ctrl *wq_ctrl;
- u32 cqn;
- u32 tisn;
- u8 tis_lst_sz;
- u8 min_inline_mode;
-};
-
static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
struct mlx5e_sq_param *param,
struct mlx5e_create_sq_param *csp,
u32 *sqn)
{
+ u8 ts_format;
void *in;
void *sqc;
void *wq;
@@ -1189,6 +1523,9 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
if (!in)
return -ENOMEM;
+ ts_format = mlx5_is_real_time_sq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -1196,6 +1533,9 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz);
MLX5_SET(sqc, sqc, tis_num_0, csp->tisn);
MLX5_SET(sqc, sqc, cqn, csp->cqn);
+ MLX5_SET(sqc, sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn);
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
+
if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode);
@@ -1204,7 +1544,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
- MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index);
+ MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma);
@@ -1222,6 +1562,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
struct mlx5e_modify_sq_param *p)
{
+ u64 bitmask = 0;
void *in;
void *sqc;
int inlen;
@@ -1237,11 +1578,16 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
MLX5_SET(sqc, sqc, state, p->next_state);
if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
- MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
- MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
+ bitmask |= 1;
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
}
+ if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) {
+ bitmask |= 1 << 2;
+ MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id);
+ }
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask);
- err = mlx5_core_modify_sq(mdev, sqn, in, inlen);
+ err = mlx5_core_modify_sq(mdev, sqn, in);
kvfree(in);
@@ -1253,10 +1599,11 @@ static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
mlx5_core_destroy_sq(mdev, sqn);
}
-static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
- struct mlx5e_sq_param *param,
- struct mlx5e_create_sq_param *csp,
- u32 *sqn)
+int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u16 qos_queue_group_id,
+ u32 *sqn)
{
struct mlx5e_modify_sq_param msp = {0};
int err;
@@ -1267,6 +1614,10 @@ static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
msp.curr_state = MLX5_SQC_STATE_RST;
msp.next_state = MLX5_SQC_STATE_RDY;
+ if (qos_queue_group_id) {
+ msp.qos_update = true;
+ msp.qos_queue_group_id = qos_queue_group_id;
+ }
err = mlx5e_modify_sq(mdev, *sqn, &msp);
if (err)
mlx5e_destroy_sq(mdev, *sqn);
@@ -1277,13 +1628,10 @@ static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
static int mlx5e_set_sq_maxrate(struct net_device *dev,
struct mlx5e_txqsq *sq, u32 rate);
-static int mlx5e_open_txqsq(struct mlx5e_channel *c,
- u32 tisn,
- int txq_ix,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param,
- struct mlx5e_txqsq *sq,
- int tc)
+int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
+ struct mlx5e_params *params, struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id,
+ struct mlx5e_sq_stats *sq_stats)
{
struct mlx5e_create_sq_param csp = {};
u32 tx_rate;
@@ -1293,12 +1641,14 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c,
if (err)
return err;
+ sq->stats = sq_stats;
+
csp.tisn = tisn;
csp.tis_lst_sz = 1;
csp.cqn = sq->cq.mcq.cqn;
csp.wq_ctrl = &sq->wq_ctrl;
csp.min_inline_mode = sq->min_inline_mode;
- err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn);
if (err)
goto err_free_txqsq;
@@ -1319,7 +1669,7 @@ err_free_txqsq:
void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
{
- sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
+ sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix);
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
netdev_tx_reset_queue(sq->txq);
netif_tx_start_queue(sq->txq);
@@ -1332,36 +1682,32 @@ void mlx5e_tx_disable_queue(struct netdev_queue *txq)
__netif_tx_unlock_bh(txq);
}
-static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
+void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
{
- struct mlx5e_channel *c = sq->channel;
struct mlx5_wq_cyc *wq = &sq->wq;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- /* prevent netif_tx_wake_queue */
- napi_synchronize(&c->napi);
+ synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
mlx5e_tx_disable_queue(sq->txq);
/* last doorbell out, godspeed .. */
if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- struct mlx5e_tx_wqe_info *wi;
struct mlx5e_tx_wqe *nop;
- wi = &sq->db.wqe_info[pi];
+ sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
- memset(wi, 0, sizeof(*wi));
- wi->num_wqebbs = 1;
nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
}
}
-static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
+void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
{
- struct mlx5e_channel *c = sq->channel;
- struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_core_dev *mdev = sq->mdev;
struct mlx5_rate_limit rl = {0};
cancel_work_sync(&sq->dim.work);
@@ -1375,7 +1721,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
mlx5e_free_txqsq(sq);
}
-static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
+void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
{
struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
recover_work);
@@ -1383,25 +1729,35 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_tx_err_cqe(sq);
}
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
+static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_icosq(c, param, sq);
+ err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
if (err)
return err;
csp.cqn = sq->cq.mcq.cqn;
csp.wq_ctrl = &sq->wq_ctrl;
csp.min_inline_mode = params->tx_min_inline_mode;
- err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
if (err)
goto err_free_icosq;
+ if (param->is_tls) {
+ sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list();
+ if (IS_ERR(sq->ktls_resync)) {
+ err = PTR_ERR(sq->ktls_resync);
+ goto err_destroy_icosq;
+ }
+ }
return 0;
+err_destroy_icosq:
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
err_free_icosq:
mlx5e_free_icosq(sq);
@@ -1415,28 +1771,29 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
{
- struct mlx5e_channel *c = icosq->channel;
-
clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
- napi_synchronize(&c->napi);
+ synchronize_net(); /* Sync with NAPI. */
}
-void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
{
struct mlx5e_channel *c = sq->channel;
+ if (sq->ktls_resync)
+ mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync);
mlx5e_destroy_sq(c->mdev, sq->sqn);
+ mlx5e_free_icosq_descs(sq);
mlx5e_free_icosq(sq);
}
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect);
+ err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect);
if (err)
return err;
@@ -1446,14 +1803,22 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
csp.wq_ctrl = &sq->wq_ctrl;
csp.min_inline_mode = sq->min_inline_mode;
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+
+ /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
+ * supported by upstream, and there is no defined trigger to allow
+ * transmitting redirected multi-buffer frames.
+ */
+ if (param->is_xdp_mb && !is_redirect)
+ set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
+
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
if (err)
goto err_free_xdpsq;
mlx5e_set_xmit_fp(sq, param->is_mpw);
- if (!param->is_mpw) {
- unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+ if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
+ unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
unsigned int inline_hdr_sz = 0;
int i;
@@ -1464,20 +1829,21 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
/* Pre initialize fixed WQE fields */
for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
- struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg;
+ sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = 1,
+ .num_pkts = 1,
+ };
+
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
dseg->lkey = sq->mkey_be;
-
- wi->num_wqebbs = 1;
- wi->num_pkts = 1;
}
}
@@ -1495,27 +1861,22 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
struct mlx5e_channel *c = sq->channel;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- napi_synchronize(&c->napi);
+ synchronize_net(); /* Sync with NAPI. */
mlx5e_destroy_sq(c->mdev, sq->sqn);
mlx5e_free_xdpsq_descs(sq);
mlx5e_free_xdpsq(sq);
}
-static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
+static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param,
struct mlx5e_cq *cq)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_core_cq *mcq = &cq->mcq;
- int eqn_not_used;
- unsigned int irqn;
int err;
u32 i;
- err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
- if (err)
- return err;
-
err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
&cq->wq_ctrl);
if (err)
@@ -1529,7 +1890,6 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
mcq->vector = param->eq_ix;
mcq->comp = mlx5e_completion_event;
mcq->event = mlx5e_cq_error_event;
- mcq->irqn = irqn;
for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
@@ -1538,25 +1898,27 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
}
cq->mdev = mdev;
+ cq->netdev = priv->netdev;
+ cq->priv = priv;
return 0;
}
-static int mlx5e_alloc_cq(struct mlx5e_channel *c,
+static int mlx5e_alloc_cq(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param,
+ struct mlx5e_create_cq_param *ccp,
struct mlx5e_cq *cq)
{
- struct mlx5_core_dev *mdev = c->priv->mdev;
int err;
- param->wq.buf_numa_node = cpu_to_node(c->cpu);
- param->wq.db_numa_node = cpu_to_node(c->cpu);
- param->eq_ix = c->ix;
+ param->wq.buf_numa_node = ccp->node;
+ param->wq.db_numa_node = ccp->node;
+ param->eq_ix = ccp->ix;
- err = mlx5e_alloc_cq_common(mdev, param, cq);
+ err = mlx5e_alloc_cq_common(priv, param, cq);
- cq->napi = &c->napi;
- cq->channel = c;
+ cq->napi = ccp->napi;
+ cq->ch_stats = ccp->ch_stats;
return err;
}
@@ -1575,11 +1937,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
void *in;
void *cqc;
int inlen;
- unsigned int irqn_not_used;
int eqn;
int err;
- err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
+ err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn);
if (err)
return err;
@@ -1597,7 +1958,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
(__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -1620,13 +1981,14 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
}
-int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
- struct mlx5e_cq_param *param, struct mlx5e_cq *cq)
+int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
+ struct mlx5e_cq *cq)
{
- struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
int err;
- err = mlx5e_alloc_cq(c, param, cq);
+ err = mlx5e_alloc_cq(priv, param, ccp, cq);
if (err)
return err;
@@ -1652,14 +2014,15 @@ void mlx5e_close_cq(struct mlx5e_cq *cq)
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
+ struct mlx5e_create_cq_param *ccp,
struct mlx5e_channel_param *cparam)
{
int err;
int tc;
for (tc = 0; tc < c->num_tc; tc++) {
- err = mlx5e_open_cq(c, params->tx_cq_moderation,
- &cparam->tx_cq, &c->sq[tc].cq);
+ err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->txq_sq.cqp,
+ ccp, &c->sq[tc].cq);
if (err)
goto err_close_tx_cqs;
}
@@ -1681,17 +2044,60 @@ static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
mlx5e_close_cq(&c->sq[tc].cq);
}
+static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq)
+{
+ int tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++)
+ if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count)
+ return tc;
+
+ WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq);
+ return -ENOENT;
+}
+
+static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix,
+ u32 *hw_id)
+{
+ int tc;
+
+ if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) {
+ *hw_id = 0;
+ return 0;
+ }
+
+ tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix);
+ if (tc < 0)
+ return tc;
+
+ if (tc >= params->mqprio.num_tc) {
+ WARN(1, "Unexpected TCs configuration. tc %d is out of range of %u",
+ tc, params->mqprio.num_tc);
+ return -EINVAL;
+ }
+
+ *hw_id = params->mqprio.channel.hw_id[tc];
+ return 0;
+}
+
static int mlx5e_open_sqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
{
int err, tc;
- for (tc = 0; tc < params->num_tc; tc++) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) {
int txq_ix = c->ix + tc * params->num_channels;
+ u32 qos_queue_group_id;
+
+ err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id);
+ if (err)
+ goto err_close_sqs;
err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
- params, &cparam->sq, &c->sq[tc], tc);
+ params, &cparam->txq_sq, &c->sq[tc], tc,
+ qos_queue_group_id,
+ &c->priv->channel_stats[c->ix]->sq[tc]);
if (err)
goto err_close_sqs;
}
@@ -1794,27 +2200,16 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
return err;
}
-static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c,
- struct mlx5e_params *params)
+static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_params)
{
- int num_comp_vectors = mlx5_comp_vectors_count(c->mdev);
- int irq;
-
- if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL))
- return -ENOMEM;
-
- for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) {
- int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq));
-
- cpumask_set_cpu(cpu, c->xps_cpumask);
- }
+ int err;
- return 0;
-}
+ err = mlx5e_init_rxq_rq(c, params, &c->rq);
+ if (err)
+ return err;
-static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c)
-{
- free_cpumask_var(c->xps_cpumask);
+ return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
}
static int mlx5e_open_queues(struct mlx5e_channel *c,
@@ -1822,73 +2217,92 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
struct mlx5e_channel_param *cparam)
{
struct dim_cq_moder icocq_moder = {0, 0};
+ struct mlx5e_create_cq_param ccp;
int err;
- err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
+ &c->async_icosq.cq);
if (err)
return err;
- err = mlx5e_open_tx_cqs(c, params, cparam);
+ err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
+ &c->icosq.cq);
+ if (err)
+ goto err_close_async_icosq_cq;
+
+ err = mlx5e_open_tx_cqs(c, params, &ccp, cparam);
if (err)
goto err_close_icosq_cq;
- err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq);
+ err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
+ &c->xdpsq.cq);
if (err)
goto err_close_tx_cqs;
- err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
+ err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
+ &c->rq.cq);
if (err)
goto err_close_xdp_tx_cqs;
- /* XDP SQ CQ params are same as normal TXQ sq CQ params */
- err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
- &cparam->tx_cq, &c->rq_xdpsq.cq) : 0;
+ err = c->xdp ? mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp,
+ &ccp, &c->rq_xdpsq.cq) : 0;
if (err)
goto err_close_rx_cq;
- napi_enable(&c->napi);
+ spin_lock_init(&c->async_icosq_lock);
+
+ err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
+ mlx5e_async_icosq_err_cqe_work);
+ if (err)
+ goto err_close_xdpsq_cq;
+
+ mutex_init(&c->icosq_recovery_lock);
- err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
+ mlx5e_icosq_err_cqe_work);
if (err)
- goto err_disable_napi;
+ goto err_close_async_icosq;
err = mlx5e_open_sqs(c, params, cparam);
if (err)
goto err_close_icosq;
+ err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
+ if (err)
+ goto err_close_sqs;
+
if (c->xdp) {
err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
&c->rq_xdpsq, false);
if (err)
- goto err_close_sqs;
+ goto err_close_rq;
}
- err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq);
- if (err)
- goto err_close_xdp_sq;
-
err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
if (err)
- goto err_close_rq;
+ goto err_close_xdp_sq;
return 0;
-err_close_rq:
- mlx5e_close_rq(&c->rq);
-
err_close_xdp_sq:
if (c->xdp)
mlx5e_close_xdpsq(&c->rq_xdpsq);
+err_close_rq:
+ mlx5e_close_rq(&c->rq);
+
err_close_sqs:
mlx5e_close_sqs(c);
err_close_icosq:
mlx5e_close_icosq(&c->icosq);
-err_disable_napi:
- napi_disable(&c->napi);
+err_close_async_icosq:
+ mlx5e_close_icosq(&c->async_icosq);
+err_close_xdpsq_cq:
if (c->xdp)
mlx5e_close_cq(&c->rq_xdpsq.cq);
@@ -1904,24 +2318,31 @@ err_close_tx_cqs:
err_close_icosq_cq:
mlx5e_close_cq(&c->icosq.cq);
+err_close_async_icosq_cq:
+ mlx5e_close_cq(&c->async_icosq.cq);
+
return err;
}
static void mlx5e_close_queues(struct mlx5e_channel *c)
{
mlx5e_close_xdpsq(&c->xdpsq);
- mlx5e_close_rq(&c->rq);
if (c->xdp)
mlx5e_close_xdpsq(&c->rq_xdpsq);
+ /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
+ cancel_work_sync(&c->icosq.recover_work);
+ mlx5e_close_rq(&c->rq);
mlx5e_close_sqs(c);
mlx5e_close_icosq(&c->icosq);
- napi_disable(&c->napi);
+ mutex_destroy(&c->icosq_recovery_lock);
+ mlx5e_close_icosq(&c->async_icosq);
if (c->xdp)
mlx5e_close_cq(&c->rq_xdpsq.cq);
mlx5e_close_cq(&c->rq.cq);
mlx5e_close_cq(&c->xdpsq.cq);
mlx5e_close_tx_cqs(c);
mlx5e_close_cq(&c->icosq.cq);
+ mlx5e_close_cq(&c->async_icosq.cq);
}
static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
@@ -1931,10 +2352,48 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
}
+static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)
+{
+ if (ix > priv->stats_nch) {
+ netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix,
+ priv->stats_nch);
+ return -EINVAL;
+ }
+
+ if (priv->channel_stats[ix])
+ return 0;
+
+ /* Asymmetric dynamic memory allocation.
+ * Freed in mlx5e_priv_arrays_free, not on channel closure.
+ */
+ mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix);
+ priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!priv->channel_stats[ix])
+ return -ENOMEM;
+ priv->stats_nch++;
+
+ return 0;
+}
+
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
+{
+ spin_lock_bh(&c->async_icosq_lock);
+ mlx5e_trigger_irq(&c->async_icosq);
+ spin_unlock_bh(&c->async_icosq_lock);
+}
+
+void mlx5e_trigger_napi_sched(struct napi_struct *napi)
+{
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
+}
+
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_channel **cp)
{
int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
@@ -1943,9 +2402,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel *c;
unsigned int irq;
int err;
- int eqn;
- err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+ err = mlx5_vector2irqn(priv->mdev, ix, &irq);
+ if (err)
+ return err;
+
+ err = mlx5e_channel_stats_alloc(priv, ix, cpu);
if (err)
return err;
@@ -1958,28 +2420,24 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->tstamp = &priv->tstamp;
c->ix = ix;
c->cpu = cpu;
- c->pdev = priv->mdev->device;
+ c->pdev = mlx5_core_dma_dev(priv->mdev);
c->netdev = priv->netdev;
- c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
- c->num_tc = params->num_tc;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ c->num_tc = mlx5e_get_dcb_num_tc(params);
c->xdp = !!params->xdp_prog;
- c->stats = &priv->channel_stats[ix].ch;
- c->irq_desc = irq_to_desc(irq);
+ c->stats = &priv->channel_stats[ix]->ch;
+ c->aff_mask = irq_get_effective_affinity_mask(irq);
c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
- err = mlx5e_alloc_xps_cpumask(c, params);
- if (err)
- goto err_free_channel;
-
- netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
+ netif_napi_add(netdev, &c->napi, mlx5e_napi_poll);
err = mlx5e_open_queues(c, params, cparam);
if (unlikely(err))
goto err_napi_del;
- if (umem) {
- mlx5e_build_xsk_param(umem, &xsk);
- err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (xsk_pool) {
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
+ err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
if (unlikely(err))
goto err_close_queues;
}
@@ -1993,9 +2451,7 @@ err_close_queues:
err_napi_del:
netif_napi_del(&c->napi);
- mlx5e_free_xps_cpumask(c);
-err_free_channel:
kvfree(c);
return err;
@@ -2005,14 +2461,19 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
{
int tc;
+ napi_enable(&c->napi);
+
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_icosq(&c->icosq);
- mlx5e_activate_rq(&c->rq);
- netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
+ mlx5e_activate_icosq(&c->async_icosq);
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_activate_xsk(c);
+ else
+ mlx5e_activate_rq(&c->rq);
+
+ mlx5e_trigger_napi_icosq(c);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -2021,11 +2482,16 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_deactivate_xsk(c);
+ else
+ mlx5e_deactivate_rq(&c->rq);
- mlx5e_deactivate_rq(&c->rq);
+ mlx5e_deactivate_icosq(&c->async_icosq);
mlx5e_deactivate_icosq(&c->icosq);
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_deactivate_txqsq(&c->sq[tc]);
+ mlx5e_qos_deactivate_queues(c);
+
+ napi_disable(&c->napi);
}
static void mlx5e_close_channel(struct mlx5e_channel *c)
@@ -2033,288 +2499,12 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_close_xsk(c);
mlx5e_close_queues(c);
+ mlx5e_qos_close_queues(c);
netif_napi_del(&c->napi);
- mlx5e_free_xps_cpumask(c);
kvfree(c);
}
-#define DEFAULT_FRAG_SIZE (2048)
-
-static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_frags_info *info)
-{
- u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- int frag_size_max = DEFAULT_FRAG_SIZE;
- u32 buf_size = 0;
- int i;
-
-#ifdef CONFIG_MLX5_EN_IPSEC
- if (MLX5_IPSEC_DEV(mdev))
- byte_count += MLX5E_METADATA_ETHER_LEN;
-#endif
-
- if (mlx5e_rx_is_linear_skb(params, xsk)) {
- int frag_stride;
-
- frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
- frag_stride = roundup_pow_of_two(frag_stride);
-
- info->arr[0].frag_size = byte_count;
- info->arr[0].frag_stride = frag_stride;
- info->num_frags = 1;
- info->wqe_bulk = PAGE_SIZE / frag_stride;
- goto out;
- }
-
- if (byte_count > PAGE_SIZE +
- (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
- frag_size_max = PAGE_SIZE;
-
- i = 0;
- while (buf_size < byte_count) {
- int frag_size = byte_count - buf_size;
-
- if (i < MLX5E_MAX_RX_FRAGS - 1)
- frag_size = min(frag_size, frag_size_max);
-
- info->arr[i].frag_size = frag_size;
- info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
-
- buf_size += frag_size;
- i++;
- }
- info->num_frags = i;
- /* number of different wqes sharing a page */
- info->wqe_bulk = 1 + (info->num_frags % 2);
-
-out:
- info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
- info->log_num_frags = order_base_2(info->num_frags);
-}
-
-static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
-{
- int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
-
- switch (wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- sz += sizeof(struct mlx5e_rx_wqe_ll);
- break;
- default: /* MLX5_WQ_TYPE_CYCLIC */
- sz += sizeof(struct mlx5e_rx_wqe_cyc);
- }
-
- return order_base_2(sz);
-}
-
-static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
-{
- void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
-
- return MLX5_GET(wq, wq, log_wq_sz);
-}
-
-void mlx5e_build_rq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_param *param)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- void *rqc = param->rqc;
- void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
- int ndsegs = 1;
-
- switch (params->rq_wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- MLX5_SET(wq, wq, log_wqe_num_of_strides,
- mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
- MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
- MLX5_SET(wq, wq, log_wqe_stride_size,
- mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
- MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
- MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
- break;
- default: /* MLX5_WQ_TYPE_CYCLIC */
- MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
- ndsegs = param->frags_info.num_frags;
- }
-
- MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
- MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
- MLX5_SET(wq, wq, log_wq_stride,
- mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
- MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn);
- MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
- MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
- MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
-
- param->wq.buf_numa_node = dev_to_node(mdev->device);
-}
-
-static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
- struct mlx5e_rq_param *param)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- void *rqc = param->rqc;
- void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
-
- MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
- MLX5_SET(wq, wq, log_wq_stride,
- mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
- MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
-
- param->wq.buf_numa_node = dev_to_node(mdev->device);
-}
-
-void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
- struct mlx5e_sq_param *param)
-{
- void *sqc = param->sqc;
- void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
-
- MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
- MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn);
-
- param->wq.buf_numa_node = dev_to_node(priv->mdev->device);
-}
-
-static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param)
-{
- void *sqc = param->sqc;
- void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
- bool allow_swp;
-
- allow_swp = mlx5_geneve_tx_allowed(priv->mdev) ||
- !!MLX5_IPSEC_DEV(priv->mdev);
- mlx5e_build_sq_param_common(priv, param);
- MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
- MLX5_SET(sqc, sqc, allow_swp, allow_swp);
-}
-
-static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_cq_param *param)
-{
- void *cqc = param->cqc;
-
- MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
- if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
- MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
-}
-
-void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_cq_param *param)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- void *cqc = param->cqc;
- u8 log_cq_size;
-
- switch (params->rq_wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
- mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
- break;
- default: /* MLX5_WQ_TYPE_CYCLIC */
- log_cq_size = params->log_rq_mtu_frames;
- }
-
- MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
- if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
- MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
- MLX5_SET(cqc, cqc, cqe_comp_en, 1);
- }
-
- mlx5e_build_common_cq_param(priv, param);
- param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
-}
-
-void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_cq_param *param)
-{
- void *cqc = param->cqc;
-
- MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
-
- mlx5e_build_common_cq_param(priv, param);
- param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
-}
-
-void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_cq_param *param)
-{
- void *cqc = param->cqc;
-
- MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
-
- mlx5e_build_common_cq_param(priv, param);
-
- param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
-}
-
-void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_sq_param *param)
-{
- void *sqc = param->sqc;
- void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
-
- mlx5e_build_sq_param_common(priv, param);
-
- MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
- MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
-}
-
-void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param)
-{
- void *sqc = param->sqc;
- void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
-
- mlx5e_build_sq_param_common(priv, param);
- MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
- param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
-}
-
-static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
- struct mlx5e_rq_param *rqp)
-{
- switch (params->rq_wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return order_base_2(MLX5E_UMR_WQEBBS) +
- mlx5e_get_rq_log_wq_sz(rqp->rqc);
- default: /* MLX5_WQ_TYPE_CYCLIC */
- return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
- }
-}
-
-static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_channel_param *cparam)
-{
- u8 icosq_log_wq_sz;
-
- mlx5e_build_rq_param(priv, params, NULL, &cparam->rq);
-
- icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
-
- mlx5e_build_sq_param(priv, params, &cparam->sq);
- mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
- mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
- mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq);
- mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
- mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
-}
-
int mlx5e_open_channels(struct mlx5e_priv *priv,
struct mlx5e_channels *chs)
{
@@ -2329,22 +2519,41 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
if (!chs->c || !cparam)
goto err_free;
- mlx5e_build_channel_param(priv, &chs->params, cparam);
+ err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
+ if (err)
+ goto err_free;
+
for (i = 0; i < chs->num; i++) {
- struct xdp_umem *umem = NULL;
+ struct xsk_buff_pool *xsk_pool = NULL;
if (chs->params.xdp_prog)
- umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i);
+ xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
+
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]);
+ if (err)
+ goto err_close_channels;
+ }
- err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]);
+ if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) {
+ err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
if (err)
goto err_close_channels;
}
+ if (priv->htb) {
+ err = mlx5e_qos_open_queues(priv, chs);
+ if (err)
+ goto err_close_ptp;
+ }
+
mlx5e_health_channels_update(priv);
kvfree(cparam);
return 0;
+err_close_ptp:
+ if (chs->ptp)
+ mlx5e_ptp_close(chs->ptp);
+
err_close_channels:
for (i--; i >= 0; i--)
mlx5e_close_channel(chs->c[i]);
@@ -2362,9 +2571,10 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs)
for (i = 0; i < chs->num; i++)
mlx5e_activate_channel(chs->c[i]);
-}
-#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
+ if (chs->ptp)
+ mlx5e_ptp_activate_channel(chs->ptp);
+}
static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
{
@@ -2373,8 +2583,12 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
for (i = 0; i < chs->num; i++) {
int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
+ struct mlx5e_channel *c = chs->c[i];
+
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ continue;
- err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
+ err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout);
/* Don't wait on the XSK RQ, because the newer xdpsock sample
* doesn't provide any Fill Ring entries at the setup stage.
@@ -2388,6 +2602,9 @@ static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
{
int i;
+ if (chs->ptp)
+ mlx5e_ptp_deactivate_channel(chs->ptp);
+
for (i = 0; i < chs->num; i++)
mlx5e_deactivate_channel(chs->c[i]);
}
@@ -2396,6 +2613,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
{
int i;
+ if (chs->ptp) {
+ mlx5e_ptp_close(chs->ptp);
+ chs->ptp = NULL;
+ }
for (i = 0; i < chs->num; i++)
mlx5e_close_channel(chs->c[i]);
@@ -2403,586 +2624,402 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
chs->num = 0;
}
-static int
-mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
+static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
{
- struct mlx5_core_dev *mdev = priv->mdev;
- void *rqtc;
- int inlen;
- int err;
- u32 *in;
- int i;
-
- inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ struct mlx5e_rx_res *res = priv->rx_res;
- rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
-
- MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
- MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
-
- for (i = 0; i < sz; i++)
- MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
-
- err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
- if (!err)
- rqt->enabled = true;
-
- kvfree(in);
- return err;
+ return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
}
-void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
-{
- rqt->enabled = false;
- mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
-}
+static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
+static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 mtu)
{
- struct mlx5e_rqt *rqt = &priv->indir_rqt;
+ u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
int err;
- err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt);
+ err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
if (err)
- mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
- return err;
-}
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
-{
- int err;
- int ix;
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
- if (unlikely(err))
- goto err_destroy_rqts;
- }
+ return err;
+ /* Update vport context MTU */
+ mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
return 0;
-
-err_destroy_rqts:
- mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err);
- for (ix--; ix >= 0; ix--)
- mlx5e_destroy_rqt(priv, &tirs[ix].rqt);
-
- return err;
-}
-
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
-{
- int i;
-
- for (i = 0; i < priv->max_nch; i++)
- mlx5e_destroy_rqt(priv, &tirs[i].rqt);
-}
-
-static int mlx5e_rx_hash_fn(int hfunc)
-{
- return (hfunc == ETH_RSS_HASH_TOP) ?
- MLX5_RX_HASH_FN_TOEPLITZ :
- MLX5_RX_HASH_FN_INVERTED_XOR8;
}
-int mlx5e_bits_invert(unsigned long a, int size)
-{
- int inv = 0;
- int i;
-
- for (i = 0; i < size; i++)
- inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
-
- return inv;
-}
-
-static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
- struct mlx5e_redirect_rqt_param rrp, void *rqtc)
+static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 *mtu)
{
- int i;
-
- for (i = 0; i < sz; i++) {
- u32 rqn;
-
- if (rrp.is_rss) {
- int ix = i;
+ u16 hw_mtu = 0;
+ int err;
- if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
- ix = mlx5e_bits_invert(i, ilog2(sz));
+ err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
+ if (err || !hw_mtu) /* fallback to port oper mtu */
+ mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
- ix = priv->rss_params.indirection_rqt[ix];
- rqn = rrp.rss.channels->c[ix]->rq.rqn;
- } else {
- rqn = rrp.rqn;
- }
- MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
- }
+ *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
}
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
- struct mlx5e_redirect_rqt_param rrp)
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- void *rqtc;
- int inlen;
- u32 *in;
+ u16 mtu;
int err;
- inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+ err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
+ if (err)
+ return err;
- MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
- MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
- mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc);
- err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen);
+ mlx5e_query_mtu(mdev, params, &mtu);
+ if (mtu != params->sw_mtu)
+ netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
+ __func__, mtu, params->sw_mtu);
- kvfree(in);
- return err;
+ params->sw_mtu = mtu;
+ return 0;
}
-static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
- struct mlx5e_redirect_rqt_param rrp)
-{
- if (!rrp.is_rss)
- return rrp.rqn;
-
- if (ix >= rrp.rss.channels->num)
- return priv->drop_rq.rqn;
-
- return rrp.rss.channels->c[ix]->rq.rqn;
-}
+MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu);
-static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
- struct mlx5e_redirect_rqt_param rrp)
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
{
- u32 rqtn;
- int ix;
-
- if (priv->indir_rqt.enabled) {
- /* RSS RQ table */
- rqtn = priv->indir_rqt.rqtn;
- mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
- }
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- struct mlx5e_redirect_rqt_param direct_rrp = {
- .is_rss = false,
- {
- .rqn = mlx5e_get_direct_rqn(priv, ix, rrp)
- },
- };
-
- /* Direct RQ Tables */
- if (!priv->direct_tir[ix].rqt.enabled)
- continue;
-
- rqtn = priv->direct_tir[ix].rqt.rqtn;
- mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
- }
-}
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
-static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *chs)
-{
- struct mlx5e_redirect_rqt_param rrp = {
- .is_rss = true,
- {
- .rss = {
- .channels = chs,
- .hfunc = priv->rss_params.hfunc,
- }
- },
- };
-
- mlx5e_redirect_rqts(priv, rrp);
-}
-
-static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
-{
- struct mlx5e_redirect_rqt_param drop_rrp = {
- .is_rss = false,
- {
- .rqn = priv->drop_rq.rqn,
- },
- };
-
- mlx5e_redirect_rqts(priv, drop_rrp);
-}
-
-static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
- [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
- .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
- },
- [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
- },
- [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP,
- },
- [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
- .l4_prot_type = 0,
- .rx_hash_fields = MLX5_HASH_IP,
- },
-};
+ /* MTU range: 68 - hw-specific max */
+ netdev->min_mtu = ETH_MIN_MTU;
-struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
-{
- return tirc_default_config[tt];
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
+ ETH_MAX_MTU);
}
-static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
+static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
+ struct netdev_tc_txq *tc_to_txq)
{
- if (!params->lro_en)
- return;
+ int tc, err;
-#define ROUGH_MAX_L2_L3_HDR_SZ 256
+ netdev_reset_tc(netdev);
- MLX5_SET(tirc, tirc, lro_enable_mask,
- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
- MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
- (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
- MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
-}
+ if (ntc == 1)
+ return 0;
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
- const struct mlx5e_tirc_config *ttconfig,
- void *tirc, bool inner)
-{
- void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
- MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ err = netdev_set_num_tc(netdev, ntc);
+ if (err) {
+ netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc);
+ return err;
+ }
- MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
- if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
- void *rss_key = MLX5_ADDR_OF(tirc, tirc,
- rx_hash_toeplitz_key);
- size_t len = MLX5_FLD_SZ_BYTES(tirc,
- rx_hash_toeplitz_key);
+ for (tc = 0; tc < ntc; tc++) {
+ u16 count, offset;
- MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, rss_params->toeplitz_hash_key, len);
+ count = tc_to_txq[tc].count;
+ offset = tc_to_txq[tc].offset;
+ netdev_set_tc_queue(netdev, tc, count, offset);
}
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- ttconfig->l3_prot_type);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- ttconfig->l4_prot_type);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- ttconfig->rx_hash_fields);
-}
-static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
- enum mlx5e_traffic_types tt,
- u32 rx_hash_fields)
-{
- *ttconfig = tirc_default_config[tt];
- ttconfig->rx_hash_fields = rx_hash_fields;
+ return 0;
}
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
{
- void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
- struct mlx5e_rss_params *rss = &priv->rss_params;
- struct mlx5_core_dev *mdev = priv->mdev;
- int ctxlen = MLX5_ST_SZ_BYTES(tirc);
- struct mlx5e_tirc_config ttconfig;
- int tt;
+ int nch, ntc, num_txqs, err;
+ int qos_queues = 0;
- MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
+ if (priv->htb)
+ qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_update_rx_hash_fields(&ttconfig, tt,
- rss->rx_hash_fields[tt]);
- mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
- mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
- }
+ nch = priv->channels.params.num_channels;
+ ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
+ num_txqs = nch * ntc + qos_queues;
+ if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
+ num_txqs += ntc;
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return;
+ mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs);
+ err = netif_set_real_num_tx_queues(priv->netdev, num_txqs);
+ if (err)
+ netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_update_rx_hash_fields(&ttconfig, tt,
- rss->rx_hash_fields[tt]);
- mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
- mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in,
- inlen);
- }
+ return err;
}
-static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
+static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
{
- struct mlx5_core_dev *mdev = priv->mdev;
-
- void *in;
- void *tirc;
- int inlen;
+ struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq;
+ struct net_device *netdev = priv->netdev;
+ int old_num_txqs, old_ntc;
+ int nch, ntc;
int err;
- int tt;
- int ix;
-
- inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ int i;
- MLX5_SET(modify_tir_in, in, bitmask.lro, 1);
- tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ old_num_txqs = netdev->real_num_tx_queues;
+ old_ntc = netdev->num_tc ? : 1;
+ for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++)
+ old_tc_to_txq[i] = netdev->tc_to_txq[i];
- mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+ nch = priv->channels.params.num_channels;
+ ntc = priv->channels.params.mqprio.num_tc;
+ tc_to_txq = priv->channels.params.mqprio.tc_to_txq;
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in,
- inlen);
- if (err)
- goto free_in;
+ err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq);
+ if (err)
+ goto err_out;
+ err = mlx5e_update_tx_netdev_queues(priv);
+ if (err)
+ goto err_tcs;
+ err = netif_set_real_num_rx_queues(netdev, nch);
+ if (err) {
+ netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
+ goto err_txqs;
}
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
- in, inlen);
- if (err)
- goto free_in;
- }
+ return 0;
-free_in:
- kvfree(in);
+err_txqs:
+ /* netif_set_real_num_rx_queues could fail only when nch increased. Only
+ * one of nch and ntc is changed in this function. That means, the call
+ * to netif_set_real_num_tx_queues below should not fail, because it
+ * decreases the number of TX queues.
+ */
+ WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
+err_tcs:
+ WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc,
+ old_tc_to_txq));
+err_out:
return err;
}
-static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params, u16 mtu)
+static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues);
+
+static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
+ struct mlx5e_params *params)
{
- u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
- int err;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int num_comp_vectors, ix, irq;
- err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
- if (err)
- return err;
+ num_comp_vectors = mlx5_comp_vectors_count(mdev);
- /* Update vport context MTU */
- mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
- return 0;
-}
+ for (ix = 0; ix < params->num_channels; ix++) {
+ cpumask_clear(priv->scratchpad.cpumask);
-static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params, u16 *mtu)
-{
- u16 hw_mtu = 0;
- int err;
+ for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
- err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
- if (err || !hw_mtu) /* fallback to port oper mtu */
- mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
+ cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
+ }
- *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
+ netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
+ }
}
-int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+static int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
{
- struct mlx5e_params *params = &priv->channels.params;
- struct net_device *netdev = priv->netdev;
- struct mlx5_core_dev *mdev = priv->mdev;
- u16 mtu;
+ u16 count = priv->channels.params.num_channels;
int err;
- err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
+ err = mlx5e_update_netdev_queues(priv);
if (err)
return err;
- mlx5e_query_mtu(mdev, params, &mtu);
- if (mtu != params->sw_mtu)
- netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
- __func__, mtu, params->sw_mtu);
-
- params->sw_mtu = mtu;
- return 0;
-}
-
-void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
-{
- struct mlx5e_params *params = &priv->channels.params;
- struct net_device *netdev = priv->netdev;
- struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
+ mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
- /* MTU range: 68 - hw-specific max */
- netdev->min_mtu = ETH_MIN_MTU;
+ /* This function may be called on attach, before priv->rx_res is created. */
+ if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
+ mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
- ETH_MAX_MTU);
+ return 0;
}
-static void mlx5e_netdev_set_tcs(struct net_device *netdev)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- int nch = priv->channels.params.num_channels;
- int ntc = priv->channels.params.num_tc;
- int tc;
-
- netdev_reset_tc(netdev);
-
- if (ntc == 1)
- return;
-
- netdev_set_num_tc(netdev, ntc);
-
- /* Map netdev TCs to offset 0
- * We have our own UP to TXQ mapping for QoS
- */
- for (tc = 0; tc < ntc; tc++)
- netdev_set_tc_queue(netdev, tc, nch, 0);
-}
+MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed);
static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
{
- int i, ch;
+ int i, ch, tc, num_tc;
ch = priv->channels.num;
+ num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
for (i = 0; i < ch; i++) {
- int tc;
-
- for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+ for (tc = 0; tc < num_tc; tc++) {
struct mlx5e_channel *c = priv->channels.c[i];
struct mlx5e_txqsq *sq = &c->sq[tc];
priv->txq2sq[sq->txq_ix] = sq;
- priv->channel_tc2realtxq[i][tc] = i + tc * ch;
}
}
+
+ if (!priv->channels.ptp)
+ goto out;
+
+ if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
+ goto out;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_ptp *c = priv->channels.ptp;
+ struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
+
+ priv->txq2sq[sq->txq_ix] = sq;
+ }
+
+out:
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
}
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
- int num_txqs = priv->channels.num * priv->channels.params.num_tc;
- int num_rxqs = priv->channels.num * priv->profile->rq_groups;
- struct net_device *netdev = priv->netdev;
-
- mlx5e_netdev_set_tcs(netdev);
- netif_set_real_num_tx_queues(netdev, num_txqs);
- netif_set_real_num_rx_queues(netdev, num_rxqs);
-
mlx5e_build_txq_maps(priv);
mlx5e_activate_channels(&priv->channels);
+ if (priv->htb)
+ mlx5e_qos_activate_queues(priv);
mlx5e_xdp_tx_enable(priv);
+
+ /* dev_watchdog() wants all TX queues to be started when the carrier is
+ * OK, including the ones in range real_num_tx_queues..num_tx_queues-1.
+ * Make it happy to avoid TX timeout false alarms.
+ */
netif_tx_start_all_queues(priv->netdev);
if (mlx5e_is_vport_rep(priv))
- mlx5e_add_sqs_fwd_rules(priv);
+ mlx5e_rep_activate_channels(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
- mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
- mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
}
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
- mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
-
- mlx5e_redirect_rqts_to_drop(priv);
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_deactivate(priv->rx_res);
if (mlx5e_is_vport_rep(priv))
- mlx5e_remove_sqs_fwd_rules(priv);
+ mlx5e_rep_deactivate_channels(priv);
- /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
- * polling for inactive tx queues.
+ /* The results of ndo_select_queue are unreliable, while netdev config
+ * is being changed (real_num_tx_queues, num_tc). Stop all queues to
+ * prevent ndo_start_xmit from being called, so that it can assume that
+ * the selected queue is always valid.
*/
- netif_tx_stop_all_queues(priv->netdev);
netif_tx_disable(priv->netdev);
+
mlx5e_xdp_tx_disable(priv);
mlx5e_deactivate_channels(&priv->channels);
}
-static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *new_chs,
- mlx5e_fp_hw_modify hw_modify)
+static int mlx5e_switch_priv_params(struct mlx5e_priv *priv,
+ struct mlx5e_params *new_params,
+ mlx5e_fp_preactivate preactivate,
+ void *context)
+{
+ struct mlx5e_params old_params;
+
+ old_params = priv->channels.params;
+ priv->channels.params = *new_params;
+
+ if (preactivate) {
+ int err;
+
+ err = preactivate(priv, context);
+ if (err) {
+ priv->channels.params = old_params;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_preactivate preactivate,
+ void *context)
{
struct net_device *netdev = priv->netdev;
- int new_num_txqs;
+ struct mlx5e_channels old_chs;
int carrier_ok;
-
- new_num_txqs = new_chs->num * new_chs->params.num_tc;
+ int err = 0;
carrier_ok = netif_carrier_ok(netdev);
netif_carrier_off(netdev);
- if (new_num_txqs < netdev->real_num_tx_queues)
- netif_set_real_num_tx_queues(netdev, new_num_txqs);
-
mlx5e_deactivate_priv_channels(priv);
- mlx5e_close_channels(&priv->channels);
+ old_chs = priv->channels;
priv->channels = *new_chs;
- /* New channels are ready to roll, modify HW settings if needed */
- if (hw_modify)
- hw_modify(priv);
+ /* New channels are ready to roll, call the preactivate hook if needed
+ * to modify HW settings or update kernel parameters.
+ */
+ if (preactivate) {
+ err = preactivate(priv, context);
+ if (err) {
+ priv->channels = old_chs;
+ goto out;
+ }
+ }
+ mlx5e_close_channels(&old_chs);
priv->profile->update_rx(priv);
+
+ mlx5e_selq_apply(&priv->selq);
+out:
mlx5e_activate_priv_channels(priv);
/* return carrier back if needed */
if (carrier_ok)
netif_carrier_on(netdev);
+
+ return err;
}
-int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *new_chs,
- mlx5e_fp_hw_modify hw_modify)
+int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ mlx5e_fp_preactivate preactivate,
+ void *context, bool reset)
{
+ struct mlx5e_channels new_chs = {};
int err;
- err = mlx5e_open_channels(priv, new_chs);
+ reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (!reset)
+ return mlx5e_switch_priv_params(priv, params, preactivate, context);
+
+ new_chs.params = *params;
+
+ mlx5e_selq_prepare_params(&priv->selq, &new_chs.params);
+
+ err = mlx5e_open_channels(priv, &new_chs);
if (err)
- return err;
+ goto err_cancel_selq;
+
+ err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
+ if (err)
+ goto err_close;
- mlx5e_switch_priv_channels(priv, new_chs, hw_modify);
return 0;
+
+err_close:
+ mlx5e_close_channels(&new_chs);
+
+err_cancel_selq:
+ mlx5e_selq_cancel(&priv->selq);
+ return err;
}
int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
{
- struct mlx5e_channels new_channels = {};
-
- new_channels.params = priv->channels.params;
- return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true);
}
void mlx5e_timestamp_init(struct mlx5e_priv *priv)
@@ -2991,11 +3028,33 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
}
+static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
+ enum mlx5_port_status state)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int vport_admin_state;
+
+ mlx5_set_port_admin_status(mdev, state);
+
+ if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS ||
+ !MLX5_CAP_GEN(mdev, uplink_follow))
+ return;
+
+ if (state == MLX5_PORT_UP)
+ vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ else
+ vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
+
+ mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
+}
+
int mlx5e_open_locked(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
+ mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params);
+
set_bit(MLX5E_STATE_OPENED, &priv->state);
err = mlx5e_open_channels(priv, &priv->channels);
@@ -3003,7 +3062,9 @@ int mlx5e_open_locked(struct net_device *netdev)
goto err_clear_state_opened_flag;
priv->profile->update_rx(priv);
+ mlx5e_selq_apply(&priv->selq);
mlx5e_activate_priv_channels(priv);
+ mlx5e_apply_traps(priv, true);
if (priv->profile->update_carrier)
priv->profile->update_carrier(priv);
@@ -3012,6 +3073,7 @@ int mlx5e_open_locked(struct net_device *netdev)
err_clear_state_opened_flag:
clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ mlx5e_selq_cancel(&priv->selq);
return err;
}
@@ -3023,12 +3085,9 @@ int mlx5e_open(struct net_device *netdev)
mutex_lock(&priv->state_lock);
err = mlx5e_open_locked(netdev);
if (!err)
- mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
+ mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
mutex_unlock(&priv->state_lock);
- if (mlx5_vxlan_allowed(priv->mdev->vxlan))
- udp_tunnel_get_rx_info(netdev);
-
return err;
}
@@ -3042,6 +3101,7 @@ int mlx5e_close_locked(struct net_device *netdev)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;
+ mlx5e_apply_traps(priv, false);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
netif_carrier_off(priv->netdev);
@@ -3060,13 +3120,18 @@ int mlx5e_close(struct net_device *netdev)
return -ENODEV;
mutex_lock(&priv->state_lock);
- mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
+ mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
err = mlx5e_close_locked(netdev);
mutex_unlock(&priv->state_lock);
return err;
}
+static void mlx5e_free_drop_rq(struct mlx5e_rq *rq)
+{
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
struct mlx5e_rq *rq,
struct mlx5e_rq_param *param)
@@ -3090,14 +3155,16 @@ static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
return 0;
}
-static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
+static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv,
struct mlx5e_cq *cq,
struct mlx5e_cq_param *param)
{
- param->wq.buf_numa_node = dev_to_node(mdev->device);
- param->wq.db_numa_node = dev_to_node(mdev->device);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
- return mlx5e_alloc_cq_common(mdev, param, cq);
+ return mlx5e_alloc_cq_common(priv, param, cq);
}
int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
@@ -3109,9 +3176,9 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
struct mlx5e_cq *cq = &drop_rq->cq;
int err;
- mlx5e_build_drop_rq_param(priv, &rq_param);
+ mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
- err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
+ err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
if (err)
return err;
@@ -3134,7 +3201,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
return 0;
err_free_rq:
- mlx5e_free_rq(drop_rq);
+ mlx5e_free_drop_rq(drop_rq);
err_destroy_cq:
mlx5e_destroy_cq(cq);
@@ -3148,7 +3215,7 @@ err_free_cq:
void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
{
mlx5e_destroy_rq(drop_rq);
- mlx5e_free_rq(drop_rq);
+ mlx5e_free_drop_rq(drop_rq);
mlx5e_destroy_cq(&drop_rq->cq);
mlx5e_free_cq(&drop_rq->cq);
}
@@ -3157,15 +3224,15 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
{
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
+ MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
if (MLX5_GET(tisc, tisc, tls_en))
- MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
+ MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
if (mlx5_lag_is_lacp_owner(mdev))
MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
- return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn);
+ return mlx5_core_create_tis(mdev, in, tisn);
}
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
@@ -3224,258 +3291,292 @@ err_close_tises:
static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
{
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ priv->mqprio_rl = NULL;
+ }
+ mlx5e_accel_cleanup_tx(priv);
mlx5e_destroy_tises(priv);
}
-static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
- u32 rqtn, u32 *tirc)
+static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
{
- MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
- MLX5_SET(tirc, tirc, indirect_table, rqtn);
- MLX5_SET(tirc, tirc, tunneled_offload_en,
- priv->channels.params.tunneled_offload_en);
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
+ if (err)
+ return err;
+ }
- mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+ return 0;
}
-static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types tt,
- u32 *tirc)
+static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
{
- mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
- mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
- &tirc_default_config[tt], tirc, false);
+ int err;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
+ if (err)
+ return err;
+ }
+ if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+ return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
+
+ return 0;
}
-static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
+static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
+ int ntc, int nch)
{
- mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc);
- MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+ int tc;
+
+ memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE);
+
+ /* Map netdev TCs to offset 0.
+ * We have our own UP to TXQ mapping for DCB mode of QoS
+ */
+ for (tc = 0; tc < ntc; tc++) {
+ tc_to_txq[tc] = (struct netdev_tc_txq) {
+ .count = nch,
+ .offset = 0,
+ };
+ }
}
-static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types tt,
- u32 *tirc)
+static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
+ struct tc_mqprio_qopt *qopt)
{
- mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
- mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
- &tirc_default_config[tt], tirc, true);
+ int tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
+ tc_to_txq[tc] = (struct netdev_tc_txq) {
+ .count = qopt->count[tc],
+ .offset = qopt->offset[tc],
+ };
+ }
}
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
+static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc)
{
- struct mlx5e_tir *tir;
- void *tirc;
- int inlen;
- int i = 0;
- int err;
- u32 *in;
- int tt;
-
- inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ params->mqprio.mode = TC_MQPRIO_MODE_DCB;
+ params->mqprio.num_tc = num_tc;
+ mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc,
+ params->num_channels);
+}
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(in, 0, inlen);
- tir = &priv->indir_tir[tt];
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_indir_tir_ctx(priv, tt, tirc);
- err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
- if (err) {
- mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
- goto err_destroy_inner_tirs;
- }
- }
+static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params,
+ struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
- if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
- goto out;
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
+ u32 hw_id = 0;
- for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
- memset(in, 0, inlen);
- tir = &priv->inner_indir_tir[i];
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
- err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
- if (err) {
- mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
- goto err_destroy_inner_tirs;
- }
+ if (rl)
+ mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id);
+ params->mqprio.channel.hw_id[tc] = hw_id;
}
+}
-out:
- kvfree(in);
-
- return 0;
+static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params,
+ struct tc_mqprio_qopt_offload *mqprio,
+ struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
-err_destroy_inner_tirs:
- for (i--; i >= 0; i--)
- mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
+ params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
+ params->mqprio.num_tc = mqprio->qopt.num_tc;
- for (tt--; tt >= 0; tt--)
- mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++)
+ params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc];
- kvfree(in);
+ mlx5e_mqprio_rl_update_params(params, rl);
+ mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt);
+}
- return err;
+static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
+{
+ mlx5e_params_mqprio_dcb_set(params, 1);
}
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
+static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt *mqprio)
{
- struct mlx5e_tir *tir;
- void *tirc;
- int inlen;
- int err = 0;
- u32 *in;
- int ix;
+ struct mlx5e_params new_params;
+ u8 tc = mqprio->num_tc;
+ int err;
- inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- for (ix = 0; ix < priv->max_nch; ix++) {
- memset(in, 0, inlen);
- tir = &tirs[ix];
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
- err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
- if (unlikely(err))
- goto err_destroy_ch_tirs;
- }
+ if (tc && tc != MLX5E_MAX_NUM_TC)
+ return -EINVAL;
- goto out;
+ new_params = priv->channels.params;
+ mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
-err_destroy_ch_tirs:
- mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
- for (ix--; ix >= 0; ix--)
- mlx5e_destroy_tir(priv->mdev, &tirs[ix]);
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_num_channels_changed_ctx, NULL, true);
-out:
- kvfree(in);
+ if (!err && priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ priv->mqprio_rl = NULL;
+ }
+ priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
+ mlx5e_get_dcb_num_tc(&priv->channels.params));
return err;
}
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
+static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_ptp *ptp_channel;
+ int agg_count = 0;
int i;
- for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
- mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
+ ptp_channel = priv->channels.ptp;
+ if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) {
+ netdev_err(netdev,
+ "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n");
+ return -EINVAL;
+ }
- if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return;
+ if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 ||
+ mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC)
+ return -EINVAL;
- for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
- mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
-}
+ for (i = 0; i < mqprio->qopt.num_tc; i++) {
+ if (!mqprio->qopt.count[i]) {
+ netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i);
+ return -EINVAL;
+ }
+ if (mqprio->min_rate[i]) {
+ netdev_err(netdev, "Min tx rate is not supported\n");
+ return -EINVAL;
+ }
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
-{
- int i;
+ if (mqprio->max_rate[i]) {
+ int err;
- for (i = 0; i < priv->max_nch; i++)
- mlx5e_destroy_tir(priv->mdev, &tirs[i]);
-}
+ err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]);
+ if (err)
+ return err;
+ }
-static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
-{
- int err = 0;
- int i;
+ if (mqprio->qopt.offset[i] != agg_count) {
+ netdev_err(netdev, "Discontinuous queues config is not supported\n");
+ return -EINVAL;
+ }
+ agg_count += mqprio->qopt.count[i];
+ }
- for (i = 0; i < chs->num; i++) {
- err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
- if (err)
- return err;
+ if (priv->channels.params.num_channels != agg_count) {
+ netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n",
+ agg_count, priv->channels.params.num_channels);
+ return -EINVAL;
}
return 0;
}
-static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
+static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[])
{
- int err = 0;
- int i;
-
- for (i = 0; i < chs->num; i++) {
- err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
- if (err)
- return err;
- }
+ int tc;
- return 0;
+ for (tc = 0; tc < num_tc; tc++)
+ if (max_rate[tc])
+ return true;
+ return false;
}
-static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
- struct tc_mqprio_qopt *mqprio)
+static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev,
+ u8 num_tc, u64 max_rate[])
{
- struct mlx5e_channels new_channels = {};
- u8 tc = mqprio->num_tc;
- int err = 0;
+ struct mlx5e_mqprio_rl *rl;
+ int err;
- mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ if (!mlx5e_mqprio_rate_limit(num_tc, max_rate))
+ return NULL;
- if (tc && tc != MLX5E_MAX_NUM_TC)
- return -EINVAL;
+ rl = mlx5e_mqprio_rl_alloc();
+ if (!rl)
+ return ERR_PTR(-ENOMEM);
- mutex_lock(&priv->state_lock);
+ err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate);
+ if (err) {
+ mlx5e_mqprio_rl_free(rl);
+ return ERR_PTR(err);
+ }
- new_channels.params = priv->channels.params;
- new_channels.params.num_tc = tc ? tc : 1;
+ return rl;
+}
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- goto out;
- }
+static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
+{
+ mlx5e_fp_preactivate preactivate;
+ struct mlx5e_params new_params;
+ struct mlx5e_mqprio_rl *rl;
+ bool nch_changed;
+ int err;
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ err = mlx5e_mqprio_channel_validate(priv, mqprio);
if (err)
- goto out;
+ return err;
- priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
- new_channels.params.num_tc);
-out:
- mutex_unlock(&priv->state_lock);
- return err;
-}
+ rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate);
+ if (IS_ERR(rl))
+ return PTR_ERR(rl);
-#ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
- struct flow_cls_offload *cls_flower,
- unsigned long flags)
-{
- switch (cls_flower->command) {
- case FLOW_CLS_REPLACE:
- return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_DESTROY:
- return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_STATS:
- return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
- flags);
- default:
- return -EOPNOTSUPP;
+ new_params = priv->channels.params;
+ mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl);
+
+ nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1;
+ preactivate = nch_changed ? mlx5e_num_channels_changed_ctx :
+ mlx5e_update_netdev_queues_ctx;
+ err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true);
+ if (err) {
+ if (rl) {
+ mlx5e_mqprio_rl_cleanup(rl);
+ mlx5e_mqprio_rl_free(rl);
+ }
+ return err;
+ }
+
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
}
+ priv->mqprio_rl = rl;
+
+ return 0;
}
-static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
+static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
{
- unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
- struct mlx5e_priv *priv = cb_priv;
+ /* MQPRIO is another toplevel qdisc that can't be attached
+ * simultaneously with the offloaded HTB.
+ */
+ if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq)))
+ return -EINVAL;
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
+ switch (mqprio->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt);
+ case TC_MQPRIO_MODE_CHANNEL:
+ return mlx5e_setup_tc_mqprio_channel(priv, mqprio);
default:
return -EOPNOTSUPP;
}
}
-#endif
static LIST_HEAD(mlx5e_block_cb_list);
@@ -3483,9 +3584,17 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
struct mlx5e_priv *priv = netdev_priv(dev);
+ bool tc_unbind = false;
+ int err;
+
+ if (type == TC_SETUP_BLOCK &&
+ ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
+ tc_unbind = true;
+
+ if (!netif_device_present(dev) && !tc_unbind)
+ return -ENODEV;
switch (type) {
-#ifdef CONFIG_MLX5_ESWITCH
case TC_SETUP_BLOCK: {
struct flow_block_offload *f = type_data;
@@ -3495,9 +3604,16 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
mlx5e_setup_tc_block_cb,
priv, priv, true);
}
-#endif
case TC_SETUP_QDISC_MQPRIO:
- return mlx5e_setup_tc_mqprio(priv, type_data);
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_setup_tc_mqprio(priv, type_data);
+ mutex_unlock(&priv->state_lock);
+ return err;
+ case TC_SETUP_QDISC_HTB:
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_htb_setup_tc(priv, type_data);
+ mutex_unlock(&priv->state_lock);
+ return err;
default:
return -EOPNOTSUPP;
}
@@ -3507,14 +3623,15 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
{
int i;
- for (i = 0; i < priv->max_nch; i++) {
- struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
+ for (i = 0; i < priv->stats_nch; i++) {
+ struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
int j;
s->rx_packets += rq_stats->packets + xskrq_stats->packets;
s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes;
+ s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets;
for (j = 0; j < priv->max_opened_tc; j++) {
struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -3524,25 +3641,55 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
s->tx_dropped += sq_stats->dropped;
}
}
+ if (priv->tx_ptp_opened) {
+ for (i = 0; i < priv->max_opened_tc; i++) {
+ struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_dropped += sq_stats->dropped;
+ }
+ }
+ if (priv->rx_ptp_opened) {
+ struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
+
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+ s->multicast += rq_stats->mcast_packets;
+ }
}
void
mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_vport_stats *vstats = &priv->stats.vport;
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
- if (!mlx5e_monitor_counter_supported(priv)) {
+ if (!netif_device_present(dev))
+ return;
+
+ /* In switchdev mode, monitor counters doesn't monitor
+ * rx/tx stats of 802_3. The update stats mechanism
+ * should keep the 802_3 layout counters updated
+ */
+ if (!mlx5e_monitor_counter_supported(priv) ||
+ mlx5e_is_uplink_rep(priv)) {
/* update HW stats in background for next time */
mlx5e_queue_update_stats(priv);
}
if (mlx5e_is_uplink_rep(priv)) {
+ struct mlx5e_vport_stats *vstats = &priv->stats.vport;
+
stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok);
stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+
+ /* vport multicast also counts packets that are dropped due to steering
+ * or rx out of buffer
+ */
+ stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
} else {
mlx5e_fold_sw_stats64(priv, stats);
}
@@ -3552,7 +3699,8 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->rx_length_errors =
PPORT_802_3_GET(pstats, a_in_range_length_errors) +
PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
- PPORT_802_3_GET(pstats, a_frame_too_long_errors);
+ PPORT_802_3_GET(pstats, a_frame_too_long_errors) +
+ VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small);
stats->rx_crc_errors =
PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
@@ -3560,19 +3708,21 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
stats->rx_frame_errors;
stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
+}
- /* vport multicast also counts packets that are dropped due to steering
- * or rx out of buffer
- */
- stats->multicast =
- VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
+static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
+{
+ if (mlx5e_is_uplink_rep(priv))
+ return; /* no rx mode for uplink rep */
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
}
static void mlx5e_set_rx_mode(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- queue_work(priv->wq, &priv->set_rx_mode_work);
+ mlx5e_nic_set_rx_mode(priv);
}
static int mlx5e_set_mac(struct net_device *netdev, void *addr)
@@ -3584,10 +3734,10 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr)
return -EADDRNOTAVAIL;
netif_addr_lock_bh(netdev);
- ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+ eth_hw_addr_set(netdev, saddr->sa_data);
netif_addr_unlock_bh(netdev);
- queue_work(priv->wq, &priv->set_rx_mode_work);
+ mlx5e_nic_set_rx_mode(priv);
return 0;
}
@@ -3606,45 +3756,62 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_channels new_channels = {};
- struct mlx5e_params *old_params;
+ struct mlx5e_params *cur_params;
+ struct mlx5e_params new_params;
+ bool reset = true;
int err = 0;
- bool reset;
mutex_lock(&priv->state_lock);
- if (enable && priv->xsk.refcnt) {
- netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n",
- priv->xsk.refcnt);
- err = -EINVAL;
- goto out;
- }
+ cur_params = &priv->channels.params;
+ new_params = *cur_params;
- old_params = &priv->channels.params;
- if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
- netdev_warn(netdev, "can't set LRO with legacy RQ\n");
- err = -EINVAL;
+ if (enable)
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
+ else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ else
goto out;
- }
- reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
+ new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
+ if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
+ reset = false;
+ }
+ }
- new_channels.params = *old_params;
- new_channels.params.lro_en = enable;
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
- if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) ==
- mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
- reset = false;
- }
+static int set_feature_hw_gro(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params new_params;
+ bool reset = true;
+ int err = 0;
- if (!reset) {
- *old_params = new_channels.params;
- err = mlx5e_modify_tirs_lro(priv);
+ mutex_lock(&priv->state_lock);
+ new_params = priv->channels.params;
+
+ if (enable) {
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
+ new_params.packet_merge.shampo.match_criteria_type =
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
+ new_params.packet_merge.shampo.alignment_granularity =
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE;
+ } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ } else {
goto out;
}
- err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
out:
mutex_unlock(&priv->state_lock);
return err;
@@ -3655,27 +3822,39 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
struct mlx5e_priv *priv = netdev_priv(netdev);
if (enable)
- mlx5e_enable_cvlan_filter(priv);
+ mlx5e_enable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
else
- mlx5e_disable_cvlan_filter(priv);
+ mlx5e_disable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
return 0;
}
-#ifdef CONFIG_MLX5_ESWITCH
-static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
+static int set_feature_hw_tc(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
- if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+ int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
+ MLX5_TC_FLAG(NIC_OFFLOAD);
+ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
}
+#endif
- return 0;
+ mutex_lock(&priv->state_lock);
+ if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n");
+ err = -EINVAL;
+ }
+ mutex_unlock(&priv->state_lock);
+
+ return err;
}
-#endif
static int set_feature_rx_all(struct net_device *netdev, bool enable)
{
@@ -3685,20 +3864,67 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable)
return mlx5_set_port_fcs(mdev, !enable);
}
+static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {};
+ bool supported, curr_state;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, ports_check))
+ return 0;
+
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap);
+ curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc);
+
+ if (!supported || enable == curr_state)
+ return 0;
+
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable);
+
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels *chs = &priv->channels;
+ struct mlx5_core_dev *mdev = priv->mdev;
int err;
mutex_lock(&priv->state_lock);
- priv->channels.params.scatter_fcs_en = enable;
- err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable);
- if (err)
- priv->channels.params.scatter_fcs_en = !enable;
+ if (enable) {
+ err = mlx5e_set_rx_port_ts(mdev, false);
+ if (err)
+ goto out;
- mutex_unlock(&priv->state_lock);
+ chs->params.scatter_fcs_en = true;
+ err = mlx5e_modify_channels_scatter_fcs(chs, true);
+ if (err) {
+ chs->params.scatter_fcs_en = false;
+ mlx5e_set_rx_port_ts(mdev, true);
+ }
+ } else {
+ chs->params.scatter_fcs_en = false;
+ err = mlx5e_modify_channels_scatter_fcs(chs, false);
+ if (err) {
+ chs->params.scatter_fcs_en = true;
+ goto out;
+ }
+ err = mlx5e_set_rx_port_ts(mdev, true);
+ if (err) {
+ mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err);
+ err = 0;
+ }
+ }
+out:
+ mutex_unlock(&priv->state_lock);
return err;
}
@@ -3709,20 +3935,45 @@ static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable);
priv->channels.params.vlan_strip_disable = !enable;
+
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
- if (err)
+ if (err) {
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, enable);
priv->channels.params.vlan_strip_disable = enable;
-
+ }
unlock:
mutex_unlock(&priv->state_lock);
return err;
}
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_flow_steering *fs = priv->fs;
+
+ if (mlx5e_is_uplink_rep(priv))
+ return 0; /* no vlan table for uplink rep */
+
+ return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid);
+}
+
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_flow_steering *fs = priv->fs;
+
+ if (mlx5e_is_uplink_rep(priv))
+ return 0; /* no vlan table for uplink rep */
+
+ return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid);
+}
+
#ifdef CONFIG_MLX5_EN_ARFS
static int set_feature_arfs(struct net_device *netdev, bool enable)
{
@@ -3730,9 +3981,9 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
int err;
if (enable)
- err = mlx5e_arfs_enable(priv);
+ err = mlx5e_arfs_enable(priv->fs);
else
- err = mlx5e_arfs_disable(priv);
+ err = mlx5e_arfs_disable(priv->fs);
return err;
}
@@ -3740,12 +3991,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
static int mlx5e_handle_feature(struct net_device *netdev,
netdev_features_t *features,
- netdev_features_t wanted_features,
netdev_features_t feature,
mlx5e_feature_handler feature_handler)
{
- netdev_features_t changes = wanted_features ^ netdev->features;
- bool enable = !!(wanted_features & feature);
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
int err;
if (!(changes & feature))
@@ -3753,35 +4003,35 @@ static int mlx5e_handle_feature(struct net_device *netdev,
err = feature_handler(netdev, enable);
if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
netdev_err(netdev, "%s feature %pNF failed, err %d\n",
enable ? "Enable" : "Disable", &feature, err);
return err;
}
- MLX5E_SET_FEATURE(features, feature, enable);
return 0;
}
int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
{
- netdev_features_t oper_features = netdev->features;
+ netdev_features_t oper_features = features;
int err = 0;
#define MLX5E_HANDLE_FEATURE(feature, handler) \
- mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
set_feature_cvlan_filter);
-#ifdef CONFIG_MLX5_ESWITCH
- err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
-#endif
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
#ifdef CONFIG_MLX5_EN_ARFS
err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
#endif
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx);
if (err) {
netdev->features = oper_features;
@@ -3791,15 +4041,40 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
return 0;
}
+static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ features &= ~NETIF_F_HW_TLS_RX;
+ if (netdev->features & NETIF_F_HW_TLS_RX)
+ netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_HW_TLS_TX;
+ if (netdev->features & NETIF_F_HW_TLS_TX)
+ netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_NTUPLE;
+ if (netdev->features & NETIF_F_NTUPLE)
+ netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_GRO_HW;
+ if (netdev->features & NETIF_F_GRO_HW)
+ netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+
+ return features;
+}
+
static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
netdev_features_t features)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_vlan_table *vlan;
struct mlx5e_params *params;
+ vlan = mlx5e_fs_get_vlan(priv->fs);
mutex_lock(&priv->state_lock);
params = &priv->channels.params;
- if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) {
+ if (!vlan ||
+ !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) {
/* HW strips the outer C-tag header, this is a problem
* for S-tag traffic.
*/
@@ -3807,19 +4082,56 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
if (!params->vlan_strip_disable)
netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
}
+
if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
if (features & NETIF_F_LRO) {
netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
features &= ~NETIF_F_LRO;
}
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (params->xdp_prog) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with XDP\n");
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (priv->xsk.refcnt) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n",
+ priv->xsk.refcnt);
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+ priv->xsk.refcnt);
+ features &= ~NETIF_F_GRO_HW;
+ }
}
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
features &= ~NETIF_F_RXHASH;
if (netdev->features & NETIF_F_RXHASH)
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
}
+ if (mlx5e_is_uplink_rep(priv))
+ features = mlx5e_fix_uplink_rep_features(netdev, features);
+
mutex_unlock(&priv->state_lock);
return features;
@@ -3833,13 +4145,14 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
u16 ix;
for (ix = 0; ix < chs->params.num_channels; ix++) {
- struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix);
+ struct xsk_buff_pool *xsk_pool =
+ mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
struct mlx5e_xsk_param xsk;
- if (!umem)
+ if (!xsk_pool)
continue;
- mlx5e_build_xsk_param(umem, &xsk);
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
@@ -3850,10 +4163,10 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
* 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
*/
max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
- max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
+ max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0));
max_mtu = min(max_mtu_frame, max_mtu_page);
- netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n",
+ netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",
new_params->sw_mtu, ix, max_mtu);
return false;
}
@@ -3862,81 +4175,145 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
return true;
}
+static bool mlx5e_params_validate_xdp(struct net_device *netdev,
+ struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ bool is_linear;
+
+ /* No XSK params: AF_XDP can't be enabled yet at the point of setting
+ * the XDP program.
+ */
+ is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
+
+ if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
+ netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+ if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
+ netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+
+ return true;
+}
+
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
- change_hw_mtu_cb set_mtu_cb)
+ mlx5e_fp_preactivate preactivate)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
struct mlx5e_params *params;
+ bool reset = true;
int err = 0;
- bool reset;
mutex_lock(&priv->state_lock);
params = &priv->channels.params;
- reset = !params->lro_en;
- reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
-
- new_channels.params = *params;
- new_channels.params.sw_mtu = new_mtu;
+ new_params = *params;
+ new_params.sw_mtu = new_mtu;
+ err = mlx5e_validate_params(priv->mdev, &new_params);
+ if (err)
+ goto out;
- if (params->xdp_prog &&
- !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
- netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
- new_mtu, mlx5e_xdp_max_mtu(params, NULL));
+ if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev,
+ &new_params)) {
err = -EINVAL;
goto out;
}
if (priv->xsk.refcnt &&
!mlx5e_xsk_validate_mtu(netdev, &priv->channels,
- &new_channels.params, priv->mdev)) {
+ &new_params, priv->mdev)) {
err = -EINVAL;
goto out;
}
- if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
- &new_channels.params,
- NULL);
- u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
- u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL);
-
- /* If XSK is active, XSK RQs are linear. */
- is_linear |= priv->xsk.refcnt;
-
- /* Always reset in linear mode - hw_mtu is used in data path. */
- reset = reset && (is_linear || (ppw_old != ppw_new));
- }
-
- if (!reset) {
- params->sw_mtu = new_mtu;
- if (set_mtu_cb)
- set_mtu_cb(priv);
- netdev->mtu = params->sw_mtu;
- goto out;
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+ reset = false;
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) {
+ bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
+ bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
+ &new_params, NULL);
+ u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL);
+ u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL);
+
+ /* Always reset in linear mode - hw_mtu is used in data path.
+ * Check that the mode was non-linear and didn't change.
+ * If XSK is active, XSK RQs are linear.
+ * Reset if the RQ size changed, even if it's non-linear.
+ */
+ if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
+ sz_old == sz_new)
+ reset = false;
}
- err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb);
- if (err)
- goto out;
-
- netdev->mtu = new_channels.params.sw_mtu;
+ err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset);
out:
+ netdev->mtu = params->sw_mtu;
mutex_unlock(&priv->state_lock);
return err;
}
static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
{
- return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
+ return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
+}
+
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
+{
+ bool set = *(bool *)ctx;
+
+ return mlx5e_ptp_rx_manage_fs(priv, set);
+}
+
+static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter)
+{
+ bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
+ int err;
+
+ if (!rx_filter)
+ /* Reset CQE compression to Admin default */
+ return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false);
+
+ if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ return 0;
+
+ /* Disable CQE compression */
+ netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true);
+ if (err)
+ netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+
+ return err;
+}
+
+static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx)
+{
+ struct mlx5e_params new_params;
+
+ if (ptp_rx == priv->channels.params.ptp_rx)
+ return 0;
+
+ new_params = priv->channels.params;
+ new_params.ptp_rx = ptp_rx;
+ return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+ &new_params.ptp_rx, true);
}
int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
{
struct hwtstamp_config config;
+ bool rx_cqe_compress_def;
+ bool ptp_rx;
int err;
if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
@@ -3956,11 +4333,12 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
}
mutex_lock(&priv->state_lock);
+ rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
+
/* RX HW timestamp */
switch (config.rx_filter) {
case HWTSTAMP_FILTER_NONE:
- /* Reset CQE compression to Admin default */
- mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
+ ptp_rx = false;
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
@@ -3977,22 +4355,25 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
case HWTSTAMP_FILTER_NTP_ALL:
- /* Disable CQE compression */
- if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
- netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
- err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
- if (err) {
- netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
- mutex_unlock(&priv->state_lock);
- return err;
- }
config.rx_filter = HWTSTAMP_FILTER_ALL;
+ /* ptp_rx is set if both HW TS is set and CQE
+ * compression is set
+ */
+ ptp_rx = rx_cqe_compress_def;
break;
default:
- mutex_unlock(&priv->state_lock);
- return -ERANGE;
+ err = -ERANGE;
+ goto err_unlock;
}
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
+ err = mlx5e_hwstamp_config_no_ptp_rx(priv,
+ config.rx_filter != HWTSTAMP_FILTER_NONE);
+ else
+ err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx);
+ if (err)
+ goto err_unlock;
+
memcpy(&priv->tstamp, &config, sizeof(config));
mutex_unlock(&priv->state_lock);
@@ -4001,6 +4382,9 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
return copy_to_user(ifr->ifr_data, &config,
sizeof(config)) ? -EFAULT : 0;
+err_unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
}
int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
@@ -4103,6 +4487,9 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
+ if (mlx5e_is_uplink_rep(priv))
+ return -EOPNOTSUPP;
+
return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
mlx5_ifla_link2vport(link_state));
}
@@ -4114,6 +4501,9 @@ int mlx5e_get_vf_config(struct net_device *dev,
struct mlx5_core_dev *mdev = priv->mdev;
int err;
+ if (!netif_device_present(dev))
+ return -EOPNOTSUPP;
+
err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
if (err)
return err;
@@ -4130,83 +4520,61 @@ int mlx5e_get_vf_stats(struct net_device *dev,
return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
vf_stats);
}
-#endif
-
-struct mlx5e_vxlan_work {
- struct work_struct work;
- struct mlx5e_priv *priv;
- u16 port;
-};
-static void mlx5e_vxlan_add_work(struct work_struct *work)
+static bool
+mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
{
- struct mlx5e_vxlan_work *vxlan_work =
- container_of(work, struct mlx5e_vxlan_work, work);
- struct mlx5e_priv *priv = vxlan_work->priv;
- u16 port = vxlan_work->port;
-
- mutex_lock(&priv->state_lock);
- mlx5_vxlan_add_port(priv->mdev->vxlan, port);
- mutex_unlock(&priv->state_lock);
+ struct mlx5e_priv *priv = netdev_priv(dev);
- kfree(vxlan_work);
-}
+ if (!netif_device_present(dev))
+ return false;
-static void mlx5e_vxlan_del_work(struct work_struct *work)
-{
- struct mlx5e_vxlan_work *vxlan_work =
- container_of(work, struct mlx5e_vxlan_work, work);
- struct mlx5e_priv *priv = vxlan_work->priv;
- u16 port = vxlan_work->port;
+ if (!mlx5e_is_uplink_rep(priv))
+ return false;
- mutex_lock(&priv->state_lock);
- mlx5_vxlan_del_port(priv->mdev->vxlan, port);
- mutex_unlock(&priv->state_lock);
- kfree(vxlan_work);
+ return mlx5e_rep_has_offload_stats(dev, attr_id);
}
-static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
+static int
+mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
{
- struct mlx5e_vxlan_work *vxlan_work;
-
- vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
- if (!vxlan_work)
- return;
+ struct mlx5e_priv *priv = netdev_priv(dev);
- if (add)
- INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
- else
- INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);
+ if (!mlx5e_is_uplink_rep(priv))
+ return -EOPNOTSUPP;
- vxlan_work->priv = priv;
- vxlan_work->port = port;
- queue_work(priv->wq, &vxlan_work->work);
+ return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
}
+#endif
-void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
+static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
- return;
-
- if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
- return;
-
- mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
+ switch (proto_type) {
+ case IPPROTO_GRE:
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx));
+ default:
+ return false;
+ }
}
-void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
+static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
+ struct sk_buff *skb)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
- return;
-
- if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
- return;
-
- mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
+ switch (skb->inner_protocol) {
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_TEB):
+ return true;
+ case htons(ETH_P_MPLS_UC):
+ case htons(ETH_P_MPLS_MC):
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
+ }
+ return false;
}
static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
@@ -4231,10 +4599,12 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
switch (proto) {
case IPPROTO_GRE:
- return features;
+ if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
+ return features;
+ break;
case IPPROTO_IPIP:
case IPPROTO_IPV6:
- if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))
+ if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP))
return features;
break;
case IPPROTO_UDP:
@@ -4250,6 +4620,11 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
return features;
#endif
+ break;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ case IPPROTO_ESP:
+ return mlx5e_ipsec_feature_check(skb, features);
+#endif
}
out:
@@ -4266,11 +4641,6 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb,
features = vlan_features_check(skb, features);
features = vxlan_features_check(skb, features);
-#ifdef CONFIG_MLX5_EN_IPSEC
- if (mlx5e_ipsec_feature_check(skb, netdev, features))
- return features;
-#endif
-
/* Validate if the tunneled packet is being offloaded by HW */
if (skb->encapsulation &&
(features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
@@ -4283,8 +4653,7 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
tx_timeout_work);
- bool report_failed = false;
- int err;
+ struct net_device *netdev = priv->netdev;
int i;
rtnl_lock();
@@ -4293,27 +4662,19 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
- for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
+ for (i = 0; i < netdev->real_num_tx_queues; i++) {
struct netdev_queue *dev_queue =
- netdev_get_tx_queue(priv->netdev, i);
+ netdev_get_tx_queue(netdev, i);
struct mlx5e_txqsq *sq = priv->txq2sq[i];
if (!netif_xmit_stopped(dev_queue))
continue;
if (mlx5e_reporter_tx_timeout(sq))
- report_failed = true;
+ /* break if tried to reopened channels */
+ break;
}
- if (!report_failed)
- goto unlock;
-
- err = mlx5e_safe_reopen_channels(priv);
- if (err)
- netdev_err(priv->netdev,
- "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
- err);
-
unlock:
mutex_unlock(&priv->state_lock);
rtnl_unlock();
@@ -4330,40 +4691,39 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
{
struct net_device *netdev = priv->netdev;
- struct mlx5e_channels new_channels = {};
+ struct mlx5e_params new_params;
- if (priv->channels.params.lro_en) {
- netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+ if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
return -EINVAL;
}
- if (MLX5_IPSEC_DEV(priv->mdev)) {
- netdev_warn(netdev, "can't set XDP with IPSec offload\n");
- return -EINVAL;
- }
-
- new_channels.params = priv->channels.params;
- new_channels.params.xdp_prog = prog;
+ new_params = priv->channels.params;
+ new_params.xdp_prog = prog;
- /* No XSK params: AF_XDP can't be enabled yet at the point of setting
- * the XDP program.
- */
- if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
- netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
- new_channels.params.sw_mtu,
- mlx5e_xdp_max_mtu(&new_channels.params, NULL));
+ if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
return -EINVAL;
- }
return 0;
}
+static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
+{
+ struct bpf_prog *old_prog;
+
+ old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
+ lockdep_is_held(&rq->priv->state_lock));
+ if (old_prog)
+ bpf_prog_put(old_prog);
+}
+
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params new_params;
struct bpf_prog *old_prog;
- bool reset, was_opened;
int err = 0;
+ bool reset;
int i;
mutex_lock(&priv->state_lock);
@@ -4374,91 +4734,59 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
goto unlock;
}
- was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
- if (was_opened && !reset)
- /* num_channels is invariant here, so we can take the
- * batched reference right upfront.
- */
- bpf_prog_add(prog, priv->channels.num);
-
- if (was_opened && reset) {
- struct mlx5e_channels new_channels = {};
+ new_params = priv->channels.params;
+ new_params.xdp_prog = prog;
- new_channels.params = priv->channels.params;
- new_channels.params.xdp_prog = prog;
- mlx5e_set_rq_type(priv->mdev, &new_channels.params);
- old_prog = priv->channels.params.xdp_prog;
-
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
+ * supported with the new parameters: if PAGE_SIZE is bigger than
+ * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
+ * the MTU is small enough for the linear mode, because XDP uses strides
+ * of PAGE_SIZE on regular RQs.
+ */
+ if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
+ err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
if (err)
goto unlock;
- } else {
- /* exchange programs, extra prog reference we got from caller
- * as long as we don't fail from this point onwards.
- */
- old_prog = xchg(&priv->channels.params.xdp_prog, prog);
}
+ old_prog = priv->channels.params.xdp_prog;
+
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
+ if (err)
+ goto unlock;
+
if (old_prog)
bpf_prog_put(old_prog);
- if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */
- mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
-
- if (!was_opened || reset)
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
goto unlock;
/* exchanging programs w/o reset, we update ref counts on behalf
* of the channels RQs here.
*/
+ bpf_prog_add(prog, priv->channels.num);
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
- bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
- clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
- if (xsk_open)
- clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
- napi_synchronize(&c->napi);
- /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
-
- old_prog = xchg(&c->rq.xdp_prog, prog);
- if (old_prog)
- bpf_prog_put(old_prog);
-
- if (xsk_open) {
- old_prog = xchg(&c->xskrq.xdp_prog, prog);
- if (old_prog)
- bpf_prog_put(old_prog);
+ mlx5e_rq_replace_xdp_prog(&c->rq, prog);
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) {
+ bpf_prog_inc(prog);
+ mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
}
-
- set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
- if (xsk_open)
- set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
- /* napi_schedule in case we have missed anything */
- napi_schedule(&c->napi);
}
unlock:
mutex_unlock(&priv->state_lock);
- return err;
-}
-
-static u32 mlx5e_xdp_query(struct net_device *dev)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
- u32 prog_id = 0;
- mutex_lock(&priv->state_lock);
- xdp_prog = priv->channels.params.xdp_prog;
- if (xdp_prog)
- prog_id = xdp_prog->aux->id;
- mutex_unlock(&priv->state_lock);
+ /* Need to fix some features. */
+ if (!err)
+ netdev_update_features(netdev);
- return prog_id;
+ return err;
}
static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
@@ -4466,11 +4794,8 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return mlx5e_xdp_set(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = mlx5e_xdp_query(dev);
- return 0;
- case XDP_SETUP_XSK_UMEM:
- return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
+ case XDP_SETUP_XSK_POOL:
+ return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool,
xdp->xsk.queue_id);
default:
return -EINVAL;
@@ -4546,10 +4871,8 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_set_features = mlx5e_set_features,
.ndo_fix_features = mlx5e_fix_features,
.ndo_change_mtu = mlx5e_change_nic_mtu,
- .ndo_do_ioctl = mlx5e_ioctl,
+ .ndo_eth_ioctl = mlx5e_ioctl,
.ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
- .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
- .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
.ndo_features_check = mlx5e_features_check,
.ndo_tx_timeout = mlx5e_tx_timeout,
.ndo_bpf = mlx5e_xdp,
@@ -4571,122 +4894,12 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
+ .ndo_has_offload_stats = mlx5e_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_get_offload_stats,
#endif
+ .ndo_get_devlink_port = mlx5e_get_devlink_port,
};
-static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
-{
- if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
- return -EOPNOTSUPP;
- if (!MLX5_CAP_GEN(mdev, eth_net_offloads) ||
- !MLX5_CAP_GEN(mdev, nic_flow_table) ||
- !MLX5_CAP_ETH(mdev, csum_cap) ||
- !MLX5_CAP_ETH(mdev, max_lso_cap) ||
- !MLX5_CAP_ETH(mdev, vlan_cap) ||
- !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap) ||
- MLX5_CAP_FLOWTABLE(mdev,
- flow_table_properties_nic_receive.max_ft_level)
- < 3) {
- mlx5_core_warn(mdev,
- "Not creating net device, some required device capabilities are missing\n");
- return -EOPNOTSUPP;
- }
- if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
- mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
- if (!MLX5_CAP_GEN(mdev, cq_moderation))
- mlx5_core_warn(mdev, "CQ moderation is not supported\n");
-
- return 0;
-}
-
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
- int num_channels)
-{
- int i;
-
- for (i = 0; i < len; i++)
- indirection_rqt[i] = i % num_channels;
-}
-
-static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
-{
- u32 link_speed = 0;
- u32 pci_bw = 0;
-
- mlx5e_port_max_linkspeed(mdev, &link_speed);
- pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
- mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
- link_speed, pci_bw);
-
-#define MLX5E_SLOW_PCI_RATIO (2)
-
- return link_speed && pci_bw &&
- link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
-}
-
-static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
-{
- struct dim_cq_moder moder;
-
- moder.cq_period_mode = cq_period_mode;
- moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
- moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
- if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
- moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
-
- return moder;
-}
-
-static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
-{
- struct dim_cq_moder moder;
-
- moder.cq_period_mode = cq_period_mode;
- moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
- moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
- if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
- moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
-
- return moder;
-}
-
-static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
-{
- return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
- DIM_CQ_PERIOD_MODE_START_FROM_CQE :
- DIM_CQ_PERIOD_MODE_START_FROM_EQE;
-}
-
-void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
-{
- if (params->tx_dim_enabled) {
- u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
-
- params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
- } else {
- params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
- }
-
- MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
- params->tx_cq_moderation.cq_period_mode ==
- MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
-}
-
-void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
-{
- if (params->rx_dim_enabled) {
- u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
-
- params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
- } else {
- params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
- }
-
- MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
- params->rx_cq_moderation.cq_period_mode ==
- MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
-}
-
static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
{
int i;
@@ -4699,46 +4912,9 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
}
-void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
-{
- /* Prefer Striding RQ, unless any of the following holds:
- * - Striding RQ configuration is not possible/supported.
- * - Slow PCI heuristic.
- * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
- *
- * No XSK params: checking the availability of striding RQ in general.
- */
- if (!slow_pci_heuristic(mdev) &&
- mlx5e_striding_rq_possible(mdev, params) &&
- (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
- !mlx5e_rx_is_linear_skb(params, NULL)))
- MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
- mlx5e_set_rq_type(mdev, params);
- mlx5e_init_rq_type_params(mdev, params);
-}
-
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
- u16 num_channels)
-{
- enum mlx5e_traffic_types tt;
-
- rss_params->hfunc = ETH_RSS_HASH_TOP;
- netdev_rss_key_fill(rss_params->toeplitz_hash_key,
- sizeof(rss_params->toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, num_channels);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- rss_params->rx_hash_fields[tt] =
- tirc_default_config[tt].rx_hash_fields;
-}
-
-void mlx5e_build_nic_params(struct mlx5e_priv *priv,
- struct mlx5e_xsk *xsk,
- struct mlx5e_rss_params *rss_params,
- struct mlx5e_params *params,
- u16 mtu)
+void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
{
+ struct mlx5e_params *params = &priv->channels.params;
struct mlx5_core_dev *mdev = priv->mdev;
u8 rx_cq_period_mode;
@@ -4746,16 +4922,16 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv,
params->hard_mtu = MLX5E_ETH_HARD_MTU;
params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
priv->max_nch);
- params->num_tc = 1;
+ mlx5e_params_mqprio_reset(params);
/* SQ */
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
/* XDP SQ */
- MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
- MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
/* set CQE compression */
params->rx_cqe_compress_def = false;
@@ -4769,15 +4945,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv,
/* RQ */
mlx5e_build_rq_params(mdev, params);
- /* HW LRO */
-
- /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
- if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- /* No XSK params: checking the availability of striding RQ in general. */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- params->lro_en = !slow_pci_heuristic(mdev);
- }
- params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+ params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
@@ -4791,25 +4959,75 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv,
/* TX inline */
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
- /* RSS */
- mlx5e_build_rss_params(rss_params, params->num_channels);
- params->tunneled_offload_en =
- mlx5e_tunnel_inner_ft_supported(mdev);
+ params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
/* AF_XDP */
params->xsk = xsk;
+
+ /* Do not update netdev->features directly in here
+ * on mlx5e_attach_netdev() we will call mlx5e_update_features()
+ * To update netdev->features please modify mlx5e_fix_features()
+ */
}
static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ u8 addr[ETH_ALEN];
- mlx5_query_mac_address(priv->mdev, netdev->dev_addr);
- if (is_zero_ether_addr(netdev->dev_addr) &&
+ mlx5_query_mac_address(priv->mdev, addr);
+ if (is_zero_ether_addr(addr) &&
!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
eth_hw_addr_random(netdev);
mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
+ return;
}
+
+ eth_hw_addr_set(netdev, addr);
+}
+
+static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
+{
+ if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+ return;
+
+ priv->nic_info.set_port = mlx5e_vxlan_set_port;
+ priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
+ priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
+ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
+ priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
+ /* Don't count the space hard-coded to the IANA port */
+ priv->nic_info.tables[0].n_entries =
+ mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
+
+ priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
+}
+
+static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
+ return true;
+ }
+ return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
}
static void mlx5e_build_nic_netdev(struct net_device *netdev)
@@ -4823,10 +5041,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->netdev_ops = &mlx5e_netdev_ops;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
- netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
-#endif
+ mlx5e_dcbnl_build_netdev(netdev);
netdev->watchdog_timeo = 15 * HZ;
@@ -4839,6 +5054,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_RXCSUM;
netdev->vlan_features |= NETIF_F_RXHASH;
+ netdev->vlan_features |= NETIF_F_GSO_PARTIAL;
netdev->mpls_features |= NETIF_F_SG;
netdev->mpls_features |= NETIF_F_HW_CSUM;
@@ -4848,8 +5064,16 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ /* Tunneled LRO is not supported in the driver, and the same RQs are
+ * shared between inner and outer TIRs, so the driver can't disable LRO
+ * for inner TIRs while having it enabled for outer TIRs. Due to this,
+ * block LRO altogether if the firmware declares tunneled LRO support.
+ */
if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
- mlx5e_check_fragmented_striding_rq_cap(mdev))
+ !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
+ !MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
+ mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED))
netdev->vlan_features |= NETIF_F_LRO;
netdev->hw_features = netdev->vlan_features;
@@ -4858,8 +5082,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
- if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
- mlx5e_any_tunnel_proto_supported(mdev)) {
+ if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
netdev->hw_enc_features |= NETIF_F_TSO6;
@@ -4876,7 +5099,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
NETIF_F_GSO_UDP_TUNNEL_CSUM;
}
- if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) {
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
netdev->hw_features |= NETIF_F_GSO_GRE |
NETIF_F_GSO_GRE_CSUM;
netdev->hw_enc_features |= NETIF_F_GSO_GRE |
@@ -4885,7 +5108,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
NETIF_F_GSO_GRE_CSUM;
}
- if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_IPIP)) {
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
netdev->hw_features |= NETIF_F_GSO_IPXIP4 |
NETIF_F_GSO_IPXIP6;
netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 |
@@ -4894,7 +5117,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
NETIF_F_GSO_IPXIP6;
}
- netdev->hw_features |= NETIF_F_GSO_PARTIAL;
netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
netdev->hw_features |= NETIF_F_GSO_UDP_L4;
netdev->features |= NETIF_F_GSO_UDP_L4;
@@ -4907,26 +5129,24 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (MLX5_CAP_ETH(mdev, scatter_fcs))
netdev->hw_features |= NETIF_F_RXFCS;
+ if (mlx5_qos_is_supported(mdev))
+ netdev->hw_features |= NETIF_F_HW_TC;
+
netdev->features = netdev->hw_features;
- if (!priv->channels.params.lro_en)
- netdev->features &= ~NETIF_F_LRO;
+ /* Defaults */
if (fcs_enabled)
netdev->features &= ~NETIF_F_RXALL;
-
- if (!priv->channels.params.scatter_fcs_en)
- netdev->features &= ~NETIF_F_RXFCS;
-
- /* prefere CQE compression over rxhash */
- if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
- netdev->features &= ~NETIF_F_RXHASH;
+ netdev->features &= ~NETIF_F_LRO;
+ netdev->features &= ~NETIF_F_GRO_HW;
+ netdev->features &= ~NETIF_F_RXFCS;
#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
if (FT_CAP(flow_modify_en) &&
FT_CAP(modify_root) &&
FT_CAP(identified_miss_table_mode) &&
FT_CAP(flow_table_modify)) {
-#ifdef CONFIG_MLX5_ESWITCH
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
netdev->hw_features |= NETIF_F_HW_TC;
#endif
#ifdef CONFIG_MLX5_EN_ARFS
@@ -4939,81 +5159,102 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->priv_flags |= IFF_UNICAST_FLT;
+ netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
mlx5e_set_netdev_dev_addr(netdev);
+ mlx5e_macsec_build_netdev(priv);
mlx5e_ipsec_build_netdev(priv);
- mlx5e_tls_build_netdev(priv);
+ mlx5e_ktls_build_netdev(priv);
}
void mlx5e_create_q_counters(struct mlx5e_priv *priv)
{
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
struct mlx5_core_dev *mdev = priv->mdev;
int err;
- err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter);
- if (err) {
- mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err);
- priv->q_counter = 0;
- }
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
+ if (!err)
+ priv->q_counter =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter);
- if (err) {
- mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err);
- priv->drop_rq_q_counter = 0;
- }
+ err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
+ if (!err)
+ priv->drop_rq_q_counter =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
}
void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
{
- if (priv->q_counter)
- mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ if (priv->q_counter) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ priv->q_counter);
+ mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
+ }
- if (priv->drop_rq_q_counter)
- mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
+ if (priv->drop_rq_q_counter) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ priv->drop_rq_q_counter);
+ mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
+ }
}
static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+ struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rss_params *rss = &priv->rss_params;
+ struct mlx5e_flow_steering *fs;
int err;
- err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
- if (err)
- return err;
-
- mlx5e_build_nic_params(priv, &priv->xsk, rss, &priv->channels.params,
- netdev->mtu);
+ mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
+ mlx5e_vxlan_set_netdev_info(priv);
mlx5e_timestamp_init(priv);
+ fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!fs) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "FS initialization failed, %d\n", err);
+ return err;
+ }
+ priv->fs = fs;
+
err = mlx5e_ipsec_init(priv);
if (err)
mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
- err = mlx5e_tls_init(priv);
+
+ err = mlx5e_ktls_init(priv);
if (err)
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
- mlx5e_build_nic_netdev(netdev);
- mlx5e_health_create_reporters(priv);
+ mlx5e_health_create_reporters(priv);
return 0;
}
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
mlx5e_health_destroy_reporters(priv);
- mlx5e_tls_cleanup(priv);
+ mlx5e_ktls_cleanup(priv);
mlx5e_ipsec_cleanup(priv);
- mlx5e_netdev_cleanup(priv->netdev, priv);
+ mlx5e_fs_cleanup(priv->fs);
}
static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ enum mlx5e_rx_res_features features;
int err;
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res)
+ return -ENOMEM;
+
mlx5e_create_q_counters(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -5022,75 +5263,81 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
goto err_destroy_q_counters;
}
- err = mlx5e_create_indirect_rqt(priv);
+ features = MLX5E_RX_RES_FEATURE_PTP;
+ if (priv->channels.params.tunneled_offload_en)
+ features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, true);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
- if (err)
- goto err_destroy_indirect_tirs;
-
- err = mlx5e_create_direct_rqts(priv, priv->xsk_tir);
- if (unlikely(err))
- goto err_destroy_direct_tirs;
-
- err = mlx5e_create_direct_tirs(priv, priv->xsk_tir);
- if (unlikely(err))
- goto err_destroy_xsk_rqts;
-
- err = mlx5e_create_flow_steering(priv);
+ err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile,
+ priv->netdev);
if (err) {
mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
- goto err_destroy_xsk_tirs;
+ goto err_destroy_rx_res;
}
err = mlx5e_tc_nic_init(priv);
if (err)
goto err_destroy_flow_steering;
+ err = mlx5e_accel_init_rx(priv);
+ if (err)
+ goto err_tc_nic_cleanup;
+
+#ifdef CONFIG_MLX5_EN_ARFS
+ priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev);
+#endif
+
return 0;
+err_tc_nic_cleanup:
+ mlx5e_tc_nic_cleanup(priv);
err_destroy_flow_steering:
- mlx5e_destroy_flow_steering(priv);
-err_destroy_xsk_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
-err_destroy_xsk_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv, true);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
-err_destroy_indirect_rqts:
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
err_destroy_q_counters:
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
return err;
}
static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
{
+ mlx5e_accel_cleanup_rx(priv);
mlx5e_tc_nic_cleanup(priv);
- mlx5e_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
- mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
- mlx5e_destroy_indirect_tirs(priv, true);
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+}
+
+static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params;
+ struct mlx5e_mqprio_rl *rl;
+
+ params = &priv->channels.params;
+ if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL)
+ return;
+
+ rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc,
+ params->mqprio.channel.max_rate);
+ if (IS_ERR(rl))
+ rl = NULL;
+ priv->mqprio_rl = rl;
+ mlx5e_mqprio_rl_update_params(params, rl);
}
static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
@@ -5103,44 +5350,56 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
return err;
}
-#ifdef CONFIG_MLX5_CORE_EN_DCB
+ err = mlx5e_accel_init_tx(priv);
+ if (err)
+ goto err_destroy_tises;
+
+ mlx5e_set_mqprio_rl(priv);
mlx5e_dcbnl_initialize(priv);
-#endif
return 0;
+
+err_destroy_tises:
+ mlx5e_destroy_tises(priv);
+ return err;
}
static void mlx5e_nic_enable(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ mlx5e_fs_init_l2_addr(priv->fs, netdev);
- mlx5e_init_l2_addr(priv);
+ err = mlx5e_macsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err);
/* Marking the link as currently not needed by the Driver */
if (!netif_running(netdev))
- mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
+ mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
mlx5e_set_netdev_mtu_boundaries(priv);
mlx5e_set_dev_port_mtu(priv);
- mlx5_lag_add(mdev, netdev);
+ mlx5_lag_add_netdev(mdev, netdev);
mlx5e_enable_async_events(priv);
+ mlx5e_enable_blocking_events(priv);
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_init(priv);
mlx5e_hv_vhca_stats_create(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_init_app(priv);
-#endif
- queue_work(priv->wq, &priv->set_rx_mode_work);
+ mlx5e_nic_set_rx_mode(priv);
rtnl_lock();
if (netif_running(netdev))
mlx5e_open(netdev);
+ udp_tunnel_nic_reset_ntf(priv->netdev);
netif_device_attach(netdev);
rtnl_unlock();
}
@@ -5149,10 +5408,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
if (priv->netdev->reg_state == NETREG_REGISTERED)
mlx5e_dcbnl_delete_app(priv);
-#endif
rtnl_lock();
if (netif_running(priv->netdev))
@@ -5160,19 +5417,27 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
netif_device_detach(priv->netdev);
rtnl_unlock();
- queue_work(priv->wq, &priv->set_rx_mode_work);
+ mlx5e_nic_set_rx_mode(priv);
mlx5e_hv_vhca_stats_destroy(priv);
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_cleanup(priv);
+ mlx5e_disable_blocking_events(priv);
+ if (priv->en_trap) {
+ mlx5e_deactivate_trap(priv);
+ mlx5e_close_trap(priv->en_trap);
+ priv->en_trap = NULL;
+ }
mlx5e_disable_async_events(priv);
- mlx5_lag_remove(mdev);
+ mlx5_lag_remove_netdev(mdev, priv->netdev);
+ mlx5_vxlan_reset_to_default(mdev->vxlan);
+ mlx5e_macsec_cleanup(priv);
}
int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
{
- return mlx5e_refresh_tirs(priv, false);
+ return mlx5e_refresh_tirs(priv, false, false);
}
static const struct mlx5e_profile mlx5e_nic_profile = {
@@ -5185,35 +5450,102 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.enable = mlx5e_nic_enable,
.disable = mlx5e_nic_disable,
.update_rx = mlx5e_update_nic_rx,
- .update_stats = mlx5e_update_ndo_stats,
+ .update_stats = mlx5e_stats_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
- .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
- .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .rx_handlers = &mlx5e_rx_handlers_nic,
.max_tc = MLX5E_MAX_NUM_TC,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK),
.stats_grps = mlx5e_nic_stats_grps,
.stats_grps_num = mlx5e_nic_stats_grps_num,
+ .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) |
+ BIT(MLX5E_PROFILE_FEATURE_PTP_TX) |
+ BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) |
+ BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) |
+ BIT(MLX5E_PROFILE_FEATURE_FS_TC),
};
-/* mlx5e generic netdev management API (move to en_common.c) */
+static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ int nch;
+
+ nch = mlx5e_get_max_num_channels(mdev);
+
+ if (profile->max_nch_limit)
+ nch = min_t(int, nch, profile->max_nch_limit(mdev));
+ return nch;
+}
+
+static unsigned int
+mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
+ const struct mlx5e_profile *profile)
-/* mlx5e_netdev_init/cleanup must be called from profile->init/cleanup callbacks */
-int mlx5e_netdev_init(struct net_device *netdev,
- struct mlx5e_priv *priv,
- struct mlx5_core_dev *mdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
{
+ unsigned int max_nch, tmp;
+
+ /* core resources */
+ max_nch = mlx5e_profile_max_num_channels(mdev, profile);
+
+ /* netdev rx queues */
+ max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues);
+
+ /* netdev tx queues */
+ tmp = netdev->num_tx_queues;
+ if (mlx5_qos_is_supported(mdev))
+ tmp -= mlx5e_qos_max_leaf_nodes(mdev);
+ if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
+ tmp -= profile->max_tc;
+ tmp = tmp / profile->max_tc;
+ max_nch = min_t(unsigned int, max_nch, tmp);
+
+ return max_nch;
+}
+
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev)
+{
+ /* Indirect TIRS: 2 sets of TTCs (inner + outer steering)
+ * and 1 set of direct TIRS
+ */
+ return 2 * MLX5E_NUM_INDIR_TIRS
+ + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile);
+}
+
+void mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ set_rx_mode_work);
+
+ return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev);
+}
+
+/* mlx5e generic netdev management API (move to en_common.c) */
+int mlx5e_priv_init(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev,
+ struct mlx5_core_dev *mdev)
+{
+ int nch, num_txqs, node;
+ int err;
+
+ num_txqs = netdev->num_tx_queues;
+ nch = mlx5e_calc_max_nch(mdev, netdev, profile);
+ node = dev_to_node(mlx5_core_dma_dev(mdev));
+
/* priv init */
priv->mdev = mdev;
priv->netdev = netdev;
- priv->profile = profile;
- priv->ppriv = ppriv;
priv->msglevel = MLX5E_MSG_LEVEL;
- priv->max_nch = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
+ priv->max_nch = nch;
priv->max_opened_tc = 1;
+ if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
mutex_init(&priv->state_lock);
+
+ err = mlx5e_selq_init(&priv->selq, &priv->state_lock);
+ if (err)
+ goto err_free_cpumask;
+
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
@@ -5221,45 +5553,112 @@ int mlx5e_netdev_init(struct net_device *netdev,
priv->wq = create_singlethread_workqueue("mlx5e");
if (!priv->wq)
- return -ENOMEM;
+ goto err_free_selq;
- /* netdev init */
- netif_carrier_off(netdev);
+ priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node);
+ if (!priv->txq2sq)
+ goto err_destroy_workqueue;
-#ifdef CONFIG_MLX5_EN_ARFS
- netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev);
-#endif
+ priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node);
+ if (!priv->tx_rates)
+ goto err_free_txq2sq;
+
+ priv->channel_stats =
+ kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
+ if (!priv->channel_stats)
+ goto err_free_tx_rates;
return 0;
+
+err_free_tx_rates:
+ kfree(priv->tx_rates);
+err_free_txq2sq:
+ kfree(priv->txq2sq);
+err_destroy_workqueue:
+ destroy_workqueue(priv->wq);
+err_free_selq:
+ mlx5e_selq_cleanup(&priv->selq);
+err_free_cpumask:
+ free_cpumask_var(priv->scratchpad.cpumask);
+ return -ENOMEM;
}
-void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv)
+void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
{
+ int i;
+
+ /* bail if change profile failed and also rollback failed */
+ if (!priv->mdev)
+ return;
+
+ for (i = 0; i < priv->stats_nch; i++)
+ kvfree(priv->channel_stats[i]);
+ kfree(priv->channel_stats);
+ kfree(priv->tx_rates);
+ kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
+ mutex_lock(&priv->state_lock);
+ mlx5e_selq_cleanup(&priv->selq);
+ mutex_unlock(&priv->state_lock);
+ free_cpumask_var(priv->scratchpad.cpumask);
+
+ for (i = 0; i < priv->htb_max_qos_sqs; i++)
+ kfree(priv->htb_qos_sq_stats[i]);
+ kvfree(priv->htb_qos_sq_stats);
+
+ memset(priv, 0, sizeof(*priv));
+}
+
+static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ unsigned int nch, ptp_txqs, qos_txqs;
+
+ nch = mlx5e_profile_max_num_channels(mdev, profile);
+
+ ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) &&
+ mlx5e_profile_feature_cap(profile, PTP_TX) ?
+ profile->max_tc : 0;
+
+ qos_txqs = mlx5_qos_is_supported(mdev) &&
+ mlx5e_profile_feature_cap(profile, QOS_HTB) ?
+ mlx5e_qos_max_leaf_nodes(mdev) : 0;
+
+ return nch * profile->max_tc + ptp_txqs + qos_txqs;
+}
+
+static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ return mlx5e_profile_max_num_channels(mdev, profile);
}
-struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
- const struct mlx5e_profile *profile,
- int nch,
- void *ppriv)
+struct net_device *
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile)
{
struct net_device *netdev;
+ unsigned int txqs, rxqs;
int err;
- netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
- nch * profile->max_tc,
- nch * profile->rq_groups);
+ txqs = mlx5e_get_max_num_txqs(mdev, profile);
+ rxqs = mlx5e_get_max_num_rxqs(mdev, profile);
+
+ netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs);
if (!netdev) {
mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
return NULL;
}
- err = profile->init(mdev, netdev, profile, ppriv);
+ err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev);
if (err) {
- mlx5_core_err(mdev, "failed to init mlx5e profile %d\n", err);
+ mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
goto err_free_netdev;
}
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+ dev_net_set(netdev, mlx5_core_net(mdev));
+
return netdev;
err_free_netdev:
@@ -5268,24 +5667,77 @@ err_free_netdev:
return NULL;
}
+static void mlx5e_update_features(struct net_device *netdev)
+{
+ if (netdev->reg_state != NETREG_REGISTERED)
+ return; /* features will be updated on netdev registration */
+
+ rtnl_lock();
+ netdev_update_features(netdev);
+ rtnl_unlock();
+}
+
+static void mlx5e_reset_channels(struct net_device *netdev)
+{
+ netdev_reset_tc(netdev);
+}
+
int mlx5e_attach_netdev(struct mlx5e_priv *priv)
{
- const struct mlx5e_profile *profile;
+ const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
+ const struct mlx5e_profile *profile = priv->profile;
int max_nch;
int err;
- profile = priv->profile;
clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+
+ /* Validate the max_wqe_size_sq capability. */
+ if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n",
+ mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS);
+ return -EIO;
+ }
/* max number of channels may have changed */
- max_nch = mlx5e_get_max_num_channels(priv->mdev);
+ max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
if (priv->channels.params.num_channels > max_nch) {
mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
+ /* Reducing the number of channels - RXFH has to be reset, and
+ * mlx5e_num_channels_changed below will build the RQT.
+ */
+ priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
priv->channels.params.num_channels = max_nch;
- mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, max_nch);
+ if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n");
+ mlx5e_params_mqprio_reset(&priv->channels.params);
+ }
+ }
+ if (max_nch != priv->max_nch) {
+ mlx5_core_warn(priv->mdev,
+ "MLX5E: Updating max number of channels from %u to %u\n",
+ priv->max_nch, max_nch);
+ priv->max_nch = max_nch;
}
+ /* 1. Set the real number of queues in the kernel the first time.
+ * 2. Set our default XPS cpumask.
+ * 3. Build the RQT.
+ *
+ * rtnl_lock is required by netif_set_real_num_*_queues in case the
+ * netdev has been registered by this point (if this function was called
+ * in the reload or resume flow).
+ */
+ if (take_rtnl)
+ rtnl_lock();
+ err = mlx5e_num_channels_changed(priv);
+ if (take_rtnl)
+ rtnl_unlock();
+ if (err)
+ goto out;
+
err = profile->init_tx(priv);
if (err)
goto out;
@@ -5297,12 +5749,20 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
if (profile->enable)
profile->enable(priv);
+ mlx5e_update_features(priv->netdev);
+
return 0;
err_cleanup_tx:
profile->cleanup_tx(priv);
out:
+ mlx5e_reset_channels(priv->netdev);
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ cancel_work_sync(&priv->update_stats_work);
return err;
}
@@ -5311,6 +5771,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
const struct mlx5e_profile *profile = priv->profile;
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
if (profile->disable)
profile->disable(priv);
@@ -5318,26 +5781,89 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
profile->cleanup_rx(priv);
profile->cleanup_tx(priv);
+ mlx5e_reset_channels(priv->netdev);
cancel_work_sync(&priv->update_stats_work);
}
+static int
+mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *new_profile, void *new_ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ err = mlx5e_priv_init(priv, new_profile, netdev, mdev);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
+ return err;
+ }
+ netif_carrier_off(netdev);
+ priv->profile = new_profile;
+ priv->ppriv = new_ppriv;
+ err = new_profile->init(priv->mdev, priv->netdev);
+ if (err)
+ goto priv_cleanup;
+ err = mlx5e_attach_netdev(priv);
+ if (err)
+ goto profile_cleanup;
+ return err;
+
+profile_cleanup:
+ new_profile->cleanup(priv);
+priv_cleanup:
+ mlx5e_priv_cleanup(priv);
+ return err;
+}
+
+int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *new_profile, void *new_ppriv)
+{
+ const struct mlx5e_profile *orig_profile = priv->profile;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *orig_ppriv = priv->ppriv;
+ int err, rollback_err;
+
+ /* cleanup old profile */
+ mlx5e_detach_netdev(priv);
+ priv->profile->cleanup(priv);
+ mlx5e_priv_cleanup(priv);
+
+ err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
+ if (err) { /* roll back to original profile */
+ netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv);
+ if (rollback_err)
+ netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n",
+ __func__, rollback_err);
+ return err;
+}
+
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
+{
+ mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
+}
+
void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
{
- const struct mlx5e_profile *profile = priv->profile;
struct net_device *netdev = priv->netdev;
- if (profile->cleanup)
- profile->cleanup(priv);
+ mlx5e_priv_cleanup(priv);
free_netdev(netdev);
}
-/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying
- * hardware contexts and to connect it to the current netdev.
- */
-static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
+static int mlx5e_resume(struct auxiliary_device *adev)
{
- struct mlx5e_priv *priv = vpriv;
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = edev->mdev;
int err;
if (netif_device_present(netdev))
@@ -5356,111 +5882,132 @@ static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
return 0;
}
-static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
+static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
{
- struct mlx5e_priv *priv = vpriv;
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
struct net_device *netdev = priv->netdev;
-
-#ifdef CONFIG_MLX5_ESWITCH
- if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev)
- return;
-#endif
+ struct mlx5_core_dev *mdev = priv->mdev;
if (!netif_device_present(netdev))
- return;
+ return -ENODEV;
mlx5e_detach_netdev(priv);
mlx5e_destroy_mdev_resources(mdev);
+ return 0;
}
-static void *mlx5e_add(struct mlx5_core_dev *mdev)
+static int mlx5e_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ const struct mlx5e_profile *profile = &mlx5e_nic_profile;
+ struct mlx5_core_dev *mdev = edev->mdev;
struct net_device *netdev;
- void *priv;
+ pm_message_t state = {};
+ struct mlx5e_priv *priv;
int err;
- int nch;
- err = mlx5e_check_required_hca_cap(mdev);
- if (err)
- return NULL;
-
-#ifdef CONFIG_MLX5_ESWITCH
- if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
- mlx5e_rep_register_vport_reps(mdev);
- return mdev;
- }
-#endif
-
- nch = mlx5e_get_max_num_channels(mdev);
- netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL);
+ netdev = mlx5e_create_netdev(mdev, profile);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
- return NULL;
+ return -ENOMEM;
}
- dev_net_set(netdev, mlx5_core_net(mdev));
+ mlx5e_build_nic_netdev(netdev);
+
priv = netdev_priv(netdev);
+ auxiliary_set_drvdata(adev, priv);
+
+ priv->profile = profile;
+ priv->ppriv = NULL;
- err = mlx5e_attach(mdev, priv);
+ err = mlx5e_devlink_port_register(priv);
if (err) {
- mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
+ mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
goto err_destroy_netdev;
}
+ err = profile->init(mdev, netdev);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
+ goto err_devlink_cleanup;
+ }
+
+ err = mlx5e_resume(adev);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err);
+ goto err_profile_cleanup;
+ }
+
err = register_netdev(netdev);
if (err) {
mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
- goto err_detach;
+ goto err_resume;
}
-#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_devlink_port_type_eth_set(priv);
+
mlx5e_dcbnl_init_app(priv);
-#endif
- return priv;
+ mlx5_uplink_netdev_set(mdev, netdev);
+ return 0;
-err_detach:
- mlx5e_detach(mdev, priv);
+err_resume:
+ mlx5e_suspend(adev, state);
+err_profile_cleanup:
+ profile->cleanup(priv);
+err_devlink_cleanup:
+ mlx5e_devlink_port_unregister(priv);
err_destroy_netdev:
mlx5e_destroy_netdev(priv);
- return NULL;
+ return err;
}
-static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
+static void mlx5e_remove(struct auxiliary_device *adev)
{
- struct mlx5e_priv *priv;
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
+ pm_message_t state = {};
-#ifdef CONFIG_MLX5_ESWITCH
- if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) {
- mlx5e_rep_unregister_vport_reps(mdev);
- return;
- }
-#endif
- priv = vpriv;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
-#endif
unregister_netdev(priv->netdev);
- mlx5e_detach(mdev, vpriv);
+ mlx5e_suspend(adev, state);
+ priv->profile->cleanup(priv);
+ mlx5e_devlink_port_unregister(priv);
mlx5e_destroy_netdev(priv);
}
-static struct mlx5_interface mlx5e_interface = {
- .add = mlx5e_add,
- .remove = mlx5e_remove,
- .attach = mlx5e_attach,
- .detach = mlx5e_detach,
- .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
+static const struct auxiliary_device_id mlx5e_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".eth", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table);
+
+static struct auxiliary_driver mlx5e_driver = {
+ .name = "eth",
+ .probe = mlx5e_probe,
+ .remove = mlx5e_remove,
+ .suspend = mlx5e_suspend,
+ .resume = mlx5e_resume,
+ .id_table = mlx5e_id_table,
};
-void mlx5e_init(void)
+int mlx5e_init(void)
{
- mlx5e_ipsec_build_inverse_table();
+ int ret;
+
mlx5e_build_ptys2ethtool_map();
- mlx5_register_interface(&mlx5e_interface);
+ ret = auxiliary_driver_register(&mlx5e_driver);
+ if (ret)
+ return ret;
+
+ ret = mlx5e_rep_init();
+ if (ret)
+ auxiliary_driver_unregister(&mlx5e_driver);
+ return ret;
}
void mlx5e_cleanup(void)
{
- mlx5_unregister_interface(&mlx5e_interface);
+ mlx5e_rep_cleanup();
+ auxiliary_driver_unregister(&mlx5e_driver);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 6ed307d7f191..794cd8dfe9c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -30,69 +30,54 @@
* SOFTWARE.
*/
-#include <generated/utsrelease.h>
#include <linux/mlx5/fs.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
-#include <net/netevent.h>
-#include <net/arp.h>
#include <net/devlink.h>
#include <net/ipv6_stubs.h>
#include "eswitch.h"
-#include "eswitch_offloads_chains.h"
#include "en.h"
#include "en_rep.h"
+#include "en/params.h"
+#include "en/txrx.h"
#include "en_tc.h"
-#include "en/tc_tun.h"
+#include "en/rep/tc.h"
+#include "en/rep/neigh.h"
+#include "en/rep/bridge.h"
+#include "en/devlink.h"
#include "fs_core.h"
-#include "lib/port_tun.h"
#include "lib/mlx5.h"
+#include "lib/devcom.h"
+#include "lib/vxlan.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
+#include "en_accel/ipsec.h"
+#include "en/tc/int_port.h"
+#include "en/ptp.h"
+#include "en/fs_ethtool.h"
#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
- max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
-struct mlx5e_rep_indr_block_priv {
- struct net_device *netdev;
- struct mlx5e_rep_priv *rpriv;
-
- struct list_head list;
-};
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev);
-
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
- strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
+ strscpy(drvinfo->driver, mlx5e_rep_driver_name,
sizeof(drvinfo->driver));
- strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev),
fw_rev_sub(mdev), mdev->board_id);
}
-static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *drvinfo)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
- mlx5e_rep_get_drvinfo(dev, drvinfo);
- strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev),
- sizeof(drvinfo->bus_info));
-}
-
static const struct counter_desc sw_rep_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
@@ -192,7 +177,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep)
err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
if (err) {
- pr_warn("vport %d error %d reading stats\n", rep->vport, err);
+ netdev_warn(priv->netdev, "vport %d error %d reading stats\n",
+ rep->vport, err);
return;
}
@@ -236,41 +222,28 @@ static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
}
}
-static void mlx5e_rep_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+static void
+mlx5e_rep_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- mlx5e_ethtool_get_ringparam(priv, param);
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
}
-static int mlx5e_rep_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+static int
+mlx5e_rep_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(dev);
return mlx5e_ethtool_set_ringparam(priv, param);
}
-static int mlx5e_replace_rep_vport_rx_rule(struct mlx5e_priv *priv,
- struct mlx5_flow_destination *dest)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
- struct mlx5_flow_handle *flow_rule;
-
- flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
- rep->vport,
- dest);
- if (IS_ERR(flow_rule))
- return PTR_ERR(flow_rule);
-
- mlx5_del_flow_rules(rpriv->vport_rx_rule);
- rpriv->vport_rx_rule = flow_rule;
- return 0;
-}
-
static void mlx5e_rep_get_channels(struct net_device *dev,
struct ethtool_channels *ch)
{
@@ -283,49 +256,28 @@ static int mlx5e_rep_set_channels(struct net_device *dev,
struct ethtool_channels *ch)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- u16 curr_channels_amount = priv->channels.params.num_channels;
- u32 new_channels_amount = ch->combined_count;
- struct mlx5_flow_destination new_dest;
- int err = 0;
-
- err = mlx5e_ethtool_set_channels(priv, ch);
- if (err)
- return err;
-
- if (curr_channels_amount == 1 && new_channels_amount > 1) {
- new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- new_dest.ft = priv->fs.ttc.ft.t;
- } else if (new_channels_amount == 1 && curr_channels_amount > 1) {
- new_dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- new_dest.tir_num = priv->direct_tir[0].tirn;
- } else {
- return 0;
- }
-
- err = mlx5e_replace_rep_vport_rx_rule(priv, &new_dest);
- if (err) {
- netdev_warn(priv->netdev, "Failed to update vport rx rule, when going from (%d) channels to (%d) channels\n",
- curr_channels_amount, new_channels_amount);
- return err;
- }
- return 0;
+ return mlx5e_ethtool_set_channels(priv, ch);
}
static int mlx5e_rep_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_get_coalesce(priv, coal);
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
}
static int mlx5e_rep_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- return mlx5e_ethtool_set_coalesce(priv, coal);
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
}
static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
@@ -342,39 +294,10 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
return mlx5e_ethtool_get_rxfh_indir_size(priv);
}
-static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- mlx5e_ethtool_get_pauseparam(priv, pauseparam);
-}
-
-static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
-}
-
-static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *link_ksettings)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
-}
-
-static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *link_ksettings)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
-}
-
static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = mlx5e_rep_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = mlx5e_rep_get_strings,
@@ -390,39 +313,6 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
};
-static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
- .get_drvinfo = mlx5e_uplink_rep_get_drvinfo,
- .get_link = ethtool_op_get_link,
- .get_strings = mlx5e_rep_get_strings,
- .get_sset_count = mlx5e_rep_get_sset_count,
- .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
- .get_ringparam = mlx5e_rep_get_ringparam,
- .set_ringparam = mlx5e_rep_set_ringparam,
- .get_channels = mlx5e_rep_get_channels,
- .set_channels = mlx5e_rep_set_channels,
- .get_coalesce = mlx5e_rep_get_coalesce,
- .set_coalesce = mlx5e_rep_set_coalesce,
- .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
- .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
- .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
- .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
- .get_pauseparam = mlx5e_uplink_rep_get_pauseparam,
- .set_pauseparam = mlx5e_uplink_rep_set_pauseparam,
-};
-
-static void mlx5e_rep_get_port_parent_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid)
-{
- struct mlx5e_priv *priv;
- u64 parent_id;
-
- priv = netdev_priv(dev);
-
- parent_id = mlx5_query_nic_system_image_guid(priv->mdev);
- ppid->id_len = sizeof(parent_id);
- memcpy(ppid->id, &parent_id, sizeof(parent_id));
-}
-
static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep)
{
@@ -435,6 +325,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
rpriv = mlx5e_rep_to_rep_priv(rep);
list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ if (rep_sq->send_to_vport_rule_peer)
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
list_del(&rep_sq->list);
kfree(rep_sq);
}
@@ -444,6 +336,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep,
u32 *sqns_array, int sqns_num)
{
+ struct mlx5_eswitch *peer_esw = NULL;
struct mlx5_flow_handle *flow_rule;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_rep_sq *rep_sq;
@@ -454,6 +347,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
return 0;
rpriv = mlx5e_rep_to_rep_priv(rep);
+ if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
for (i = 0; i < sqns_num; i++) {
rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
if (!rep_sq) {
@@ -462,8 +359,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
}
/* Add re-inject rule to the PF/representor sqs */
- flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
- rep->vport,
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
sqns_array[i]);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
@@ -471,26 +367,61 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
goto out_err;
}
rep_sq->send_to_vport_rule = flow_rule;
+ rep_sq->sqn = sqns_array[i];
+
+ if (peer_esw) {
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+ rep, sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
list_add(&rep_sq->list, &rpriv->vport_sqs_list);
}
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return 0;
out_err:
mlx5e_sqs2vport_stop(esw, rep);
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return err;
}
-int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
+static int
+mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
{
+ int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool is_uplink_rep = mlx5e_is_uplink_rep(priv);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int n, tc, nch, num_sqs = 0;
struct mlx5e_channel *c;
- int n, tc, num_sqs = 0;
int err = -ENOMEM;
+ bool ptp_sq;
u32 *sqs;
- sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL);
+ ptp_sq = !!(priv->channels.ptp &&
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS));
+ nch = priv->channels.num + ptp_sq;
+ /* +2 for xdpsqs, they don't exist on the ptp channel but will not be
+ * counted for by num_sqs.
+ */
+ if (is_uplink_rep)
+ sqs_per_channel += 2;
+
+ sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL);
if (!sqs)
goto out;
@@ -498,10 +429,23 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
c = priv->channels.c[n];
for (tc = 0; tc < c->num_tc; tc++)
sqs[num_sqs++] = c->sq[tc].sqn;
+
+ if (is_uplink_rep) {
+ if (c->xdp)
+ sqs[num_sqs++] = c->rq_xdpsq.sqn;
+
+ sqs[num_sqs++] = c->xdpsq.sqn;
+ }
+ }
+ if (ptp_sq) {
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+
+ for (tc = 0; tc < ptp_ch->num_tc; tc++)
+ sqs[num_sqs++] = ptp_ch->ptpsq[tc].txqsq.sqn;
}
err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
- kfree(sqs);
+ kvfree(sqs);
out:
if (err)
@@ -509,7 +453,8 @@ out:
return err;
}
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+static void
+mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -518,655 +463,47 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
mlx5e_sqs2vport_stop(esw, rep);
}
-static unsigned long mlx5e_rep_ipv6_interval(void)
-{
- if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
- return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
-
- return ~0UL;
-}
-
-static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
-{
- unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
- unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
- mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
-}
-
-void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-
- mlx5_fc_queue_stats_work(priv->mdev,
- &neigh_update->neigh_stats_work,
- neigh_update->min_interval);
-}
-
-static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
-{
- return refcount_inc_not_zero(&nhe->refcnt);
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
-
-static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
-{
- if (refcount_dec_and_test(&nhe->refcnt)) {
- mlx5e_rep_neigh_entry_remove(nhe);
- kfree_rcu(nhe, rcu);
- }
-}
-
-static struct mlx5e_neigh_hash_entry *
-mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
- struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_neigh_hash_entry *next = NULL;
-
- rcu_read_lock();
-
- for (next = nhe ?
- list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
- &nhe->neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list) :
- list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list);
- next;
- next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
- &next->neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list))
- if (mlx5e_rep_neigh_entry_hold(next))
- break;
-
- rcu_read_unlock();
-
- if (nhe)
- mlx5e_rep_neigh_entry_release(nhe);
-
- return next;
-}
-
-static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
-{
- struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
- neigh_update.neigh_stats_work.work);
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_neigh_hash_entry *nhe = NULL;
-
- rtnl_lock();
- if (!list_empty(&rpriv->neigh_update.neigh_list))
- mlx5e_rep_queue_neigh_stats_work(priv);
-
- while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
- mlx5e_tc_update_neigh_used_value(nhe);
-
- rtnl_unlock();
-}
-
-static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- bool neigh_connected,
- unsigned char ha[ETH_ALEN])
-{
- struct ethhdr *eth = (struct ethhdr *)e->encap_header;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- bool encap_connected;
- LIST_HEAD(flow_list);
-
- ASSERT_RTNL();
-
- /* wait for encap to be fully initialized */
- wait_for_completion(&e->res_ready);
-
- mutex_lock(&esw->offloads.encap_tbl_lock);
- encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
- if (e->compl_result < 0 || (encap_connected == neigh_connected &&
- ether_addr_equal(e->h_dest, ha)))
- goto unlock;
-
- mlx5e_take_all_encap_flows(e, &flow_list);
-
- if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
- (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
- mlx5e_tc_encap_flows_del(priv, e, &flow_list);
-
- if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
- ether_addr_copy(e->h_dest, ha);
- ether_addr_copy(eth->h_dest, ha);
- /* Update the encap source mac, in case that we delete
- * the flows when encap source mac changed.
- */
- ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
-
- mlx5e_tc_encap_flows_add(priv, e, &flow_list);
- }
-unlock:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- mlx5e_put_encap_flow_list(priv, &flow_list);
-}
-
-static void mlx5e_rep_neigh_update(struct work_struct *work)
-{
- struct mlx5e_neigh_hash_entry *nhe =
- container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
- struct neighbour *n = nhe->n;
- struct mlx5e_encap_entry *e;
- unsigned char ha[ETH_ALEN];
- struct mlx5e_priv *priv;
- bool neigh_connected;
- u8 nud_state, dead;
-
- rtnl_lock();
-
- /* If these parameters are changed after we release the lock,
- * we'll receive another event letting us know about it.
- * We use this lock to avoid inconsistency between the neigh validity
- * and it's hw address.
- */
- read_lock_bh(&n->lock);
- memcpy(ha, n->ha, ETH_ALEN);
- nud_state = n->nud_state;
- dead = n->dead;
- read_unlock_bh(&n->lock);
-
- neigh_connected = (nud_state & NUD_VALID) && !dead;
-
- trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
-
- list_for_each_entry(e, &nhe->encap_list, encap_list) {
- if (!mlx5e_encap_take(e))
- continue;
-
- priv = netdev_priv(e->out_dev);
- mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
- mlx5e_encap_put(priv, e);
- }
- mlx5e_rep_neigh_entry_release(nhe);
- rtnl_unlock();
- neigh_release(n);
-}
-
-static struct mlx5e_rep_indr_block_priv *
-mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- struct mlx5e_rep_indr_block_priv *cb_priv;
-
- /* All callback list access should be protected by RTNL. */
- ASSERT_RTNL();
-
- list_for_each_entry(cb_priv,
- &rpriv->uplink_priv.tc_indr_block_priv_list,
- list)
- if (cb_priv->netdev == netdev)
- return cb_priv;
-
- return NULL;
-}
-
-static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
- struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
-
- list_for_each_entry_safe(cb_priv, temp, head, list) {
- mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
- kfree(cb_priv);
- }
-}
-
-static int
-mlx5e_rep_indr_offload(struct net_device *netdev,
- struct flow_cls_offload *flower,
- struct mlx5e_rep_indr_block_priv *indr_priv)
-{
- unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
- struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
- int err = 0;
-
- switch (flower->command) {
- case FLOW_CLS_REPLACE:
- err = mlx5e_configure_flower(netdev, priv, flower, flags);
- break;
- case FLOW_CLS_DESTROY:
- err = mlx5e_delete_flower(netdev, priv, flower, flags);
- break;
- case FLOW_CLS_STATS:
- err = mlx5e_stats_flower(netdev, priv, flower, flags);
- break;
- default:
- err = -EOPNOTSUPP;
- }
-
- return err;
-}
-
-static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type,
- void *type_data, void *indr_priv)
-{
- struct mlx5e_rep_indr_block_priv *priv = indr_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_indr_offload(priv->netdev, type_data, priv);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv)
-{
- struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
-
- list_del(&indr_priv->list);
- kfree(indr_priv);
-}
-
-static LIST_HEAD(mlx5e_block_cb_list);
-
static int
-mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
- struct mlx5e_rep_priv *rpriv,
- struct flow_block_offload *f)
+mlx5e_rep_add_meta_tunnel_rule(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_indr_block_priv *indr_priv;
- struct flow_block_cb *block_cb;
-
- if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
- return -EOPNOTSUPP;
-
- f->unlocked_driver_cb = true;
- f->driver_block_list = &mlx5e_block_cb_list;
-
- switch (f->command) {
- case FLOW_BLOCK_BIND:
- indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
- if (indr_priv)
- return -EEXIST;
-
- indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
- if (!indr_priv)
- return -ENOMEM;
-
- indr_priv->netdev = netdev;
- indr_priv->rpriv = rpriv;
- list_add(&indr_priv->list,
- &rpriv->uplink_priv.tc_indr_block_priv_list);
-
- block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb,
- indr_priv, indr_priv,
- mlx5e_rep_indr_tc_block_unbind);
- if (IS_ERR(block_cb)) {
- list_del(&indr_priv->list);
- kfree(indr_priv);
- return PTR_ERR(block_cb);
- }
- flow_block_cb_add(block_cb, f);
- list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_group *g;
+ g = esw->fdb_table.offloads.send_to_vport_meta_grp;
+ if (!g)
return 0;
- case FLOW_BLOCK_UNBIND:
- indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
- if (!indr_priv)
- return -ENOENT;
-
- block_cb = flow_block_cb_lookup(f->block,
- mlx5e_rep_indr_setup_block_cb,
- indr_priv);
- if (!block_cb)
- return -ENOENT;
-
- flow_block_cb_remove(block_cb, f);
- list_del(&block_cb->driver_list);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
- return 0;
-}
-static
-int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
- enum tc_setup_type type, void *type_data)
-{
- switch (type) {
- case TC_SETUP_BLOCK:
- return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv,
- type_data);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- int err;
-
- err = __flow_indr_block_cb_register(netdev, rpriv,
- mlx5e_rep_indr_setup_tc_cb,
- rpriv);
- if (err) {
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
- mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
- netdev_name(netdev), err);
- }
- return err;
-}
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
- rpriv);
-}
-
-static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
- uplink_priv.netdevice_nb);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
- struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-
- if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
- !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
- return NOTIFY_OK;
-
- switch (event) {
- case NETDEV_REGISTER:
- mlx5e_rep_indr_register_block(rpriv, netdev);
- break;
- case NETDEV_UNREGISTER:
- mlx5e_rep_indr_unregister_block(rpriv, netdev);
- break;
- }
- return NOTIFY_OK;
-}
-
-static void
-mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
- struct mlx5e_neigh_hash_entry *nhe,
- struct neighbour *n)
-{
- /* Take a reference to ensure the neighbour and mlx5 encap
- * entry won't be destructed until we drop the reference in
- * delayed work.
- */
- neigh_hold(n);
-
- /* This assignment is valid as long as the the neigh reference
- * is taken
- */
- nhe->n = n;
-
- if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
- mlx5e_rep_neigh_entry_release(nhe);
- neigh_release(n);
- }
-}
-
-static struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
- struct mlx5e_neigh *m_neigh);
-
-static int mlx5e_rep_netevent_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
- neigh_update.netevent_nb);
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_neigh_hash_entry *nhe = NULL;
- struct mlx5e_neigh m_neigh = {};
- struct neigh_parms *p;
- struct neighbour *n;
- bool found = false;
-
- switch (event) {
- case NETEVENT_NEIGH_UPDATE:
- n = ptr;
-#if IS_ENABLED(CONFIG_IPV6)
- if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
-#else
- if (n->tbl != &arp_tbl)
-#endif
- return NOTIFY_DONE;
-
- m_neigh.dev = n->dev;
- m_neigh.family = n->ops->family;
- memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-
- rcu_read_lock();
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
- rcu_read_unlock();
- if (!nhe)
- return NOTIFY_DONE;
-
- mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
- break;
-
- case NETEVENT_DELAY_PROBE_TIME_UPDATE:
- p = ptr;
-
- /* We check the device is present since we don't care about
- * changes in the default table, we only care about changes
- * done per device delay prob time parameter.
- */
-#if IS_ENABLED(CONFIG_IPV6)
- if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
-#else
- if (!p->dev || p->tbl != &arp_tbl)
-#endif
- return NOTIFY_DONE;
-
- rcu_read_lock();
- list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
- neigh_list) {
- if (p->dev == nhe->m_neigh.dev) {
- found = true;
- break;
- }
- }
- rcu_read_unlock();
- if (!found)
- return NOTIFY_DONE;
-
- neigh_update->min_interval = min_t(unsigned long,
- NEIGH_VAR(p, DELAY_PROBE_TIME),
- neigh_update->min_interval);
- mlx5_fc_update_sampling_interval(priv->mdev,
- neigh_update->min_interval);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static const struct rhashtable_params mlx5e_neigh_ht_params = {
- .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
- .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
- .key_len = sizeof(struct mlx5e_neigh),
- .automatic_shrinking = true,
-};
-
-static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- int err;
-
- err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
- if (err)
- return err;
+ flow_rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, rep->vport);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
- INIT_LIST_HEAD(&neigh_update->neigh_list);
- mutex_init(&neigh_update->encap_lock);
- INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
- mlx5e_rep_neigh_stats_work);
- mlx5e_rep_neigh_update_init_interval(rpriv);
+ rpriv->send_to_vport_meta_rule = flow_rule;
- rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
- err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
- if (err)
- goto out_err;
return 0;
-
-out_err:
- rhashtable_destroy(&neigh_update->neigh_ht);
- return err;
-}
-
-static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
- unregister_netevent_notifier(&neigh_update->netevent_nb);
-
- flush_workqueue(priv->wq); /* flush neigh update works */
-
- cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
-
- mutex_destroy(&neigh_update->encap_lock);
- rhashtable_destroy(&neigh_update->neigh_ht);
-}
-
-static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
- struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- int err;
-
- err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
- &nhe->rhash_node,
- mlx5e_neigh_ht_params);
- if (err)
- return err;
-
- list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
-
- return err;
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
-
- mutex_lock(&rpriv->neigh_update.encap_lock);
-
- list_del_rcu(&nhe->neigh_list);
-
- rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
- &nhe->rhash_node,
- mlx5e_neigh_ht_params);
- mutex_unlock(&rpriv->neigh_update.encap_lock);
}
-/* This function must only be called under the representor's encap_lock or
- * inside rcu read lock section.
- */
-static struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
- struct mlx5e_neigh *m_neigh)
+static void
+mlx5e_rep_del_meta_tunnel_rule(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct mlx5e_neigh_hash_entry *nhe;
- nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
- mlx5e_neigh_ht_params);
- return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
+ if (rpriv->send_to_vport_meta_rule)
+ mlx5_eswitch_del_send_to_vport_meta_rule(rpriv->send_to_vport_meta_rule);
}
-static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct mlx5e_neigh_hash_entry **nhe)
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv)
{
- int err;
-
- *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
- if (!*nhe)
- return -ENOMEM;
-
- (*nhe)->priv = priv;
- memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
- INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
- spin_lock_init(&(*nhe)->encap_list_lock);
- INIT_LIST_HEAD(&(*nhe)->encap_list);
- refcount_set(&(*nhe)->refcnt, 1);
-
- err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
- if (err)
- goto out_free;
- return 0;
-
-out_free:
- kfree(*nhe);
- return err;
+ mlx5e_add_sqs_fwd_rules(priv);
+ mlx5e_rep_add_meta_tunnel_rule(priv);
}
-int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
- struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
- struct mlx5e_neigh_hash_entry *nhe;
- int err;
-
- err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
- if (err)
- return err;
-
- mutex_lock(&rpriv->neigh_update.encap_lock);
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
- if (!nhe) {
- err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
- if (err) {
- mutex_unlock(&rpriv->neigh_update.encap_lock);
- mlx5_tun_entropy_refcount_dec(tun_entropy,
- e->reformat_type);
- return err;
- }
- }
-
- e->nhe = nhe;
- spin_lock(&nhe->encap_list_lock);
- list_add_rcu(&e->encap_list, &nhe->encap_list);
- spin_unlock(&nhe->encap_list_lock);
-
- mutex_unlock(&rpriv->neigh_update.encap_lock);
-
- return 0;
-}
-
-void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
- struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
-
- if (!e->nhe)
- return;
-
- spin_lock(&e->nhe->encap_list_lock);
- list_del_rcu(&e->encap_list);
- spin_unlock(&e->nhe->encap_list_lock);
-
- mlx5e_rep_neigh_entry_release(e->nhe);
- e->nhe = NULL;
- mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
+ mlx5e_rep_del_meta_tunnel_rule(priv);
+ mlx5e_remove_sqs_fwd_rules(priv);
}
static int mlx5e_rep_open(struct net_device *dev)
@@ -1209,130 +546,6 @@ static int mlx5e_rep_close(struct net_device *dev)
return ret;
}
-static int
-mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
- struct flow_cls_offload *cls_flower, int flags)
-{
- switch (cls_flower->command) {
- case FLOW_CLS_REPLACE:
- return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_DESTROY:
- return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_STATS:
- return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
- flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static
-int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
- struct tc_cls_matchall_offload *ma)
-{
- switch (ma->command) {
- case TC_CLSMATCHALL_REPLACE:
- return mlx5e_tc_configure_matchall(priv, ma);
- case TC_CLSMATCHALL_DESTROY:
- return mlx5e_tc_delete_matchall(priv, ma);
- case TC_CLSMATCHALL_STATS:
- mlx5e_tc_stats_matchall(priv, ma);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
- struct mlx5e_priv *priv = cb_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
- case TC_SETUP_CLSMATCHALL:
- return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- struct flow_cls_offload tmp, *f = type_data;
- struct mlx5e_priv *priv = cb_priv;
- struct mlx5_eswitch *esw;
- unsigned long flags;
- int err;
-
- flags = MLX5_TC_FLAG(INGRESS) |
- MLX5_TC_FLAG(ESW_OFFLOAD) |
- MLX5_TC_FLAG(FT_OFFLOAD);
- esw = priv->mdev->priv.eswitch;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- memcpy(&tmp, f, sizeof(*f));
-
- if (!mlx5_esw_chains_prios_supported(esw) ||
- tmp.common.chain_index)
- return -EOPNOTSUPP;
-
- /* Re-use tc offload path by moving the ft flow to the
- * reserved ft chain.
- *
- * FT offload can use prio range [0, INT_MAX], so we normalize
- * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
- * as with tc, where prio 0 isn't supported.
- *
- * We only support chain 0 of FT offload.
- */
- if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
- return -EOPNOTSUPP;
- if (tmp.common.chain_index != 0)
- return -EOPNOTSUPP;
-
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
- tmp.common.prio++;
- err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
- memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
- return err;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
-static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
-static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct flow_block_offload *f = type_data;
-
- f->unlocked_driver_cb = true;
-
- switch (type) {
- case TC_SETUP_BLOCK:
- return flow_block_cb_setup_simple(type_data,
- &mlx5e_rep_block_tc_cb_list,
- mlx5e_rep_setup_tc_cb,
- priv, priv, true);
- case TC_SETUP_FT:
- return flow_block_cb_setup_simple(type_data,
- &mlx5e_rep_block_ft_cb_list,
- mlx5e_rep_setup_ft_cb,
- priv, priv, true);
- default:
- return -EOPNOTSUPP;
- }
-}
-
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1348,7 +561,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
return (rep->vport == MLX5_VPORT_UPLINK);
}
-static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
{
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -1368,8 +581,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev,
return 0;
}
-static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
- void *sp)
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
{
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -1394,40 +607,36 @@ static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu)
return mlx5e_change_mtu(netdev, new_mtu, NULL);
}
-static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
+static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev)
{
- return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
-}
-
-static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
-{
- struct sockaddr *saddr = addr;
-
- if (!is_valid_ether_addr(saddr->sa_data))
- return -EADDRNOTAVAIL;
-
- ether_addr_copy(netdev->dev_addr, saddr->sa_data);
- return 0;
-}
-
-static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
- __be16 vlan_proto)
-{
- netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");
-
- if (vlan != 0)
- return -EOPNOTSUPP;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_core_dev *dev = priv->mdev;
- /* allow setting 0-vid for compatibility with libvirt */
- return 0;
+ return mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
}
-static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
+static int mlx5e_rep_change_carrier(struct net_device *dev, bool new_carrier)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int err;
- return &rpriv->dl_port;
+ if (new_carrier) {
+ err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, 1, MLX5_VPORT_ADMIN_STATE_UP);
+ if (err)
+ return err;
+ netif_carrier_on(dev);
+ } else {
+ err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, 1, MLX5_VPORT_ADMIN_STATE_DOWN);
+ if (err)
+ return err;
+ netif_carrier_off(dev);
+ }
+ return 0;
}
static const struct net_device_ops mlx5e_netdev_ops_rep = {
@@ -1435,42 +644,35 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
.ndo_stop = mlx5e_rep_close,
.ndo_start_xmit = mlx5e_xmit,
.ndo_setup_tc = mlx5e_rep_setup_tc,
- .ndo_get_devlink_port = mlx5e_get_devlink_port,
+ .ndo_get_devlink_port = mlx5e_rep_get_devlink_port,
.ndo_get_stats64 = mlx5e_rep_get_stats,
.ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
.ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
.ndo_change_mtu = mlx5e_rep_change_mtu,
+ .ndo_change_carrier = mlx5e_rep_change_carrier,
};
-static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
- .ndo_open = mlx5e_open,
- .ndo_stop = mlx5e_close,
- .ndo_start_xmit = mlx5e_xmit,
- .ndo_set_mac_address = mlx5e_uplink_rep_set_mac,
- .ndo_setup_tc = mlx5e_rep_setup_tc,
- .ndo_get_devlink_port = mlx5e_get_devlink_port,
- .ndo_get_stats64 = mlx5e_get_stats,
- .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
- .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
- .ndo_change_mtu = mlx5e_uplink_rep_change_mtu,
- .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
- .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
- .ndo_features_check = mlx5e_features_check,
- .ndo_set_vf_mac = mlx5e_set_vf_mac,
- .ndo_set_vf_rate = mlx5e_set_vf_rate,
- .ndo_get_vf_config = mlx5e_get_vf_config,
- .ndo_get_vf_stats = mlx5e_get_vf_stats,
- .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan,
- .ndo_set_features = mlx5e_set_features,
-};
+bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev)
+{
+ return netdev->netdev_ops == &mlx5e_netdev_ops &&
+ mlx5e_is_uplink_rep(netdev_priv(netdev));
+}
-bool mlx5e_eswitch_rep(struct net_device *netdev)
+bool mlx5e_eswitch_vf_rep(const struct net_device *netdev)
{
- if (netdev->netdev_ops == &mlx5e_netdev_ops_rep ||
- netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep)
- return true;
+ return netdev->netdev_ops == &mlx5e_netdev_ops_rep;
+}
- return false;
+/* One indirect TIR set for outer. Inner not supported in reps. */
+#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS
+
+static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev)
+{
+ int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir);
+ int num_vports = mlx5_eswitch_get_total_vports(mdev);
+
+ return (max_tir_num - mlx5e_get_pf_num_tirs(mdev)
+ - (num_vports * REP_NUM_INDIR_TIRS)) / num_vports;
}
static void mlx5e_build_rep_params(struct net_device *netdev)
@@ -1486,6 +688,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
params = &priv->channels.params;
+
+ params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS;
params->hard_mtu = MLX5E_ETH_HARD_MTU;
params->sw_mtu = netdev->mtu;
@@ -1502,43 +706,27 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
- params->num_tc = 1;
+ params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ params->vlan_strip_disable = true;
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
-
- /* RSS */
- mlx5e_build_rss_params(&priv->rss_params, params->num_channels);
}
-static void mlx5e_build_rep_netdev(struct net_device *netdev)
+static void mlx5e_build_rep_netdev(struct net_device *netdev,
+ struct mlx5_core_dev *mdev)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
- struct mlx5_core_dev *mdev = priv->mdev;
-
- if (rep->vport == MLX5_VPORT_UPLINK) {
- SET_NETDEV_DEV(netdev, mdev->device);
- netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
- /* we want a persistent mac for the uplink rep */
- mlx5_query_mac_address(mdev, netdev->dev_addr);
- netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (MLX5_CAP_GEN(mdev, qos))
- netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
-#endif
- } else {
- netdev->netdev_ops = &mlx5e_netdev_ops_rep;
- eth_hw_addr_random(netdev);
- netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
- }
+ SET_NETDEV_DEV(netdev, mdev->device);
+ netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+ eth_hw_addr_random(netdev);
+ netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
netdev->watchdog_timeo = 15 * HZ;
- netdev->features |= NETIF_F_NETNS_LOCAL;
-
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
netdev->hw_features |= NETIF_F_HW_TC;
+#endif
netdev->hw_features |= NETIF_F_SG;
netdev->hw_features |= NETIF_F_IP_CSUM;
netdev->hw_features |= NETIF_F_IPV6_CSUM;
@@ -1547,63 +735,131 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_TSO6;
netdev->hw_features |= NETIF_F_RXCSUM;
- if (rep->vport == MLX5_VPORT_UPLINK)
- netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
- else
- netdev->features |= NETIF_F_VLAN_CHALLENGED;
-
netdev->features |= netdev->hw_features;
+ netdev->features |= NETIF_F_NETNS_LOCAL;
}
static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+ struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
+
+ mlx5e_build_rep_params(netdev);
+ mlx5e_timestamp_init(priv);
+
+ return 0;
+}
+
+static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
- err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
- if (err)
- return err;
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
- priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS;
+ err = mlx5e_ipsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err);
+ mlx5e_vxlan_set_netdev_info(priv);
mlx5e_build_rep_params(netdev);
- mlx5e_build_rep_netdev(netdev);
-
mlx5e_timestamp_init(priv);
-
return 0;
}
static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
{
- mlx5e_netdev_cleanup(priv->netdev, priv);
+ mlx5e_fs_cleanup(priv->fs);
+ mlx5e_ipsec_cleanup(priv);
}
static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct ttc_params ttc_params = {};
- int tt, err;
+ int err;
- priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
+ mlx5e_fs_set_ns(priv->fs,
+ mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL), false);
/* The inner_ttc in the ttc params is intentionally not set */
- ttc_params.any_tt_tirn = priv->direct_tir[0].tirn;
- mlx5e_set_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
+ mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false);
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
- if (err) {
- netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ /* To give uplik rep TTC a lower level for chaining from root ft */
+ ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
+
+ mlx5e_fs_set_ttc(priv->fs, mlx5_create_ttc_table(priv->mdev, &ttc_params), false);
+ if (IS_ERR(mlx5e_fs_get_ttc(priv->fs, false))) {
+ err = PTR_ERR(mlx5e_fs_get_ttc(priv->fs, false));
+ netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
+ err);
return err;
}
return 0;
}
+static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
+ if (rep->vport != MLX5_VPORT_UPLINK) {
+ /* non uplik reps will skip any bypass tables and go directly to
+ * their own ttc
+ */
+ rpriv->root_ft = mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(priv->fs, false));
+ return 0;
+ }
+
+ /* uplink root ft will be used to auto chain, to ethtool or ttc tables */
+ ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+ if (!ns) {
+ netdev_err(priv->netdev, "Failed to get reps offloads namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.max_fte = 0; /* Empty table, miss rule will always point to next table */
+ ft_attr.prio = 1;
+ ft_attr.level = 1;
+
+ rpriv->root_ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(rpriv->root_ft)) {
+ err = PTR_ERR(rpriv->root_ft);
+ rpriv->root_ft = NULL;
+ }
+
+ return err;
+}
+
+static void mlx5e_destroy_rep_root_ft(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ return;
+ mlx5_destroy_flow_table(rpriv->root_ft);
+}
+
static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -1612,23 +868,46 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
struct mlx5_flow_handle *flow_rule;
struct mlx5_flow_destination dest;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest.tir_num = priv->direct_tir[0].tirn;
- flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
- rep->vport,
- &dest);
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rpriv->root_ft;
+
+ flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, &dest);
if (IS_ERR(flow_rule))
return PTR_ERR(flow_rule);
rpriv->vport_rx_rule = flow_rule;
return 0;
}
+static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (!rpriv->vport_rx_rule)
+ return;
+
+ mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ rpriv->vport_rx_rule = NULL;
+}
+
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
+{
+ rep_vport_rx_rule_destroy(priv);
+
+ return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv);
+}
+
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
int err;
- mlx5e_init_l2_addr(priv);
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res) {
+ err = -ENOMEM;
+ goto err_free_fs;
+ }
+
+ mlx5e_fs_init_l2_addr(priv->fs, priv->netdev);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
if (err) {
@@ -1636,75 +915,76 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
return err;
}
- err = mlx5e_create_indirect_rqt(priv);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, false);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
+ err = mlx5e_create_rep_ttc_table(priv);
if (err)
- goto err_destroy_indirect_tirs;
+ goto err_destroy_rx_res;
- err = mlx5e_create_rep_ttc_table(priv);
+ err = mlx5e_create_rep_root_ft(priv);
if (err)
- goto err_destroy_direct_tirs;
+ goto err_destroy_ttc_table;
err = mlx5e_create_rep_vport_rx_rule(priv);
if (err)
- goto err_destroy_ttc_table;
+ goto err_destroy_root_ft;
+
+ mlx5e_ethtool_init_steering(priv->fs);
return 0;
+err_destroy_root_ft:
+ mlx5e_destroy_rep_root_ft(priv);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv, false);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
-err_destroy_indirect_rqts:
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+err_free_fs:
+ mlx5e_fs_cleanup(priv->fs);
return err;
}
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- mlx5_del_flow_rules(rpriv->vport_rx_rule);
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
- mlx5e_destroy_indirect_tirs(priv, false);
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_ethtool_cleanup_steering(priv->fs);
+ rep_vport_rx_rule_destroy(priv);
+ mlx5e_destroy_rep_root_ft(priv);
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
}
static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv)
{
- int err = mlx5e_init_rep_rx(priv);
+ int err;
+ mlx5e_create_q_counters(priv);
+ err = mlx5e_init_rep_rx(priv);
if (err)
- return err;
+ goto out;
- mlx5e_create_q_counters(priv);
- return 0;
+ mlx5e_tc_int_port_init_rep_rx(priv);
+
+out:
+ return err;
}
static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv)
{
- mlx5e_destroy_q_counters(priv);
+ mlx5e_tc_int_port_cleanup_rep_rx(priv);
mlx5e_cleanup_rep_rx(priv);
+ mlx5e_destroy_q_counters(priv);
}
static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
@@ -1718,34 +998,35 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
priv = netdev_priv(netdev);
uplink_priv = &rpriv->uplink_priv;
- mutex_init(&uplink_priv->unready_flows_lock);
- INIT_LIST_HEAD(&uplink_priv->unready_flows);
-
- /* init shared tc flow table */
- err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ err = mlx5e_rep_tc_init(rpriv);
if (err)
return err;
mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
- /* init indirect block notifications */
- INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
- uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
- err = register_netdevice_notifier_dev_net(rpriv->netdev,
- &uplink_priv->netdevice_nb,
- &uplink_priv->netdevice_nn);
+ mlx5e_rep_bond_init(rpriv);
+ err = mlx5e_rep_tc_netdevice_event_register(rpriv);
if (err) {
- mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
- goto tc_esw_cleanup;
+ mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
+ err);
+ goto err_event_reg;
}
return 0;
-tc_esw_cleanup:
- mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+err_event_reg:
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
return err;
}
+static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
+{
+ mlx5e_rep_tc_netdevice_event_unregister(rpriv);
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
+}
+
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1760,44 +1041,48 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
err = mlx5e_init_uplink_rep_tx(rpriv);
if (err)
- goto destroy_tises;
+ goto err_init_tx;
}
+ err = mlx5e_tc_ht_init(&rpriv->tc_ht);
+ if (err)
+ goto err_ht_init;
+
return 0;
-destroy_tises:
+err_ht_init:
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_cleanup_uplink_rep_tx(rpriv);
+err_init_tx:
mlx5e_destroy_tises(priv);
return err;
}
-static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-
- /* clean indirect TC block notifications */
- unregister_netdevice_notifier_dev_net(rpriv->netdev,
- &uplink_priv->netdevice_nb,
- &uplink_priv->netdevice_nn);
- mlx5e_rep_indr_clean_block_privs(rpriv);
-
- /* delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
- mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
-}
-
static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
- mlx5e_destroy_tises(priv);
+ mlx5e_tc_ht_cleanup(&rpriv->tc_ht);
if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
mlx5e_cleanup_uplink_rep_tx(rpriv);
+
+ mlx5e_destroy_tises(priv);
}
static void mlx5e_rep_enable(struct mlx5e_priv *priv)
{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
mlx5e_set_netdev_mtu_boundaries(priv);
+ mlx5e_rep_neigh_init(rpriv);
+}
+
+static void mlx5e_rep_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ mlx5e_rep_neigh_cleanup(rpriv);
}
static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
@@ -1824,22 +1109,17 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
return NOTIFY_OK;
}
- if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
-
- return NOTIFY_OK;
- }
+ if (event == MLX5_DEV_EVENT_PORT_AFFINITY)
+ return mlx5e_rep_tc_event_port_affinity(priv);
return NOTIFY_DONE;
}
static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
u16 max_mtu;
netdev->min_mtu = ETH_MIN_MTU;
@@ -1847,29 +1127,47 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
mlx5e_set_dev_port_mtu(priv);
- INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
- mlx5e_tc_reoffload_flows_work);
+ mlx5e_rep_tc_enable(priv);
- mlx5_lag_add(mdev, netdev);
+ if (MLX5_CAP_GEN(mdev, uplink_follow))
+ mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
+ 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
+ mlx5_lag_add_netdev(mdev, netdev);
priv->events_nb.notifier_call = uplink_rep_async_event;
mlx5_notifier_register(mdev, &priv->events_nb);
-#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_initialize(priv);
mlx5e_dcbnl_init_app(priv);
-#endif
+ mlx5e_rep_neigh_init(rpriv);
+ mlx5e_rep_bridge_init(priv);
+
+ netdev->wanted_features |= NETIF_F_HW_TC;
+
+ rtnl_lock();
+ if (netif_running(netdev))
+ mlx5e_open(netdev);
+ udp_tunnel_nic_reset_ntf(priv->netdev);
+ netif_device_attach(netdev);
+ rtnl_unlock();
}
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
- struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
+ rtnl_lock();
+ if (netif_running(priv->netdev))
+ mlx5e_close(priv->netdev);
+ netif_device_detach(priv->netdev);
+ rtnl_unlock();
+
+ mlx5e_rep_bridge_cleanup(priv);
+ mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_dcbnl_delete_app(priv);
-#endif
mlx5_notifier_unregister(mdev, &priv->events_nb);
- cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
- mlx5_lag_remove(mdev);
+ mlx5e_rep_tc_disable(priv);
+ mlx5_lag_remove_netdev(mdev, priv->netdev);
+ mlx5_vxlan_reset_to_default(mdev->vxlan);
}
static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
@@ -1902,6 +1200,10 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
&MLX5E_STATS_GRP(pme),
&MLX5E_STATS_GRP(channels),
&MLX5E_STATS_GRP(per_port_buff_congest),
+#ifdef CONFIG_MLX5_EN_IPSEC
+ &MLX5E_STATS_GRP(ipsec_sw),
+#endif
+ &MLX5E_STATS_GRP(ptp),
};
static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
@@ -1917,18 +1219,18 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.init_tx = mlx5e_init_rep_tx,
.cleanup_tx = mlx5e_cleanup_rep_tx,
.enable = mlx5e_rep_enable,
+ .disable = mlx5e_rep_disable,
.update_rx = mlx5e_update_rep_rx,
- .update_stats = mlx5e_update_ndo_stats,
- .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
- .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .update_stats = mlx5e_stats_update_ndo_stats,
+ .rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = 1,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_rep_stats_grps,
.stats_grps_num = mlx5e_rep_stats_grps_num,
+ .max_nch_limit = mlx5e_rep_max_nch_limit,
};
static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
- .init = mlx5e_init_rep,
+ .init = mlx5e_init_ul_rep,
.cleanup = mlx5e_cleanup_rep,
.init_rx = mlx5e_init_ul_rep_rx,
.cleanup_rx = mlx5e_cleanup_ul_rep_rx,
@@ -1937,160 +1239,140 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.enable = mlx5e_uplink_rep_enable,
.disable = mlx5e_uplink_rep_disable,
.update_rx = mlx5e_update_rep_rx,
- .update_stats = mlx5e_update_ndo_stats,
+ .update_stats = mlx5e_stats_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
- .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
- .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = MLX5E_MAX_NUM_TC,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_ul_rep_stats_grps,
.stats_grps_num = mlx5e_ul_rep_stats_grps_num,
};
-static bool
-is_devlink_port_supported(const struct mlx5_core_dev *dev,
- const struct mlx5e_rep_priv *rpriv)
-{
- return rpriv->rep->vport == MLX5_VPORT_UPLINK ||
- rpriv->rep->vport == MLX5_VPORT_PF ||
- mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport);
-}
-
-static unsigned int
-vport_to_devlink_port_index(const struct mlx5_core_dev *dev, u16 vport_num)
-{
- return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num;
-}
-
-static int register_devlink_port(struct mlx5_core_dev *dev,
- struct mlx5e_rep_priv *rpriv)
+/* e-Switch vport representors */
+static int
+mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- struct devlink *devlink = priv_to_devlink(dev);
- struct mlx5_eswitch_rep *rep = rpriv->rep;
- struct netdev_phys_item_id ppid = {};
- unsigned int dl_port_index = 0;
+ struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev));
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct devlink_port *dl_port;
+ int err;
- if (!is_devlink_port_supported(dev, rpriv))
- return 0;
+ rpriv->netdev = priv->netdev;
- mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
+ err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile,
+ rpriv);
+ if (err)
+ return err;
- if (rep->vport == MLX5_VPORT_UPLINK) {
- devlink_port_attrs_set(&rpriv->dl_port,
- DEVLINK_PORT_FLAVOUR_PHYSICAL,
- PCI_FUNC(dev->pdev->devfn), false, 0,
- &ppid.id[0], ppid.id_len);
- dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
- } else if (rep->vport == MLX5_VPORT_PF) {
- devlink_port_attrs_pci_pf_set(&rpriv->dl_port,
- &ppid.id[0], ppid.id_len,
- dev->pdev->devfn);
- dl_port_index = rep->vport;
- } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch,
- rpriv->rep->vport)) {
- devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
- &ppid.id[0], ppid.id_len,
- dev->pdev->devfn,
- rep->vport - 1);
- dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
- }
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_eth_set(dl_port, rpriv->netdev);
- return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
+ return 0;
}
-static void unregister_devlink_port(struct mlx5_core_dev *dev,
- struct mlx5e_rep_priv *rpriv)
+static void
+mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv)
{
- if (is_devlink_port_supported(dev, rpriv))
- devlink_port_unregister(&rpriv->dl_port);
+ struct net_device *netdev = rpriv->netdev;
+ struct devlink_port *dl_port;
+ struct mlx5_core_dev *dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+ dev = priv->mdev;
+
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_clear(dl_port);
+ mlx5e_netdev_attach_nic_profile(priv);
}
-/* e-Switch vport representors */
static int
-mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
const struct mlx5e_profile *profile;
- struct mlx5e_rep_priv *rpriv;
+ struct devlink_port *dl_port;
struct net_device *netdev;
- int nch, err;
-
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
- if (!rpriv)
- return -ENOMEM;
-
- /* rpriv->rep to be looked up when profile->init() is called */
- rpriv->rep = rep;
+ struct mlx5e_priv *priv;
+ int err;
- nch = mlx5e_get_max_num_channels(dev);
- profile = (rep->vport == MLX5_VPORT_UPLINK) ?
- &mlx5e_uplink_rep_profile : &mlx5e_rep_profile;
- netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
+ profile = &mlx5e_rep_profile;
+ netdev = mlx5e_create_netdev(dev, profile);
if (!netdev) {
- pr_warn("Failed to create representor netdev for vport %d\n",
- rep->vport);
- kfree(rpriv);
+ mlx5_core_warn(dev,
+ "Failed to create representor netdev for vport %d\n",
+ rep->vport);
return -EINVAL;
}
- dev_net_set(netdev, mlx5_core_net(dev));
+ mlx5e_build_rep_netdev(netdev, dev);
rpriv->netdev = netdev;
- rep->rep_data[REP_ETH].priv = rpriv;
- INIT_LIST_HEAD(&rpriv->vport_sqs_list);
- if (rep->vport == MLX5_VPORT_UPLINK) {
- err = mlx5e_create_mdev_resources(dev);
- if (err)
- goto err_destroy_netdev;
- }
-
- err = mlx5e_attach_netdev(netdev_priv(netdev));
- if (err) {
- pr_warn("Failed to attach representor netdev for vport %d\n",
- rep->vport);
- goto err_destroy_mdev_resources;
- }
-
- err = mlx5e_rep_neigh_init(rpriv);
+ priv = netdev_priv(netdev);
+ priv->profile = profile;
+ priv->ppriv = rpriv;
+ err = profile->init(dev, netdev);
if (err) {
- pr_warn("Failed to initialized neighbours handling for vport %d\n",
- rep->vport);
- goto err_detach_netdev;
+ netdev_warn(netdev, "rep profile init failed, %d\n", err);
+ goto err_destroy_netdev;
}
- err = register_devlink_port(dev, rpriv);
+ err = mlx5e_attach_netdev(netdev_priv(netdev));
if (err) {
- esw_warn(dev, "Failed to register devlink port %d\n",
- rep->vport);
- goto err_neigh_cleanup;
+ netdev_warn(netdev,
+ "Failed to attach representor netdev for vport %d\n",
+ rep->vport);
+ goto err_cleanup_profile;
}
err = register_netdev(netdev);
if (err) {
- pr_warn("Failed to register representor netdev for vport %d\n",
- rep->vport);
- goto err_devlink_cleanup;
+ netdev_warn(netdev,
+ "Failed to register representor netdev for vport %d\n",
+ rep->vport);
+ goto err_detach_netdev;
}
- if (is_devlink_port_supported(dev, rpriv))
- devlink_port_type_eth_set(&rpriv->dl_port, netdev);
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_eth_set(dl_port, netdev);
return 0;
-err_devlink_cleanup:
- unregister_devlink_port(dev, rpriv);
-
-err_neigh_cleanup:
- mlx5e_rep_neigh_cleanup(rpriv);
-
err_detach_netdev:
mlx5e_detach_netdev(netdev_priv(netdev));
-err_destroy_mdev_resources:
- if (rep->vport == MLX5_VPORT_UPLINK)
- mlx5e_destroy_mdev_resources(dev);
+err_cleanup_profile:
+ priv->profile->cleanup(priv);
err_destroy_netdev:
mlx5e_destroy_netdev(netdev_priv(netdev));
- kfree(rpriv);
+ return err;
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+ int err;
+
+ rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return -ENOMEM;
+
+ /* rpriv->rep to be looked up when profile->init() is called */
+ rpriv->rep = rep;
+ rep->rep_data[REP_ETH].priv = rpriv;
+ INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ err = mlx5e_vport_uplink_rep_load(dev, rep);
+ else
+ err = mlx5e_vport_vf_rep_load(dev, rep);
+
+ if (err)
+ kvfree(rpriv);
+
return err;
}
@@ -2101,18 +1383,23 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *dev = priv->mdev;
+ struct devlink_port *dl_port;
void *ppriv = priv->ppriv;
- if (is_devlink_port_supported(dev, rpriv))
- devlink_port_type_clear(&rpriv->dl_port);
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ mlx5e_vport_uplink_rep_unload(rpriv);
+ goto free_ppriv;
+ }
+
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_clear(dl_port);
unregister_netdev(netdev);
- unregister_devlink_port(dev, rpriv);
- mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
- if (rep->vport == MLX5_VPORT_UPLINK)
- mlx5e_destroy_mdev_resources(priv->mdev);
+ priv->profile->cleanup(priv);
mlx5e_destroy_netdev(priv);
- kfree(ppriv); /* mlx5e_rep_priv */
+free_ppriv:
+ kvfree(ppriv); /* mlx5e_rep_priv */
}
static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
@@ -2124,22 +1411,108 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
return rpriv->netdev;
}
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (!rep_sq->send_to_vport_rule_peer)
+ continue;
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+ rep_sq->send_to_vport_rule_peer = NULL;
+ }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ struct mlx5_eswitch *peer_esw)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (rep_sq->send_to_vport_rule_peer)
+ continue;
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+ if (IS_ERR(flow_rule))
+ goto err_out;
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
+ return 0;
+err_out:
+ mlx5e_vport_rep_event_unpair(rep);
+ return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ enum mlx5_switchdev_event event,
+ void *data)
+{
+ int err = 0;
+
+ if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+ err = mlx5e_vport_rep_event_pair(esw, rep, data);
+ else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+ mlx5e_vport_rep_event_unpair(rep);
+
+ return err;
+}
+
static const struct mlx5_eswitch_rep_ops rep_ops = {
.load = mlx5e_vport_rep_load,
.unload = mlx5e_vport_rep_unload,
- .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+ .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+ .event = mlx5e_vport_rep_event,
};
-void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
+static int mlx5e_rep_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct mlx5_eswitch *esw;
+ esw = mdev->priv.eswitch;
mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH);
+ return 0;
}
-void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
+static void mlx5e_rep_remove(struct auxiliary_device *adev)
{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5_adev *vdev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = vdev->mdev;
+ struct mlx5_eswitch *esw;
+ esw = mdev->priv.eswitch;
mlx5_eswitch_unregister_vport_reps(esw, REP_ETH);
}
+
+static const struct auxiliary_device_id mlx5e_rep_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".eth-rep", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5e_rep_id_table);
+
+static struct auxiliary_driver mlx5e_rep_driver = {
+ .name = "eth-rep",
+ .probe = mlx5e_rep_probe,
+ .remove = mlx5e_rep_remove,
+ .id_table = mlx5e_rep_id_table,
+};
+
+int mlx5e_rep_init(void)
+{
+ return auxiliary_driver_register(&mlx5e_rep_driver);
+}
+
+void mlx5e_rep_cleanup(void)
+{
+ auxiliary_driver_unregister(&mlx5e_rep_driver);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 3f756d51435f..b4e691760da9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -41,6 +41,8 @@
#include "lib/port_tun.h"
#ifdef CONFIG_MLX5_ESWITCH
+extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep;
+
struct mlx5e_neigh_update_table {
struct rhashtable neigh_ht;
/* Save the neigh hash entries in a list in addition to the hash table
@@ -55,25 +57,22 @@ struct mlx5e_neigh_update_table {
unsigned long min_interval; /* jiffies */
};
-struct mlx5_rep_uplink_priv {
- /* Filters DB - instantiated by the uplink representor and shared by
- * the uplink's VFs
- */
- struct rhashtable tc_ht;
+struct mlx5_tc_ct_priv;
+struct mlx5_tc_int_port_priv;
+struct mlx5e_rep_bond;
+struct mlx5e_tc_tun_encap;
+struct mlx5e_post_act;
+struct mlx5e_flow_meters;
+struct mlx5_rep_uplink_priv {
/* indirect block callbacks are invoked on bind/unbind events
* on registered higher level devices (e.g. tunnel devices)
*
* tc_indr_block_cb_priv_list is used to lookup indirect callback
* private data
*
- * netdevice_nb is the netdev events notifier - used to register
- * tunnel devices for block events
- *
*/
struct list_head tc_indr_block_priv_list;
- struct notifier_block netdevice_nb;
- struct netdev_net_notifier netdevice_nn;
struct mlx5_tun_entropy tun_entropy;
@@ -81,17 +80,39 @@ struct mlx5_rep_uplink_priv {
struct mutex unready_flows_lock;
struct list_head unready_flows;
struct work_struct reoffload_flows_work;
+
+ /* maps tun_info to a unique id*/
+ struct mapping_ctx *tunnel_mapping;
+ /* maps tun_enc_opts to a unique id*/
+ struct mapping_ctx *tunnel_enc_opts_mapping;
+
+ struct mlx5e_post_act *post_act;
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct mlx5e_tc_psample *tc_psample;
+
+ /* support eswitch vports bonding */
+ struct mlx5e_rep_bond *bond;
+
+ /* tc tunneling encapsulation private data */
+ struct mlx5e_tc_tun_encap *encap;
+
+ /* OVS internal port support */
+ struct mlx5e_tc_int_port_priv *int_port_priv;
+
+ struct mlx5e_flow_meters *flow_meters;
};
struct mlx5e_rep_priv {
struct mlx5_eswitch_rep *rep;
struct mlx5e_neigh_update_table neigh_update;
struct net_device *netdev;
+ struct mlx5_flow_table *root_ft;
struct mlx5_flow_handle *vport_rx_rule;
struct list_head vport_sqs_list;
struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
struct rtnl_link_stats64 prev_vf_vport_stats;
- struct devlink_port dl_port;
+ struct mlx5_flow_handle *send_to_vport_meta_rule;
+ struct rhashtable tc_ht;
};
static inline
@@ -101,7 +122,6 @@ struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
}
struct mlx5e_neigh {
- struct net_device *dev;
union {
__be32 v4;
struct in6_addr v6;
@@ -113,6 +133,7 @@ struct mlx5e_neigh_hash_entry {
struct rhash_head rhash_node;
struct mlx5e_neigh m_neigh;
struct mlx5e_priv *priv;
+ struct net_device *neigh_dev;
/* Save the neigh hash entry in a list on the representor in
* addition to the hash table. In order to iterate easily over the
@@ -125,19 +146,13 @@ struct mlx5e_neigh_hash_entry {
/* encap list sharing the same neigh */
struct list_head encap_list;
- /* valid only when the neigh reference is taken during
- * neigh_update_work workqueue callback.
- */
- struct neighbour *n;
- struct work_struct neigh_update_work;
-
/* neigh hash entry can be deleted only when the refcount is zero.
* refcount is needed to avoid neigh hash entry removal by TC, while
* it's used by the neigh notification call.
*/
refcount_t refcnt;
- /* Save the last reported time offloaded trafic pass over one of the
+ /* Save the last reported time offloaded traffic pass over one of the
* neigh hash entry flows. Use it to periodically update the neigh
* 'used' value and avoid neigh deleting by the kernel.
*/
@@ -149,6 +164,30 @@ struct mlx5e_neigh_hash_entry {
enum {
/* set when the encap entry is successfully offloaded into HW */
MLX5_ENCAP_ENTRY_VALID = BIT(0),
+ MLX5_REFORMAT_DECAP = BIT(1),
+ MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2),
+};
+
+struct mlx5e_decap_key {
+ struct ethhdr key;
+};
+
+struct mlx5e_decap_entry {
+ struct mlx5e_decap_key key;
+ struct list_head flows;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_mpls_info {
+ u32 label;
+ u8 tc;
+ u8 bos;
+ u8 ttl;
};
struct mlx5e_encap_entry {
@@ -156,18 +195,19 @@ struct mlx5e_encap_entry {
struct mlx5e_neigh_hash_entry *nhe;
/* neigh hash entry list of encaps sharing the same neigh */
struct list_head encap_list;
- struct mlx5e_neigh m_neigh;
/* a node of the eswitch encap hash table which keeping all the encap
* entries
*/
struct hlist_node encap_hlist;
struct list_head flows;
+ struct list_head route_list;
struct mlx5_pkt_reformat *pkt_reformat;
const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
struct net_device *out_dev;
- struct net_device *route_dev;
+ int route_dev_ifindex;
struct mlx5e_tc_tunnel *tunnel;
int reformat_type;
u8 flags;
@@ -181,30 +221,51 @@ struct mlx5e_encap_entry {
struct mlx5e_rep_sq {
struct mlx5_flow_handle *send_to_vport_rule;
+ struct mlx5_flow_handle *send_to_vport_rule_peer;
+ u32 sqn;
struct list_head list;
};
-void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
-void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
-bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
-int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+int mlx5e_rep_init(void);
+void mlx5e_rep_cleanup(void);
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv);
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev);
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev);
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup);
-void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id);
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp);
-int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e);
-void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e);
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv);
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
-bool mlx5e_eswitch_rep(struct net_device *netdev);
+bool mlx5e_eswitch_vf_rep(const struct net_device *netdev);
+bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev);
+static inline bool mlx5e_eswitch_rep(const struct net_device *netdev)
+{
+ return mlx5e_eswitch_vf_rep(netdev) ||
+ mlx5e_eswitch_uplink_rep(netdev);
+}
#else /* CONFIG_MLX5_ESWITCH */
static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
-static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) {}
+static inline int mlx5e_rep_init(void) { return 0; };
+static inline void mlx5e_rep_cleanup(void) {};
+static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev,
+ int attr_id) { return false; }
+static inline int mlx5e_rep_get_offload_stats(int attr_id,
+ const struct net_device *dev,
+ void *sp) { return -EOPNOTSUPP; }
#endif
static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1c3ab69cbd96..a61a43fc8d5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -30,25 +30,51 @@
* SOFTWARE.
*/
-#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
-#include <linux/indirect_call_wrapper.h>
+#include <linux/bitmap.h>
+#include <linux/filter.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
+#include <net/gro.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/xdp_sock_drv.h>
#include "en.h"
+#include "en/txrx.h"
#include "en_tc.h"
#include "eswitch.h"
#include "en_rep.h"
+#include "en/rep/tc.h"
#include "ipoib/ipoib.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/macsec.h"
#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/tls_rxtx.h"
-#include "lib/clock.h"
+#include "en_accel/ktls_txrx.h"
#include "en/xdp.h"
#include "en/xsk/rx.h"
#include "en/health.h"
+#include "en/params.h"
+#include "devlink.h"
+#include "en/devlink.h"
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+static struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
+ .handle_rx_cqe = mlx5e_handle_rx_cqe,
+ .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
+};
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
@@ -120,8 +146,17 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
title->check_sum = mini_cqe->checksum;
title->op_own &= 0xf0;
title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz);
- title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
+ /* state bit set implies linked-list striding RQ wq type and
+ * HW stride index capability supported
+ */
+ if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) {
+ title->wqe_counter = mini_cqe->stridx;
+ return;
+ }
+
+ /* HW stride index capability not supported */
+ title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
else
@@ -158,7 +193,9 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
- rq->handle_rx_cqe(rq, &cqd->title);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+ rq, &cqd->title);
}
mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
wq->cc = cqcc;
@@ -178,19 +215,15 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
mlx5e_read_title_slot(rq, wq, cc);
mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
mlx5e_decompress_cqe(rq, wq, cc);
- rq->handle_rx_cqe(rq, &cqd->title);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+ rq, &cqd->title);
cqd->mini_arr_idx++;
return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
}
-static inline bool mlx5e_page_is_reserved(struct page *page)
-{
- return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id();
-}
-
-static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
{
struct mlx5e_page_cache *cache = &rq->page_cache;
u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
@@ -201,120 +234,101 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
return false;
}
- if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
+ if (!dev_page_is_reusable(page)) {
stats->cache_waive++;
return false;
}
- cache->page_cache[cache->tail] = *dma_info;
+ cache->page_cache[cache->tail] = page;
cache->tail = tail_next;
return true;
}
-static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
{
struct mlx5e_page_cache *cache = &rq->page_cache;
struct mlx5e_rq_stats *stats = rq->stats;
+ dma_addr_t addr;
if (unlikely(cache->head == cache->tail)) {
stats->cache_empty++;
return false;
}
- if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+ if (page_ref_count(cache->page_cache[cache->head]) != 1) {
stats->cache_busy++;
return false;
}
- *dma_info = cache->page_cache[cache->head];
+ au->page = cache->page_cache[cache->head];
cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
stats->cache_reuse++;
- dma_sync_single_for_device(rq->pdev, dma_info->addr,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
+ addr = page_pool_get_dma_addr(au->page);
+ /* Non-XSK always uses PAGE_SIZE. */
+ dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
return true;
}
-static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
{
- if (mlx5e_rx_cache_get(rq, dma_info))
+ dma_addr_t addr;
+
+ if (mlx5e_rx_cache_get(rq, au))
return 0;
- dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
- if (unlikely(!dma_info->page))
+ au->page = page_pool_dev_alloc_pages(rq->page_pool);
+ if (unlikely(!au->page))
return -ENOMEM;
- dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
- PAGE_SIZE, rq->buff.map_dir);
- if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
- page_pool_recycle_direct(rq->page_pool, dma_info->page);
- dma_info->page = NULL;
+ /* Non-XSK always uses PAGE_SIZE. */
+ addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
+ if (unlikely(dma_mapping_error(rq->pdev, addr))) {
+ page_pool_recycle_direct(rq->page_pool, au->page);
+ au->page = NULL;
return -ENOMEM;
}
+ page_pool_set_dma_addr(au->page, addr);
return 0;
}
-static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
{
- if (rq->umem)
- return mlx5e_xsk_page_alloc_umem(rq, dma_info);
- else
- return mlx5e_page_alloc_pool(rq, dma_info);
-}
+ dma_addr_t dma_addr = page_pool_get_dma_addr(page);
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
-{
- dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
+ dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ page_pool_set_dma_addr(page, 0);
}
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info,
- bool recycle)
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
{
if (likely(recycle)) {
- if (mlx5e_rx_cache_put(rq, dma_info))
+ if (mlx5e_rx_cache_put(rq, page))
return;
- mlx5e_page_dma_unmap(rq, dma_info);
- page_pool_recycle_direct(rq->page_pool, dma_info->page);
+ mlx5e_page_dma_unmap(rq, page);
+ page_pool_recycle_direct(rq->page_pool, page);
} else {
- mlx5e_page_dma_unmap(rq, dma_info);
- page_pool_release_page(rq->page_pool, dma_info->page);
- put_page(dma_info->page);
+ mlx5e_page_dma_unmap(rq, page);
+ page_pool_release_page(rq->page_pool, page);
+ put_page(page);
}
}
-static inline void mlx5e_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info,
- bool recycle)
-{
- if (rq->umem)
- /* The `recycle` parameter is ignored, and the page is always
- * put into the Reuse Ring, because there is no way to return
- * the page to the userspace when the interface goes down.
- */
- mlx5e_xsk_page_release(rq, dma_info);
- else
- mlx5e_page_release_dynamic(rq, dma_info, recycle);
-}
-
static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *frag)
{
int err = 0;
if (!frag->offset)
- /* On first frag (offset == 0), replenish page (dma_info actually).
- * Other frags that point to the same dma_info (with a different
+ /* On first frag (offset == 0), replenish page (alloc_unit actually).
+ * Other frags that point to the same alloc_unit (with a different
* offset) should just use the new one without replenishing again
* by themselves.
*/
- err = mlx5e_page_alloc(rq, frag->di);
+ err = mlx5e_page_alloc_pool(rq, frag->au);
return err;
}
@@ -324,7 +338,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
bool recycle)
{
if (frag->last_in_page)
- mlx5e_page_release(rq, frag->di, recycle);
+ mlx5e_page_release_dynamic(rq, frag->au->page, recycle);
}
static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
@@ -340,12 +354,16 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
int i;
for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
+ dma_addr_t addr;
+ u16 headroom;
+
err = mlx5e_get_rx_frag(rq, frag);
if (unlikely(err))
goto free_frags;
- wqe->data[i].addr = cpu_to_be64(frag->di->addr +
- frag->offset + rq->buff.headroom);
+ headroom = i == 0 ? rq->buff.headroom : 0;
+ addr = page_pool_get_dma_addr(frag->au->page);
+ wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom);
}
return 0;
@@ -363,91 +381,98 @@ static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
{
int i;
+ if (rq->xsk_pool) {
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ xsk_buff_free(wi->au->xsk);
+ return;
+ }
+
for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
mlx5e_put_rx_frag(rq, wi, recycle);
}
-void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
mlx5e_free_rx_wqe(rq, wi, false);
}
-static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
+static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
- int err;
int i;
- if (rq->umem) {
- int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
-
- if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired)))
- return -ENOMEM;
- }
-
for (i = 0; i < wqe_bulk; i++) {
- struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_rx_wqe_cyc *wqe;
- err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i);
- if (unlikely(err))
- goto free_wqes;
- }
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
- return 0;
-
-free_wqes:
- while (--i >= 0)
- mlx5e_dealloc_rx_wqe(rq, ix + i);
+ if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j)))
+ break;
+ }
- return err;
+ return i;
}
static inline void
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
- struct mlx5e_dma_info *di, u32 frag_offset, u32 len,
+ union mlx5e_alloc_unit *au, u32 frag_offset, u32 len,
unsigned int truesize)
{
- dma_sync_single_for_cpu(rq->pdev,
- di->addr + frag_offset,
- len, DMA_FROM_DEVICE);
- page_ref_inc(di->page);
+ dma_addr_t addr = page_pool_get_dma_addr(au->page);
+
+ dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
+ rq->buff.map_dir);
+ page_ref_inc(au->page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- di->page, frag_offset, len, truesize);
+ au->page, frag_offset, len, truesize);
}
static inline void
-mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
- struct mlx5e_dma_info *dma_info,
- int offset_from, u32 headlen)
+mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct page *page, dma_addr_t addr,
+ int offset_from, int dma_offset, u32 headlen)
{
- const void *from = page_address(dma_info->page) + offset_from;
+ const void *from = page_address(page) + offset_from;
/* Aligning len to sizeof(long) optimizes memcpy performance */
unsigned int len = ALIGN(headlen, sizeof(long));
- dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
- DMA_FROM_DEVICE);
+ dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len,
+ rq->buff.map_dir);
skb_copy_to_linear_data(skb, from, len);
}
static void
mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
{
+ union mlx5e_alloc_unit *alloc_units = wi->alloc_units;
bool no_xdp_xmit;
- struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
int i;
/* A common case for AF_XDP. */
- if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE))
+ if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe))
return;
- no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap,
- MLX5_MPWRQ_PAGES_PER_WQE);
+ no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
- for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
- if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
- mlx5e_page_release(rq, &dma_info[i], recycle);
+ if (rq->xsk_pool) {
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ xsk_buff_free(alloc_units[i].xsk);
+ } else {
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle);
+ }
}
static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
@@ -466,67 +491,208 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
mlx5_wq_ll_update_db_record(wq);
}
-static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
- struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
+/* This function returns the size of the continuous free space inside a bitmap
+ * that starts from first and no longer than len including circular ones.
+ */
+static int bitmap_find_window(unsigned long *bitmap, int len,
+ int bitmap_size, int first)
+{
+ int next_one, count;
+
+ next_one = find_next_bit(bitmap, bitmap_size, first);
+ if (next_one == bitmap_size) {
+ if (bitmap_size - first >= len)
+ return len;
+ next_one = find_next_bit(bitmap, bitmap_size, 0);
+ count = next_one + bitmap_size - first;
+ } else {
+ count = next_one - first;
+ }
+
+ return min(len, count);
+}
+
+static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
+ __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
+{
+ memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_UMR);
+ umr_wqe->ctrl.umr_mkey = key;
+ umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
+ | MLX5E_KLM_UMR_DS_CNT(klm_len));
+ umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+ umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
+ umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+}
+
+static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
+ struct mlx5e_icosq *sq,
+ u16 klm_entries, u16 index)
{
- struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
+ u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
+ struct page *page = shampo->last_page;
+ u64 addr = shampo->last_addr;
+ struct mlx5e_dma_info *dma_info;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int headroom, i;
+
+ headroom = rq->buff.headroom;
+ new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+ entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT);
+ wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
+ pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+ build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
+
+ for (i = 0; i < entries; i++, index++) {
+ dma_info = &shampo->info[index];
+ if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
+ MLX5_UMR_KLM_ALIGNMENT))
+ goto update_klm;
+ header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
+ MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
+ if (!(header_offset & (PAGE_SIZE - 1))) {
+ union mlx5e_alloc_unit au;
+
+ err = mlx5e_page_alloc_pool(rq, &au);
+ if (unlikely(err))
+ goto err_unmap;
+ page = dma_info->page = au.page;
+ addr = dma_info->addr = page_pool_get_dma_addr(au.page);
+ } else {
+ dma_info->addr = addr + header_offset;
+ dma_info->page = page;
+ }
+
+update_klm:
+ umr_wqe->inline_klms[i].bcount =
+ cpu_to_be32(MLX5E_RX_MAX_HEAD);
+ umr_wqe->inline_klms[i].key = cpu_to_be32(lkey);
+ umr_wqe->inline_klms[i].va =
+ cpu_to_be64(dma_info->addr + headroom);
+ }
- edge_wi = wi + nnops;
+ sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
+ .num_wqebbs = wqe_bbs,
+ .shampo.len = new_entries,
+ };
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- wi->opcode = MLX5_OPCODE_NOP;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
+ shampo->last_page = page;
+ shampo->last_addr = addr;
+ sq->pc += wqe_bbs;
+ sq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_unmap:
+ while (--i >= 0) {
+ dma_info = &shampo->info[--index];
+ if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
+ dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
+ mlx5e_page_release_dynamic(rq, dma_info->page, true);
+ }
}
+ rq->stats->buff_alloc_err++;
+ return err;
+}
+
+static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u16 klm_entries, num_wqe, index, entries_before;
+ struct mlx5e_icosq *sq = rq->icosq;
+ int i, err, max_klm_entries, len;
+
+ max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
+ klm_entries = bitmap_find_window(shampo->bitmap,
+ shampo->hd_per_wqe,
+ shampo->hd_per_wq, shampo->pi);
+ if (!klm_entries)
+ return 0;
+
+ klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+ index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT);
+ entries_before = shampo->hd_per_wq - index;
+
+ if (unlikely(entries_before < klm_entries))
+ num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
+ DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
+ else
+ num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
+
+ for (i = 0; i < num_wqe; i++) {
+ len = (klm_entries > max_klm_entries) ? max_klm_entries :
+ klm_entries;
+ if (unlikely(index + len > shampo->hd_per_wq))
+ len = shampo->hd_per_wq - index;
+ err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
+ if (unlikely(err))
+ return err;
+ index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
+ klm_entries -= len;
+ }
+
+ return 0;
}
static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
- struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
- struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
- struct mlx5e_icosq *sq = &rq->channel->icosq;
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ union mlx5e_alloc_unit *au = &wi->alloc_units[0];
+ struct mlx5e_icosq *sq = rq->icosq;
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
- u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
- u16 pi, contig_wqebbs_room;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
int err;
int i;
- if (rq->umem &&
- unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) {
- err = -ENOMEM;
- goto err;
- }
-
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
- mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ err = mlx5e_alloc_rx_hd_mpwqe(rq);
+ if (unlikely(err))
+ goto err;
}
+ pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs);
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
- for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
- err = mlx5e_page_alloc(rq, dma_info);
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
+ dma_addr_t addr;
+
+ err = mlx5e_page_alloc_pool(rq, au);
if (unlikely(err))
goto err_unmap;
- umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+ addr = page_pool_get_dma_addr(au->page);
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
}
- bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
wi->consumed_strides = 0;
umr_wqe->ctrl.opmod_idx_opcode =
cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
MLX5_OPCODE_UMR);
- umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
- sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
- sq->db.ico_wqe[pi].umr.rq = rq;
- sq->pc += MLX5E_UMR_WQEBBS;
+ offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
+
+ sq->pc += rq->mpwqe.umr_wqebbs;
sq->doorbell_cseg = &umr_wqe->ctrl;
@@ -534,8 +700,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
err_unmap:
while (--i >= 0) {
- dma_info--;
- mlx5e_page_release(rq, dma_info, true);
+ au--;
+ mlx5e_page_release_dynamic(rq, au->page, true);
}
err:
@@ -544,48 +710,151 @@ err:
return err;
}
-void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+/* This function is responsible to dealloc SHAMPO header buffer.
+ * close == true specifies that we are in the middle of closing RQ operation so
+ * we go over all the entries and if they are not in use we free them,
+ * otherwise we only go over a specific range inside the header buffer that are
+ * not in use.
+ */
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
{
- struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ int hd_per_wq = shampo->hd_per_wq;
+ struct page *deleted_page = NULL;
+ struct mlx5e_dma_info *hd_info;
+ int i, index = start;
+
+ for (i = 0; i < len; i++, index++) {
+ if (index == hd_per_wq)
+ index = 0;
+
+ if (close && !test_bit(index, shampo->bitmap))
+ continue;
+
+ hd_info = &shampo->info[index];
+ hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
+ if (hd_info->page != deleted_page) {
+ deleted_page = hd_info->page;
+ mlx5e_page_release_dynamic(rq, hd_info->page, false);
+ }
+ }
+
+ if (start + len > hd_per_wq) {
+ len -= hd_per_wq - start;
+ bitmap_clear(shampo->bitmap, start, hd_per_wq - start);
+ start = 0;
+ }
+
+ bitmap_clear(shampo->bitmap, start, len);
+}
+
+static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
/* Don't recycle, this function is called on rq/netdev close */
mlx5e_free_rx_mpwqe(rq, wi, false);
}
-bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
+INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
- u8 wqe_bulk;
- int err;
+ int wqe_bulk, count;
+ bool busy = false;
+ u16 head;
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return false;
- wqe_bulk = rq->wqe.info.wqe_bulk;
-
- if (mlx5_wq_cyc_missing(wq) < wqe_bulk)
+ if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk)
return false;
- do {
- u16 head = mlx5_wq_cyc_get_head(wq);
+ if (rq->page_pool)
+ page_pool_nid_changed(rq->page_pool, numa_mem_id());
- err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
- if (unlikely(err)) {
- rq->stats->buff_alloc_err++;
- break;
- }
+ wqe_bulk = mlx5_wq_cyc_missing(wq);
+ head = mlx5_wq_cyc_get_head(wq);
+
+ /* Don't allow any newly allocated WQEs to share the same page with old
+ * WQEs that aren't completed yet. Stop earlier.
+ */
+ wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;
- mlx5_wq_cyc_push_n(wq, wqe_bulk);
- } while (mlx5_wq_cyc_missing(wq) >= wqe_bulk);
+ if (!rq->xsk_pool)
+ count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
+ else if (likely(!rq->xsk_pool->dma_need_sync))
+ count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
+ else
+ /* If dma_need_sync is true, it's more efficient to call
+ * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch,
+ * because the latter does the same check and returns only one
+ * frame.
+ */
+ count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk);
+
+ mlx5_wq_cyc_push_n(wq, count);
+ if (unlikely(count != wqe_bulk)) {
+ rq->stats->buff_alloc_err++;
+ busy = true;
+ }
/* ensure wqes are visible to device before updating doorbell record */
dma_wmb();
mlx5_wq_cyc_update_db_record(wq);
- return !!err;
+ return busy;
}
-void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
+void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
+{
+ u16 sqcc;
+
+ sqcc = sq->cc;
+
+ while (sqcc != sq->pc) {
+ struct mlx5e_icosq_wqe_info *wi;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+ sqcc += wi->num_wqebbs;
+#ifdef CONFIG_MLX5_EN_TLS
+ switch (wi->wqe_type) {
+ case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
+ mlx5e_ktls_handle_ctx_completion(wi);
+ break;
+ case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
+ mlx5e_ktls_handle_get_psv_completion(wi, sq);
+ break;
+ }
+#endif
+ }
+ sq->cc = sqcc;
+}
+
+static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
+ struct mlx5e_icosq *sq)
+{
+ struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
+ struct mlx5e_shampo_hd *shampo;
+ /* assume 1:1 relationship between RQ and icosq */
+ struct mlx5e_rq *rq = &c->rq;
+ int end, from, len = umr.len;
+
+ shampo = rq->mpwqe.shampo;
+ end = shampo->hd_per_wq;
+ from = shampo->ci;
+ if (from + len > shampo->hd_per_wq) {
+ len -= end - from;
+ bitmap_set(shampo->bitmap, from, end - from);
+ from = 0;
+ }
+
+ bitmap_set(shampo->bitmap, from, len);
+ shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
+}
+
+int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
{
struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
struct mlx5_cqe64 *cqe;
@@ -593,11 +862,11 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
int i;
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
- return;
+ return 0;
cqe = mlx5_cqwq_get_cqe(&cq->wq);
if (likely(!cqe))
- return;
+ return 0;
/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
* otherwise a cq overrun may occur
@@ -614,48 +883,66 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
- struct mlx5e_sq_wqe_info *wi;
+ struct mlx5e_icosq_wqe_info *wi;
u16 ci;
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
- wi = &sq->db.ico_wqe[ci];
+ wi = &sq->db.wqe_info[ci];
+ sqcc += wi->num_wqebbs;
if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
- netdev_WARN_ONCE(cq->channel->netdev,
+ netdev_WARN_ONCE(cq->netdev,
"Bad OP in ICOSQ CQE: 0x%x\n",
get_cqe_opcode(cqe));
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+ (struct mlx5_err_cqe *)cqe);
+ mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
- queue_work(cq->channel->priv->wq, &sq->recover_work);
+ queue_work(cq->priv->wq, &sq->recover_work);
break;
}
- if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
- sqcc += MLX5E_UMR_WQEBBS;
+ switch (wi->wqe_type) {
+ case MLX5E_ICOSQ_WQE_UMR_RX:
wi->umr.rq->mpwqe.umr_completed++;
- } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) {
- sqcc++;
- } else {
- netdev_WARN_ONCE(cq->channel->netdev,
- "Bad OPCODE in ICOSQ WQE info: 0x%x\n",
- wi->opcode);
+ break;
+ case MLX5E_ICOSQ_WQE_NOP:
+ break;
+ case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR:
+ mlx5e_handle_shampo_hd_umr(wi->shampo, sq);
+ break;
+#ifdef CONFIG_MLX5_EN_TLS
+ case MLX5E_ICOSQ_WQE_UMR_TLS:
+ break;
+ case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
+ mlx5e_ktls_handle_ctx_completion(wi);
+ break;
+ case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
+ mlx5e_ktls_handle_get_psv_completion(wi, sq);
+ break;
+#endif
+ default:
+ netdev_WARN_ONCE(cq->netdev,
+ "Bad WQE type in ICOSQ WQE info: 0x%x\n",
+ wi->wqe_type);
}
-
} while (!last_wqe);
-
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
sq->cc = sqcc;
mlx5_cqwq_update_db_record(&cq->wq);
+
+ return i;
}
-bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
+INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
{
- struct mlx5e_icosq *sq = &rq->channel->icosq;
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
u8 umr_completed = rq->mpwqe.umr_completed;
+ struct mlx5e_icosq *sq = rq->icosq;
int alloc_err = 0;
u8 missing, i;
u16 head;
@@ -674,14 +961,17 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
rq->stats->congst_umr++;
-#define UMR_WQE_BULK (2)
- if (likely(missing < UMR_WQE_BULK))
+ if (likely(missing < rq->mpwqe.min_wqe_bulk))
return false;
+ if (rq->page_pool)
+ page_pool_nid_changed(rq->page_pool, numa_mem_id());
+
head = rq->mpwqe.actual_wq_head;
i = missing;
do {
- alloc_err = mlx5e_alloc_rx_mpwqe(rq, head);
+ alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
+ mlx5e_alloc_rx_mpwqe(rq, head);
if (unlikely(alloc_err))
break;
@@ -703,7 +993,7 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
* the driver when it refills the Fill Ring.
* 2. Otherwise, busy poll by rescheduling the NAPI poll.
*/
- if (unlikely(alloc_err == -ENOMEM && rq->umem))
+ if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool))
return true;
return false;
@@ -720,8 +1010,8 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
if (tcp_ack) {
tcp->ack = 1;
- tcp->ack_seq = cqe->lro_ack_seq_num;
- tcp->window = cqe->lro_tcp_win;
+ tcp->ack_seq = cqe->lro.ack_seq_num;
+ tcp->window = cqe->lro.tcp_win;
}
}
@@ -747,7 +1037,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
tcp = ip_p + sizeof(struct iphdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
- ipv4->ttl = cqe->lro_min_ttl;
+ ipv4->ttl = cqe->lro.min_ttl;
ipv4->tot_len = cpu_to_be16(tot_len);
ipv4->check = 0;
ipv4->check = ip_fast_csum((unsigned char *)ipv4,
@@ -767,7 +1057,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
tcp = ip_p + sizeof(struct ipv6hdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
- ipv6->hop_limit = cqe->lro_min_ttl;
+ ipv6->hop_limit = cqe->lro.min_ttl;
ipv6->payload_len = cpu_to_be16(payload_len);
mlx5e_lro_update_tcp_hdr(cqe, tcp);
@@ -779,6 +1069,142 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
}
}
+static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
+{
+ struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
+ u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
+
+ return page_address(last_head->page) + head_offset;
+}
+
+static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
+{
+ int udp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct udphdr *uh;
+
+ uh = (struct udphdr *)(skb->data + udp_off);
+ uh->len = htons(skb->len - udp_off);
+
+ if (uh->check)
+ uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr,
+ ipv4->daddr, 0);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6)
+{
+ int udp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct udphdr *uh;
+
+ uh = (struct udphdr *)(skb->data + udp_off);
+ uh->len = htons(skb->len - udp_off);
+
+ if (uh->check)
+ uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr,
+ &ipv6->daddr, 0);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct tcphdr *skb_tcp_hd)
+{
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ struct tcphdr *last_tcp_hd;
+ void *last_hd_addr;
+
+ last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+ last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff;
+ tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+}
+
+static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4,
+ struct mlx5_cqe64 *cqe, bool match)
+{
+ int tcp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct tcphdr *tcp;
+
+ tcp = (struct tcphdr *)(skb->data + tcp_off);
+ if (match)
+ mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+ tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
+ ipv4->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+
+ skb->csum_start = (unsigned char *)tcp - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (tcp->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
+static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6,
+ struct mlx5_cqe64 *cqe, bool match)
+{
+ int tcp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct tcphdr *tcp;
+
+ tcp = (struct tcphdr *)(skb->data + tcp_off);
+ if (match)
+ mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+ tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr,
+ &ipv6->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ skb->csum_start = (unsigned char *)tcp - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (tcp->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
+static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+ bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP));
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (is_ipv4) {
+ int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr);
+ struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff);
+ __be16 newlen = htons(skb->len - nhoff);
+
+ csum_replace2(&ipv4->check, ipv4->tot_len, newlen);
+ ipv4->tot_len = newlen;
+
+ if (ipv4->protocol == IPPROTO_TCP)
+ mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match);
+ else
+ mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4);
+ } else {
+ int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr);
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff);
+
+ ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6));
+
+ if (ipv6->nexthdr == IPPROTO_TCP)
+ mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match);
+ else
+ mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6);
+ }
+}
+
static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
struct sk_buff *skb)
{
@@ -924,7 +1350,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
}
/* True when explicitly set via priv flag, or XDP prog is loaded */
- if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
+ get_cqe_tls_offload(cqe))
goto csum_unnecessary;
/* CQE csum doesn't cover padding octets in short ethernet
@@ -985,9 +1412,14 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
skb->mac_len = ETH_HLEN;
-#ifdef CONFIG_MLX5_EN_TLS
- mlx5e_tls_handle_rx_skb(netdev, skb, &cqe_bcnt);
-#endif
+ if (unlikely(get_cqe_tls_offload(cqe)))
+ mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
+
+ if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
+ mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);
+
+ if (unlikely(mlx5e_macsec_is_rx_flow(cqe)))
+ mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);
if (lro_num_seg > 1) {
mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
@@ -1001,9 +1433,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
}
if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
- skb_hwtstamps(skb)->hwtstamp =
- mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
-
+ skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
+ rq->clock, get_cqe_ts(cqe));
skb_record_rx_queue(skb, rq->ix);
if (likely(netdev->features & NETIF_F_RXHASH))
@@ -1023,6 +1454,30 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
mlx5e_enable_ecn(rq, skb);
skb->protocol = eth_type_trans(skb, netdev);
+
+ if (unlikely(mlx5e_skb_is_multicast(skb)))
+ stats->mcast_packets++;
+}
+
+static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->packets++;
+ stats->gro_packets++;
+ stats->bytes += cqe_bcnt;
+ stats->gro_bytes += cqe_bcnt;
+ if (NAPI_GRO_CB(skb)->count != 1)
+ return;
+ mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
+ skb_reset_network_header(skb);
+ if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) {
+ napi_gro_receive(rq->cq.napi, skb);
+ rq->hw_gro_data->skb = NULL;
+ }
}
static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
@@ -1040,7 +1495,7 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
static inline
struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
u32 frag_size, u16 headroom,
- u32 cqe_bcnt)
+ u32 cqe_bcnt, u32 metasize)
{
struct sk_buff *skb = build_skb(va, frag_size);
@@ -1052,85 +1507,171 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
skb_reserve(skb, headroom);
skb_put(skb, cqe_bcnt);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
return skb;
}
-struct sk_buff *
-mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
+ u32 len, struct xdp_buff *xdp)
+{
+ xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
+ xdp_prepare_buff(xdp, va, headroom, len, true);
+}
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
{
- struct mlx5e_dma_info *di = wi->di;
+ union mlx5e_alloc_unit *au = wi->au;
u16 rx_headroom = rq->buff.headroom;
+ struct bpf_prog *prog;
struct sk_buff *skb;
+ u32 metasize = 0;
void *va, *data;
- bool consumed;
+ dma_addr_t addr;
u32 frag_size;
- va = page_address(di->page) + wi->offset;
+ va = page_address(au->page) + wi->offset;
data = va + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
- dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
- frag_size, DMA_FROM_DEVICE);
- prefetchw(va); /* xdp_frame data area */
- prefetch(data);
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
+ frag_size, rq->buff.map_dir);
+ net_prefetch(data);
- rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false);
- rcu_read_unlock();
- if (consumed)
- return NULL; /* page/packet was consumed by XDP */
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ if (mlx5e_xdp_handle(rq, au->page, prog, &xdp))
+ return NULL; /* page/packet was consumed by XDP */
- skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ cqe_bcnt = xdp.data_end - xdp.data;
+ }
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
if (unlikely(!skb))
return NULL;
/* queue up for recycling/reuse */
- page_ref_inc(di->page);
+ page_ref_inc(au->page);
return skb;
}
-struct sk_buff *
-mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+static struct sk_buff *
+mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
{
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
struct mlx5e_wqe_frag_info *head_wi = wi;
- u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt);
- u16 frag_headlen = headlen;
- u16 byte_cnt = cqe_bcnt - headlen;
+ union mlx5e_alloc_unit *au = wi->au;
+ u16 rx_headroom = rq->buff.headroom;
+ struct skb_shared_info *sinfo;
+ u32 frag_consumed_bytes;
+ struct bpf_prog *prog;
+ struct xdp_buff xdp;
struct sk_buff *skb;
+ dma_addr_t addr;
+ u32 truesize;
+ void *va;
- /* XDP is not supported in this configuration, as incoming packets
- * might spread among multiple pages.
- */
- skb = napi_alloc_skb(rq->cq.napi,
- ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
- if (unlikely(!skb)) {
- rq->stats->buff_alloc_err++;
- return NULL;
- }
+ va = page_address(au->page) + wi->offset;
+ frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
+ rq->buff.frame0_sz, rq->buff.map_dir);
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetch(va + rx_headroom);
+
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp);
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
+ truesize = 0;
+
+ cqe_bcnt -= frag_consumed_bytes;
+ frag_info++;
+ wi++;
+
+ while (cqe_bcnt) {
+ skb_frag_t *frag;
+
+ au = wi->au;
+
+ frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
+ frag_consumed_bytes, rq->buff.map_dir);
+
+ if (!xdp_buff_has_frags(&xdp)) {
+ /* Init on the first fragment to avoid cold cache access
+ * when possible.
+ */
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(&xdp);
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, au->page);
+ skb_frag_off_set(frag, wi->offset);
+ skb_frag_size_set(frag, frag_consumed_bytes);
- prefetchw(skb->data);
+ if (page_is_pfmemalloc(au->page))
+ xdp_buff_set_frag_pfmemalloc(&xdp);
- while (byte_cnt) {
- u16 frag_consumed_bytes =
- min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt);
+ sinfo->xdp_frags_size += frag_consumed_bytes;
+ truesize += frag_info->frag_stride;
- mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen,
- frag_consumed_bytes, frag_info->frag_stride);
- byte_cnt -= frag_consumed_bytes;
- frag_headlen = 0;
+ cqe_bcnt -= frag_consumed_bytes;
frag_info++;
wi++;
}
- /* copy header */
- mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen);
- /* skb linear part was allocated with headlen and aligned to long */
- skb->tail += headlen;
- skb->len += headlen;
+ au = head_wi->au;
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
+ if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ int i;
+
+ for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
+ mlx5e_put_rx_frag(rq, &head_wi[i], true);
+ }
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz,
+ xdp.data - xdp.data_hard_start,
+ xdp.data_end - xdp.data,
+ xdp.data - xdp.data_meta);
+ if (unlikely(!skb))
+ return NULL;
+
+ page_ref_inc(au->page);
+
+ if (unlikely(xdp_buff_has_frags(&xdp))) {
+ int i;
+
+ /* sinfo->nr_frags is reset by build_skb, calculate again. */
+ xdp_update_skb_shared_info(skb, wi - head_wi - 1,
+ sinfo->xdp_frags_size, truesize,
+ xdp_buff_is_frag_pfmemalloc(&xdp));
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ page_ref_inc(skb_frag_page(frag));
+ }
+ }
return skb;
}
@@ -1138,13 +1679,22 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
+ struct mlx5e_priv *priv = rq->priv;
if (cqe_syndrome_needs_recover(err_cqe->syndrome) &&
- !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state))
- queue_work(rq->channel->priv->wq, &rq->recover_work);
+ !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) {
+ mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe);
+ queue_work(priv->wq, &rq->recover_work);
+ }
}
-void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ trigger_report(rq, cqe);
+ rq->stats->wqe_err++;
+}
+
+static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
struct mlx5e_wqe_frag_info *wi;
@@ -1157,15 +1707,15 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
- skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe,
mlx5e_skb_from_cqe_linear,
mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
+ mlx5e_xsk_skb_from_cqe_linear,
+ rq, wi, cqe_bcnt);
if (!skb) {
/* probably for XDP */
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
@@ -1178,6 +1728,13 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (mlx5e_cqe_regb_chain(cqe))
+ if (!mlx5e_tc_update_skb(cqe, skb)) {
+ dev_kfree_skb_any(skb);
+ goto free_wqe;
+ }
+
napi_gro_receive(rq->cq.napi, skb);
free_wqe:
@@ -1187,7 +1744,7 @@ wq_cyc_pop:
}
#ifdef CONFIG_MLX5_ESWITCH
-void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct net_device *netdev = rq->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1204,11 +1761,14 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
- skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ mlx5e_skb_from_cqe_linear,
+ mlx5e_skb_from_cqe_nonlinear,
+ rq, wi, cqe_bcnt);
if (!skb) {
/* probably for XDP */
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
@@ -1225,25 +1785,108 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (rep->vlan && skb_vlan_tag_present(skb))
skb_vlan_pop(skb);
- napi_gro_receive(rq->cq.napi, skb);
+ mlx5e_rep_tc_receive(cqe, rq, skb);
free_wqe:
mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
mlx5_wq_cyc_pop(wq);
}
+
+static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
+ u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
+ u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
+ u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
+ u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
+ struct mlx5e_rx_wqe_ll *wqe;
+ struct mlx5_wq_ll *wq;
+ struct sk_buff *skb;
+ u16 cqe_bcnt;
+
+ wi->consumed_strides += cstrides;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ mlx5e_handle_rx_err_cqe(rq, cqe);
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+
+ skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
+ mlx5e_skb_from_cqe_mpwrq_linear,
+ mlx5e_skb_from_cqe_mpwrq_nonlinear,
+ rq, wi, cqe_bcnt, head_offset, page_idx);
+ if (!skb)
+ goto mpwrq_cqe_out;
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ mlx5e_rep_tc_receive(cqe, rq, skb);
+
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
+const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
+ .handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
+};
#endif
-struct sk_buff *
+static void
+mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
+ union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset)
+{
+ net_prefetchw(skb->data);
+
+ while (data_bcnt) {
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
+ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
+ unsigned int truesize;
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ truesize = pg_consumed_bytes;
+ else
+ truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+ mlx5e_add_skb_frag(rq, skb, au, data_offset,
+ pg_consumed_bytes, truesize);
+
+ data_bcnt -= pg_consumed_bytes;
+ data_offset = 0;
+ au++;
+ }
+}
+
+static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
+ union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
- struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
u32 frag_offset = head_offset + headlen;
u32 byte_cnt = cqe_bcnt - headlen;
- struct mlx5e_dma_info *head_di = di;
+ union mlx5e_alloc_unit *head_au = au;
struct sk_buff *skb;
+ dma_addr_t addr;
skb = napi_alloc_skb(rq->cq.napi,
ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
@@ -1252,27 +1895,19 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
if (unlikely(frag_offset >= PAGE_SIZE)) {
- di++;
+ au++;
frag_offset -= PAGE_SIZE;
}
- while (byte_cnt) {
- u32 pg_consumed_bytes =
- min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
- unsigned int truesize =
- ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
-
- mlx5e_add_skb_frag(rq, skb, di, frag_offset,
- pg_consumed_bytes, truesize);
- byte_cnt -= pg_consumed_bytes;
- frag_offset = 0;
- di++;
- }
+ mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
/* copy header */
- mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen);
+ addr = page_pool_get_dma_addr(head_au->page);
+ mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
+ head_offset, head_offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
@@ -1280,17 +1915,18 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
return skb;
}
-struct sk_buff *
+static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
- struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
u16 rx_headroom = rq->buff.headroom;
- u32 cqe_bcnt32 = cqe_bcnt;
+ struct bpf_prog *prog;
struct sk_buff *skb;
+ u32 metasize = 0;
void *va, *data;
+ dma_addr_t addr;
u32 frag_size;
- bool consumed;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -1298,43 +1934,233 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return NULL;
}
- va = page_address(di->page) + head_offset;
+ va = page_address(au->page) + head_offset;
data = va + rx_headroom;
- frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
-
- dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
- frag_size, DMA_FROM_DEVICE);
- prefetchw(va); /* xdp_frame data area */
- prefetch(data);
-
- rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false);
- rcu_read_unlock();
- if (consumed) {
- if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
- __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
- return NULL; /* page/packet was consumed by XDP */
- }
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
+ frag_size, rq->buff.map_dir);
+ net_prefetch(data);
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
- skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ cqe_bcnt = xdp.data_end - xdp.data;
+ }
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
if (unlikely(!skb))
return NULL;
/* queue up for recycling/reuse */
- page_ref_inc(di->page);
+ page_ref_inc(au->page);
+
+ return skb;
+}
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ struct mlx5_cqe64 *cqe, u16 header_index)
+{
+ struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
+ u16 head_offset = head->addr & (PAGE_SIZE - 1);
+ u16 head_size = cqe->shampo.header_size;
+ u16 rx_headroom = rq->buff.headroom;
+ struct sk_buff *skb = NULL;
+ void *hdr, *data;
+ u32 frag_size;
+
+ hdr = page_address(head->page) + head_offset;
+ data = hdr + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
+
+ if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
+ /* build SKB around header */
+ dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
+ prefetchw(hdr);
+ prefetch(data);
+ skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);
+
+ if (unlikely(!skb))
+ return NULL;
+ /* queue up for recycling/reuse */
+ page_ref_inc(head->page);
+
+ } else {
+ /* allocate SKB and copy header for large header */
+ rq->stats->gro_large_hds++;
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(head_size, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+ mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
+ head_offset + rx_headroom,
+ rx_headroom, head_size);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += head_size;
+ skb->len += head_size;
+ }
return skb;
}
-void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+static void
+mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz)
+{
+ skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
+ unsigned int frag_size = skb_frag_size(last_frag);
+ unsigned int frag_truesize;
+
+ frag_truesize = ALIGN(frag_size, BIT(log_stride_sz));
+ skb->truesize += frag_truesize - frag_size;
+}
+
+static void
+mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->gro_skbs++;
+ if (likely(skb_shinfo(skb)->nr_frags))
+ mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz);
+ if (NAPI_GRO_CB(skb)->count > 1)
+ mlx5e_shampo_update_hdr(rq, cqe, match);
+ napi_gro_receive(rq->cq.napi, skb);
+ rq->hw_gro_data->skb = NULL;
+}
+
+static bool
+mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
+{
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+
+ return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
+}
+
+static void
+mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u64 addr = shampo->info[header_index].addr;
+
+ if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
+ shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
+ mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true);
+ }
+ bitmap_clear(shampo->bitmap, header_index, 1);
+}
+
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
+ u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ u16 head_size = cqe->shampo.header_size;
+ struct sk_buff **skb = &rq->hw_gro_data->skb;
+ bool flush = cqe->shampo.flush;
+ bool match = cqe->shampo.match;
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct mlx5e_rx_wqe_ll *wqe;
+ union mlx5e_alloc_unit *au;
+ struct mlx5e_mpw_info *wi;
+ struct mlx5_wq_ll *wq;
+
+ wi = mlx5e_get_mpw_info(rq, wqe_id);
+ wi->consumed_strides += cstrides;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ mlx5e_handle_rx_err_cqe(rq, cqe);
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ stats->gro_match_packets += match;
+
+ if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) {
+ match = false;
+ mlx5e_shampo_flush_skb(rq, cqe, match);
+ }
+
+ if (!*skb) {
+ if (likely(head_size))
+ *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+ else
+ *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
+ page_idx);
+ if (unlikely(!*skb))
+ goto free_hd_entry;
+
+ NAPI_GRO_CB(*skb)->count = 1;
+ skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
+ } else {
+ NAPI_GRO_CB(*skb)->count++;
+ if (NAPI_GRO_CB(*skb)->count == 2 &&
+ rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) {
+ void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+ int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff -
+ sizeof(struct iphdr);
+ struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff);
+
+ rq->hw_gro_data->second_ip_id = ntohs(iph->id);
+ }
+ }
+
+ if (likely(head_size)) {
+ au = &wi->alloc_units[page_idx];
+ mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset);
+ }
+
+ mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
+ if (flush)
+ mlx5e_shampo_flush_skb(rq, cqe, match);
+free_hd_entry:
+ mlx5e_free_rx_shampo_hd_entry(rq, header_index);
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
+static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
- struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
- u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
- u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
+ u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
struct mlx5e_rx_wqe_ll *wqe;
struct mlx5_wq_ll *wq;
struct sk_buff *skb;
@@ -1343,8 +2169,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -1358,14 +2183,22 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
- skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
+ skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq,
mlx5e_skb_from_cqe_mpwrq_linear,
mlx5e_skb_from_cqe_mpwrq_nonlinear,
+ mlx5e_xsk_skb_from_cqe_mpwrq_linear,
rq, wi, cqe_bcnt, head_offset, page_idx);
if (!skb)
goto mpwrq_cqe_out;
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (mlx5e_cqe_regb_chain(cqe))
+ if (!mlx5e_tc_update_skb(cqe, skb)) {
+ dev_kfree_skb_any(skb);
+ goto mpwrq_cqe_out;
+ }
+
napi_gro_receive(rq->cq.napi, skb);
mpwrq_cqe_out:
@@ -1388,12 +2221,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return 0;
- if (rq->page_pool)
- page_pool_nid_changed(rq->page_pool, numa_mem_id());
-
if (rq->cqd.left) {
work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
- if (rq->cqd.left || work_done >= budget)
+ if (work_done >= budget)
goto out;
}
@@ -1414,12 +2244,16 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
mlx5_cqwq_pop(cqwq);
- INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
- mlx5e_handle_rx_cqe, rq, cqe);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
+ rq, cqe);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
out:
- if (rq->xdp_prog)
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
+ mlx5e_shampo_flush_skb(rq, NULL, false);
+
+ if (rcu_access_pointer(rq->xdp_prog))
mlx5e_xdp_rx_poll_complete(rq);
mlx5_cqwq_update_db_record(cqwq);
@@ -1432,6 +2266,7 @@ out:
#ifdef CONFIG_MLX5_CORE_IPOIB
+#define MLX5_IB_GRH_SGID_OFFSET 8
#define MLX5_IB_GRH_DGID_OFFSET 24
#define MLX5_GID_SIZE 16
@@ -1445,6 +2280,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
struct net_device *netdev;
struct mlx5e_priv *priv;
char *pseudo_header;
+ u32 flags_rqpn;
u32 qpn;
u8 *dgid;
u8 g;
@@ -1464,9 +2300,10 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
priv = mlx5i_epriv(netdev);
tstamp = &priv->tstamp;
- stats = &priv->channel_stats[rq->ix].rq;
+ stats = rq->stats;
- g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
+ flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
+ g = (flags_rqpn >> 28) & 3;
dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
if ((!g) || dgid[0] != 0xff)
skb->pkt_type = PACKET_HOST;
@@ -1475,9 +2312,15 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
else
skb->pkt_type = PACKET_MULTICAST;
- /* TODO: IB/ipoib: Allow mcast packets from other VFs
- * 68996a6e760e5c74654723eeb57bf65628ae87f4
+ /* Drop packets that this interface sent, ie multicast packets
+ * that the HCA has replicated.
*/
+ if (g && (qpn == (flags_rqpn & 0xffffff)) &&
+ (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
+ MLX5_GID_SIZE) == 0)) {
+ skb->dev = NULL;
+ return;
+ }
skb_pull(skb, MLX5_IB_GRH_BYTES);
@@ -1493,9 +2336,8 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
}
if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
- skb_hwtstamps(skb)->hwtstamp =
- mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
-
+ skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
+ rq->clock, get_cqe_ts(cqe));
skb_record_rx_queue(skb, rq->ix);
if (likely(netdev->features & NETIF_F_RXHASH))
@@ -1513,7 +2355,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
stats->bytes += cqe_bcnt;
}
-void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
struct mlx5e_wqe_frag_info *wi;
@@ -1533,7 +2375,7 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
mlx5e_skb_from_cqe_linear,
mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
+ rq, wi, cqe_bcnt);
if (!skb)
goto wq_free_wqe;
@@ -1549,44 +2391,104 @@ wq_free_wqe:
mlx5_wq_cyc_pop(wq);
}
+const struct mlx5e_rx_handlers mlx5i_rx_handlers = {
+ .handle_rx_cqe = mlx5i_handle_rx_cqe,
+ .handle_rx_cqe_mpwqe = NULL, /* Not supported */
+};
#endif /* CONFIG_MLX5_CORE_IPOIB */
-#ifdef CONFIG_MLX5_EN_IPSEC
+int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk)
+{
+ struct net_device *netdev = rq->netdev;
+ struct mlx5_core_dev *mdev = rq->mdev;
+ struct mlx5e_priv *priv = rq->priv;
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
+ mlx5e_xsk_skb_from_cqe_mpwrq_linear :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_mpwrq_linear :
+ mlx5e_skb_from_cqe_mpwrq_nonlinear;
+ rq->post_wqes = mlx5e_post_rx_mpwqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
+ return -EINVAL;
+ }
+ } else {
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
+ return -EINVAL;
+ }
+ }
+
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ rq->wqe.skb_from_cqe = xsk ?
+ mlx5e_xsk_skb_from_cqe_linear :
+ mlx5e_rx_is_linear_skb(mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
+ rq->post_wqes = mlx5e_post_rx_wqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of RQ is not set\n");
+ return -EINVAL;
+ }
+ }
-void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+ return 0;
+}
+
+static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
+ struct mlx5e_priv *priv = netdev_priv(rq->netdev);
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
struct mlx5e_wqe_frag_info *wi;
+ struct devlink_port *dl_port;
struct sk_buff *skb;
u32 cqe_bcnt;
+ u16 trap_id;
u16 ci;
+ trap_id = get_cqe_flow_tag(cqe);
ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
wi = get_frag(rq, ci);
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
rq->stats->wqe_err++;
- goto wq_free_wqe;
+ goto free_wqe;
}
- skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
- mlx5e_skb_from_cqe_linear,
- mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
- if (unlikely(!skb)) /* a DROP, save the page-reuse checks */
- goto wq_free_wqe;
-
- skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt);
- if (unlikely(!skb))
- goto wq_free_wqe;
+ skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt);
+ if (!skb)
+ goto free_wqe;
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
- napi_gro_receive(rq->cq.napi, skb);
+ skb_push(skb, ETH_HLEN);
-wq_free_wqe:
- mlx5e_free_rx_wqe(rq, wi, true);
+ dl_port = mlx5e_devlink_get_dl_port(priv);
+ mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port);
+ dev_kfree_skb_any(skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, false);
mlx5_wq_cyc_pop(wq);
}
-#endif /* CONFIG_MLX5_EN_IPSEC */
+void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params)
+{
+ rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
+ rq->post_wqes = mlx5e_post_rx_wqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
+ rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index bbff8d8ded76..08a75654f5f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -30,36 +30,12 @@
* SOFTWARE.
*/
-#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <net/udp.h>
#include "en.h"
#include "en/port.h"
-
-enum {
- MLX5E_ST_LINK_STATE,
- MLX5E_ST_LINK_SPEED,
- MLX5E_ST_HEALTH_INFO,
-#ifdef CONFIG_INET
- MLX5E_ST_LOOPBACK,
-#endif
- MLX5E_ST_NUM,
-};
-
-const char mlx5e_self_tests[MLX5E_ST_NUM][ETH_GSTRING_LEN] = {
- "Link Test",
- "Speed Test",
- "Health Test",
-#ifdef CONFIG_INET
- "Loopback Test",
-#endif
-};
-
-int mlx5e_self_test_num(struct mlx5e_priv *priv)
-{
- return ARRAY_SIZE(mlx5e_self_tests);
-}
+#include "eswitch.h"
static int mlx5e_test_health_info(struct mlx5e_priv *priv)
{
@@ -115,7 +91,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
skb_reserve(skb, NET_IP_ALIGN);
/* Reserve for ethernet and IP header */
@@ -234,7 +210,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
return err;
}
- err = mlx5e_refresh_tirs(priv, true);
+ err = mlx5e_refresh_tirs(priv, true, false);
if (err)
goto out;
@@ -263,7 +239,15 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
mlx5_nic_vport_update_local_lb(priv->mdev, false);
dev_remove_pack(&lbtp->pt);
- mlx5e_refresh_tirs(priv, false);
+ mlx5e_refresh_tirs(priv, false, false);
+}
+
+static int mlx5e_cond_loopback(struct mlx5e_priv *priv)
+{
+ if (is_mdev_switchdev_mode(priv->mdev))
+ return -EOPNOTSUPP;
+
+ return 0;
}
#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
@@ -314,37 +298,48 @@ out:
}
#endif
-static int (*mlx5e_st_func[MLX5E_ST_NUM])(struct mlx5e_priv *) = {
- mlx5e_test_link_state,
- mlx5e_test_link_speed,
- mlx5e_test_health_info,
+typedef int (*mlx5e_st_func)(struct mlx5e_priv *);
+
+struct mlx5e_st {
+ char name[ETH_GSTRING_LEN];
+ mlx5e_st_func st_func;
+ mlx5e_st_func cond_func;
+};
+
+static struct mlx5e_st mlx5e_sts[] = {
+ { "Link Test", mlx5e_test_link_state },
+ { "Speed Test", mlx5e_test_link_speed },
+ { "Health Test", mlx5e_test_health_info },
#ifdef CONFIG_INET
- mlx5e_test_loopback,
+ { "Loopback Test", mlx5e_test_loopback, mlx5e_cond_loopback },
#endif
};
+#define MLX5E_ST_NUM ARRAY_SIZE(mlx5e_sts)
+
void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
u64 *buf)
{
struct mlx5e_priv *priv = netdev_priv(ndev);
- int i;
-
- memset(buf, 0, sizeof(u64) * MLX5E_ST_NUM);
+ int i, count = 0;
mutex_lock(&priv->state_lock);
netdev_info(ndev, "Self test begin..\n");
for (i = 0; i < MLX5E_ST_NUM; i++) {
- netdev_info(ndev, "\t[%d] %s start..\n",
- i, mlx5e_self_tests[i]);
- buf[i] = mlx5e_st_func[i](priv);
- netdev_info(ndev, "\t[%d] %s end: result(%lld)\n",
- i, mlx5e_self_tests[i], buf[i]);
+ struct mlx5e_st st = mlx5e_sts[i];
+
+ if (st.cond_func && st.cond_func(priv))
+ continue;
+ netdev_info(ndev, "\t[%d] %s start..\n", i, st.name);
+ buf[count] = st.st_func(priv);
+ netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]);
+ count++;
}
mutex_unlock(&priv->state_lock);
- for (i = 0; i < MLX5E_ST_NUM; i++) {
+ for (i = 0; i < count; i++) {
if (buf[i]) {
etest->flags |= ETH_TEST_FL_FAILED;
break;
@@ -353,3 +348,24 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
netdev_info(ndev, "Self test out: status flags(0x%x)\n",
etest->flags);
}
+
+int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data)
+{
+ int i, count = 0;
+
+ for (i = 0; i < MLX5E_ST_NUM; i++) {
+ struct mlx5e_st st = mlx5e_sts[i];
+
+ if (st.cond_func && st.cond_func(priv))
+ continue;
+ if (data)
+ strcpy(data + count * ETH_GSTRING_LEN, st.name);
+ count++;
+ }
+ return count;
+}
+
+int mlx5e_self_test_num(struct mlx5e_priv *priv)
+{
+ return mlx5e_self_test_fill_strings(priv, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 30b216d9284c..03c1841970f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -32,8 +32,14 @@
#include "lib/mlx5.h"
#include "en.h"
-#include "en_accel/ipsec.h"
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
+#include "en_accel/en_accel.h"
+#include "en/ptp.h"
+#include "en/port.h"
+
+#ifdef CONFIG_PAGE_POOL_STATS
+#include <net/page_pool.h>
+#endif
static unsigned int stats_grps_num(struct mlx5e_priv *priv)
{
@@ -54,6 +60,18 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv)
return total;
}
+void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv)
+{
+ mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+ const unsigned int num_stats_grps = stats_grps_num(priv);
+ int i;
+
+ for (i = num_stats_grps - 1; i >= 0; i--)
+ if (stats_grps[i]->update_stats &&
+ stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS)
+ stats_grps[i]->update_stats(priv);
+}
+
void mlx5e_stats_update(struct mlx5e_priv *priv)
{
mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
@@ -98,11 +116,12 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
@@ -114,6 +133,11 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_match_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
@@ -163,6 +187,31 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
+#ifdef CONFIG_PAGE_POOL_STATS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) },
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_skip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_ok) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_retry) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_skip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_err) },
+#endif
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
@@ -221,6 +270,250 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw)
return idx;
}
+static void mlx5e_stats_grp_sw_update_stats_xdp_red(struct mlx5e_sw_stats *s,
+ struct mlx5e_xdpsq_stats *xdpsq_red_stats)
+{
+ s->tx_xdp_xmit += xdpsq_red_stats->xmit;
+ s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe;
+ s->tx_xdp_inlnw += xdpsq_red_stats->inlnw;
+ s->tx_xdp_nops += xdpsq_red_stats->nops;
+ s->tx_xdp_full += xdpsq_red_stats->full;
+ s->tx_xdp_err += xdpsq_red_stats->err;
+ s->tx_xdp_cqes += xdpsq_red_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_xdpsq(struct mlx5e_sw_stats *s,
+ struct mlx5e_xdpsq_stats *xdpsq_stats)
+{
+ s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
+ s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe;
+ s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw;
+ s->rx_xdp_tx_nops += xdpsq_stats->nops;
+ s->rx_xdp_tx_full += xdpsq_stats->full;
+ s->rx_xdp_tx_err += xdpsq_stats->err;
+ s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_xsksq(struct mlx5e_sw_stats *s,
+ struct mlx5e_xdpsq_stats *xsksq_stats)
+{
+ s->tx_xsk_xmit += xsksq_stats->xmit;
+ s->tx_xsk_mpwqe += xsksq_stats->mpwqe;
+ s->tx_xsk_inlnw += xsksq_stats->inlnw;
+ s->tx_xsk_full += xsksq_stats->full;
+ s->tx_xsk_err += xsksq_stats->err;
+ s->tx_xsk_cqes += xsksq_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_xskrq(struct mlx5e_sw_stats *s,
+ struct mlx5e_rq_stats *xskrq_stats)
+{
+ s->rx_xsk_packets += xskrq_stats->packets;
+ s->rx_xsk_bytes += xskrq_stats->bytes;
+ s->rx_xsk_csum_complete += xskrq_stats->csum_complete;
+ s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary;
+ s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner;
+ s->rx_xsk_csum_none += xskrq_stats->csum_none;
+ s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark;
+ s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets;
+ s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop;
+ s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect;
+ s->rx_xsk_wqe_err += xskrq_stats->wqe_err;
+ s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes;
+ s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides;
+ s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop;
+ s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err;
+ s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks;
+ s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts;
+ s->rx_xsk_congst_umr += xskrq_stats->congst_umr;
+ s->rx_xsk_arfs_err += xskrq_stats->arfs_err;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
+ struct mlx5e_rq_stats *rq_stats)
+{
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+ s->rx_lro_packets += rq_stats->lro_packets;
+ s->rx_lro_bytes += rq_stats->lro_bytes;
+ s->rx_gro_packets += rq_stats->gro_packets;
+ s->rx_gro_bytes += rq_stats->gro_bytes;
+ s->rx_gro_skbs += rq_stats->gro_skbs;
+ s->rx_gro_match_packets += rq_stats->gro_match_packets;
+ s->rx_gro_large_hds += rq_stats->gro_large_hds;
+ s->rx_ecn_mark += rq_stats->ecn_mark;
+ s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
+ s->rx_csum_none += rq_stats->csum_none;
+ s->rx_csum_complete += rq_stats->csum_complete;
+ s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
+ s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
+ s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+ s->rx_xdp_drop += rq_stats->xdp_drop;
+ s->rx_xdp_redirect += rq_stats->xdp_redirect;
+ s->rx_wqe_err += rq_stats->wqe_err;
+ s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes;
+ s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
+ s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop;
+ s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
+ s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+ s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+ s->rx_cache_reuse += rq_stats->cache_reuse;
+ s->rx_cache_full += rq_stats->cache_full;
+ s->rx_cache_empty += rq_stats->cache_empty;
+ s->rx_cache_busy += rq_stats->cache_busy;
+ s->rx_cache_waive += rq_stats->cache_waive;
+ s->rx_congst_umr += rq_stats->congst_umr;
+ s->rx_arfs_err += rq_stats->arfs_err;
+ s->rx_recover += rq_stats->recover;
+#ifdef CONFIG_PAGE_POOL_STATS
+ s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast;
+ s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow;
+ s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty;
+ s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill;
+ s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive;
+ s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order;
+ s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached;
+ s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full;
+ s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring;
+ s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full;
+ s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets;
+ s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes;
+ s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt;
+ s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start;
+ s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end;
+ s->rx_tls_resync_req_skip += rq_stats->tls_resync_req_skip;
+ s->rx_tls_resync_res_ok += rq_stats->tls_resync_res_ok;
+ s->rx_tls_resync_res_retry += rq_stats->tls_resync_res_retry;
+ s->rx_tls_resync_res_skip += rq_stats->tls_resync_res_skip;
+ s->rx_tls_err += rq_stats->tls_err;
+#endif
+}
+
+static void mlx5e_stats_grp_sw_update_stats_ch_stats(struct mlx5e_sw_stats *s,
+ struct mlx5e_ch_stats *ch_stats)
+{
+ s->ch_events += ch_stats->events;
+ s->ch_poll += ch_stats->poll;
+ s->ch_arm += ch_stats->arm;
+ s->ch_aff_change += ch_stats->aff_change;
+ s->ch_force_irq += ch_stats->force_irq;
+ s->ch_eq_rearm += ch_stats->eq_rearm;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_sq(struct mlx5e_sw_stats *s,
+ struct mlx5e_sq_stats *sq_stats)
+{
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_tso_packets += sq_stats->tso_packets;
+ s->tx_tso_bytes += sq_stats->tso_bytes;
+ s->tx_tso_inner_packets += sq_stats->tso_inner_packets;
+ s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
+ s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
+ s->tx_nop += sq_stats->nop;
+ s->tx_mpwqe_blks += sq_stats->mpwqe_blks;
+ s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts;
+ s->tx_queue_stopped += sq_stats->stopped;
+ s->tx_queue_wake += sq_stats->wake;
+ s->tx_queue_dropped += sq_stats->dropped;
+ s->tx_cqe_err += sq_stats->cqe_err;
+ s->tx_recover += sq_stats->recover;
+ s->tx_xmit_more += sq_stats->xmit_more;
+ s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+ s->tx_csum_none += sq_stats->csum_none;
+ s->tx_csum_partial += sq_stats->csum_partial;
+#ifdef CONFIG_MLX5_EN_TLS
+ s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets;
+ s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes;
+ s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes;
+ s->tx_tls_dump_packets += sq_stats->tls_dump_packets;
+ s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+ s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data;
+ s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
+ s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req;
+#endif
+ s->tx_cqes += sq_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv,
+ struct mlx5e_sw_stats *s)
+{
+ int i;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return;
+
+ mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch);
+
+ if (priv->tx_ptp_opened) {
+ for (i = 0; i < priv->max_opened_tc; i++) {
+ mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]);
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+ }
+ if (priv->rx_ptp_opened) {
+ mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq);
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+}
+
+static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv,
+ struct mlx5e_sw_stats *s)
+{
+ struct mlx5e_sq_stats **stats;
+ u16 max_qos_sqs;
+ int i;
+
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ stats = READ_ONCE(priv->htb_qos_sq_stats);
+
+ for (i = 0; i < max_qos_sqs; i++) {
+ mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i]));
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+}
+
+#ifdef CONFIG_PAGE_POOL_STATS
+static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c)
+{
+ struct mlx5e_rq_stats *rq_stats = c->rq.stats;
+ struct page_pool *pool = c->rq.page_pool;
+ struct page_pool_stats stats = { 0 };
+
+ if (!page_pool_get_stats(pool, &stats))
+ return;
+
+ rq_stats->pp_alloc_fast = stats.alloc_stats.fast;
+ rq_stats->pp_alloc_slow = stats.alloc_stats.slow;
+ rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order;
+ rq_stats->pp_alloc_empty = stats.alloc_stats.empty;
+ rq_stats->pp_alloc_waive = stats.alloc_stats.waive;
+ rq_stats->pp_alloc_refill = stats.alloc_stats.refill;
+
+ rq_stats->pp_recycle_cached = stats.recycle_stats.cached;
+ rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full;
+ rq_stats->pp_recycle_ring = stats.recycle_stats.ring;
+ rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full;
+ rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt;
+}
+#else
+static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c)
+{
+}
+#endif
+
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -228,132 +521,33 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
memset(s, 0, sizeof(*s));
- for (i = 0; i < priv->max_nch; i++) {
+ for (i = 0; i < priv->channels.num; i++) /* for active channels only */
+ mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]);
+
+ for (i = 0; i < priv->stats_nch; i++) {
struct mlx5e_channel_stats *channel_stats =
- &priv->channel_stats[i];
- struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
- struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
- struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq;
- struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
- struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
- struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
+ priv->channel_stats[i];
+
int j;
- s->rx_packets += rq_stats->packets;
- s->rx_bytes += rq_stats->bytes;
- s->rx_lro_packets += rq_stats->lro_packets;
- s->rx_lro_bytes += rq_stats->lro_bytes;
- s->rx_ecn_mark += rq_stats->ecn_mark;
- s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
- s->rx_csum_none += rq_stats->csum_none;
- s->rx_csum_complete += rq_stats->csum_complete;
- s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
- s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
- s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
- s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
- s->rx_xdp_drop += rq_stats->xdp_drop;
- s->rx_xdp_redirect += rq_stats->xdp_redirect;
- s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
- s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe;
- s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw;
- s->rx_xdp_tx_nops += xdpsq_stats->nops;
- s->rx_xdp_tx_full += xdpsq_stats->full;
- s->rx_xdp_tx_err += xdpsq_stats->err;
- s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
- s->rx_wqe_err += rq_stats->wqe_err;
- s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes;
- s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
- s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop;
- s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
- s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
- s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
- s->rx_cache_reuse += rq_stats->cache_reuse;
- s->rx_cache_full += rq_stats->cache_full;
- s->rx_cache_empty += rq_stats->cache_empty;
- s->rx_cache_busy += rq_stats->cache_busy;
- s->rx_cache_waive += rq_stats->cache_waive;
- s->rx_congst_umr += rq_stats->congst_umr;
- s->rx_arfs_err += rq_stats->arfs_err;
- s->rx_recover += rq_stats->recover;
- s->ch_events += ch_stats->events;
- s->ch_poll += ch_stats->poll;
- s->ch_arm += ch_stats->arm;
- s->ch_aff_change += ch_stats->aff_change;
- s->ch_force_irq += ch_stats->force_irq;
- s->ch_eq_rearm += ch_stats->eq_rearm;
+ mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq);
+ mlx5e_stats_grp_sw_update_stats_xdpsq(s, &channel_stats->rq_xdpsq);
+ mlx5e_stats_grp_sw_update_stats_ch_stats(s, &channel_stats->ch);
/* xdp redirect */
- s->tx_xdp_xmit += xdpsq_red_stats->xmit;
- s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe;
- s->tx_xdp_inlnw += xdpsq_red_stats->inlnw;
- s->tx_xdp_nops += xdpsq_red_stats->nops;
- s->tx_xdp_full += xdpsq_red_stats->full;
- s->tx_xdp_err += xdpsq_red_stats->err;
- s->tx_xdp_cqes += xdpsq_red_stats->cqes;
+ mlx5e_stats_grp_sw_update_stats_xdp_red(s, &channel_stats->xdpsq);
/* AF_XDP zero-copy */
- s->rx_xsk_packets += xskrq_stats->packets;
- s->rx_xsk_bytes += xskrq_stats->bytes;
- s->rx_xsk_csum_complete += xskrq_stats->csum_complete;
- s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary;
- s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner;
- s->rx_xsk_csum_none += xskrq_stats->csum_none;
- s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark;
- s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets;
- s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop;
- s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect;
- s->rx_xsk_wqe_err += xskrq_stats->wqe_err;
- s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes;
- s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides;
- s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop;
- s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err;
- s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks;
- s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts;
- s->rx_xsk_congst_umr += xskrq_stats->congst_umr;
- s->rx_xsk_arfs_err += xskrq_stats->arfs_err;
- s->tx_xsk_xmit += xsksq_stats->xmit;
- s->tx_xsk_mpwqe += xsksq_stats->mpwqe;
- s->tx_xsk_inlnw += xsksq_stats->inlnw;
- s->tx_xsk_full += xsksq_stats->full;
- s->tx_xsk_err += xsksq_stats->err;
- s->tx_xsk_cqes += xsksq_stats->cqes;
+ mlx5e_stats_grp_sw_update_stats_xskrq(s, &channel_stats->xskrq);
+ mlx5e_stats_grp_sw_update_stats_xsksq(s, &channel_stats->xsksq);
for (j = 0; j < priv->max_opened_tc; j++) {
- struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
-
- s->tx_packets += sq_stats->packets;
- s->tx_bytes += sq_stats->bytes;
- s->tx_tso_packets += sq_stats->tso_packets;
- s->tx_tso_bytes += sq_stats->tso_bytes;
- s->tx_tso_inner_packets += sq_stats->tso_inner_packets;
- s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
- s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
- s->tx_nop += sq_stats->nop;
- s->tx_queue_stopped += sq_stats->stopped;
- s->tx_queue_wake += sq_stats->wake;
- s->tx_queue_dropped += sq_stats->dropped;
- s->tx_cqe_err += sq_stats->cqe_err;
- s->tx_recover += sq_stats->recover;
- s->tx_xmit_more += sq_stats->xmit_more;
- s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
- s->tx_csum_none += sq_stats->csum_none;
- s->tx_csum_partial += sq_stats->csum_partial;
-#ifdef CONFIG_MLX5_EN_TLS
- s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets;
- s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes;
- s->tx_tls_ctx += sq_stats->tls_ctx;
- s->tx_tls_ooo += sq_stats->tls_ooo;
- s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes;
- s->tx_tls_dump_packets += sq_stats->tls_dump_packets;
- s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
- s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data;
- s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
- s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req;
-#endif
- s->tx_cqes += sq_stats->cqes;
+ mlx5e_stats_grp_sw_update_stats_sq(s, &channel_stats->sq[j]);
/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
barrier();
}
}
+ mlx5e_stats_grp_sw_update_stats_ptp(priv, s);
+ mlx5e_stats_grp_sw_update_stats_qos(priv, s);
}
static const struct counter_desc q_stats_desc[] = {
@@ -411,18 +605,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt)
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt)
{
struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
- u32 out[MLX5_ST_SZ_DW(query_q_counter_out)];
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ int ret;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+
+ if (priv->q_counter) {
+ MLX5_SET(query_q_counter_in, in, counter_set_id,
+ priv->q_counter);
+ ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out);
+ if (!ret)
+ qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ }
- if (priv->q_counter &&
- !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out,
- sizeof(out)))
- qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
- out, out_of_buffer);
- if (priv->drop_rq_q_counter &&
- !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0,
- out, sizeof(out)))
- qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out,
- out_of_buffer);
+ if (priv->drop_rq_q_counter) {
+ MLX5_SET(query_q_counter_in, in, counter_set_id,
+ priv->drop_rq_q_counter);
+ ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out);
+ if (!ret)
+ qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ }
}
#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
@@ -436,17 +641,26 @@ static const struct counter_desc vnic_env_stats_dev_oob_desc[] = {
VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) },
};
+static const struct counter_desc vnic_env_stats_drop_desc[] = {
+ { "rx_oversize_pkts_buffer",
+ VNIC_ENV_OFF(vport_env.eth_wqe_too_small) },
+};
+
#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \
(MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \
ARRAY_SIZE(vnic_env_stats_steer_desc) : 0)
#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \
(MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \
ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0)
+#define NUM_VNIC_ENV_DROP_COUNTERS(dev) \
+ (MLX5_CAP_GEN(dev, eth_wqe_too_small) ? \
+ ARRAY_SIZE(vnic_env_stats_drop_desc) : 0)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env)
{
return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) +
- NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev);
+ NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev) +
+ NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env)
@@ -460,6 +674,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env)
for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
vnic_env_stats_dev_oob_desc[i].format);
+
+ for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_drop_desc[i].format);
+
return idx;
}
@@ -474,24 +693,25 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env)
for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out,
vnic_env_stats_dev_oob_desc, i);
+
+ for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++)
+ data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_drop_desc, i);
+
return idx;
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env)
{
u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out;
- int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out);
- u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
struct mlx5_core_dev *mdev = priv->mdev;
- if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ if (!mlx5e_stats_grp_vnic_env_num_stats(priv))
return;
- MLX5_SET(query_vnic_env_in, in, opcode,
- MLX5_CMD_OP_QUERY_VNIC_ENV);
- MLX5_SET(query_vnic_env_in, in, op_mod, 0);
- MLX5_SET(query_vnic_env_in, in, other_vport, 0);
- mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+ MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+ mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out);
}
#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
@@ -566,15 +786,12 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport)
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport)
{
- int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
u32 *out = (u32 *)priv->stats.vport.query_vport_out;
- u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {};
struct mlx5_core_dev *mdev = priv->mdev;
MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER);
- MLX5_SET(query_vport_counter_in, in, op_mod, 0);
- MLX5_SET(query_vport_counter_in, in, other_vport, 0);
- mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+ mlx5_cmd_exec_inout(mdev, query_vport_counter, in, out);
}
#define PPORT_802_3_OFF(c) \
@@ -647,6 +864,112 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
+#define MLX5E_READ_CTR64_BE_F(ptr, set, c) \
+ be64_to_cpu(*(__be64 *)((char *)ptr + \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.set.c##_high)))
+
+static int mlx5e_stats_get_ieee(struct mlx5_core_dev *mdev,
+ u32 *ppcnt_ieee_802_3)
+{
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ return mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3,
+ sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
+ struct ethtool_pause_stats *pause_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+ pause_stats->tx_pause_frames =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_pause_mac_ctrl_frames_transmitted);
+ pause_stats->rx_pause_frames =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_pause_mac_ctrl_frames_received);
+}
+
+void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+ phy_stats->SymbolErrorDuringCarrier =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_symbol_error_during_carrier);
+}
+
+void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+#define RD(name) \
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, \
+ eth_802_3_cntrs_grp_data_layout, \
+ name)
+
+ mac_stats->FramesTransmittedOK = RD(a_frames_transmitted_ok);
+ mac_stats->FramesReceivedOK = RD(a_frames_received_ok);
+ mac_stats->FrameCheckSequenceErrors = RD(a_frame_check_sequence_errors);
+ mac_stats->OctetsTransmittedOK = RD(a_octets_transmitted_ok);
+ mac_stats->OctetsReceivedOK = RD(a_octets_received_ok);
+ mac_stats->MulticastFramesXmittedOK = RD(a_multicast_frames_xmitted_ok);
+ mac_stats->BroadcastFramesXmittedOK = RD(a_broadcast_frames_xmitted_ok);
+ mac_stats->MulticastFramesReceivedOK = RD(a_multicast_frames_received_ok);
+ mac_stats->BroadcastFramesReceivedOK = RD(a_broadcast_frames_received_ok);
+ mac_stats->InRangeLengthErrors = RD(a_in_range_length_errors);
+ mac_stats->OutOfRangeLengthField = RD(a_out_of_range_length_field);
+ mac_stats->FrameTooLongErrors = RD(a_frame_too_long_errors);
+#undef RD
+}
+
+void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+ ctrl_stats->MACControlFramesTransmitted =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_mac_control_frames_transmitted);
+ ctrl_stats->MACControlFramesReceived =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_mac_control_frames_received);
+ ctrl_stats->UnsupportedOpcodesReceived =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_unsupported_opcodes_received);
+}
+
#define PPORT_2863_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
@@ -758,6 +1081,59 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
+static const struct ethtool_rmon_hist_range mlx5e_rmon_ranges[] = {
+ { 0, 64 },
+ { 65, 127 },
+ { 128, 255 },
+ { 256, 511 },
+ { 512, 1023 },
+ { 1024, 1518 },
+ { 1519, 2047 },
+ { 2048, 4095 },
+ { 4096, 8191 },
+ { 8192, 10239 },
+ {}
+};
+
+void mlx5e_stats_rmon_get(struct mlx5e_priv *priv,
+ struct ethtool_rmon_stats *rmon,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ u32 ppcnt_RFC_2819_counters[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, ppcnt_RFC_2819_counters,
+ sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+#define RD(name) \
+ MLX5E_READ_CTR64_BE_F(ppcnt_RFC_2819_counters, \
+ eth_2819_cntrs_grp_data_layout, \
+ name)
+
+ rmon->undersize_pkts = RD(ether_stats_undersize_pkts);
+ rmon->fragments = RD(ether_stats_fragments);
+ rmon->jabbers = RD(ether_stats_jabbers);
+
+ rmon->hist[0] = RD(ether_stats_pkts64octets);
+ rmon->hist[1] = RD(ether_stats_pkts65to127octets);
+ rmon->hist[2] = RD(ether_stats_pkts128to255octets);
+ rmon->hist[3] = RD(ether_stats_pkts256to511octets);
+ rmon->hist[4] = RD(ether_stats_pkts512to1023octets);
+ rmon->hist[5] = RD(ether_stats_pkts1024to1518octets);
+ rmon->hist[6] = RD(ether_stats_pkts1519to2047octets);
+ rmon->hist[7] = RD(ether_stats_pkts2048to4095octets);
+ rmon->hist[8] = RD(ether_stats_pkts4096to8191octets);
+ rmon->hist[9] = RD(ether_stats_pkts8192to10239octets);
+#undef RD
+
+ *ranges = mlx5e_rmon_ranges;
+}
+
#define PPORT_PHY_STATISTICAL_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.phys_layer_statistical_cntrs.c##_high)
@@ -865,6 +1241,123 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
+static int fec_num_lanes(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return 0;
+
+ return MLX5_GET(pmlp_reg, out, width);
+}
+
+static int fec_active_mode(struct mlx5_core_dev *mdev)
+{
+ unsigned long fec_active_long;
+ u32 fec_active;
+
+ if (mlx5e_get_fec_mode(mdev, &fec_active, NULL))
+ return MLX5E_FEC_NOFEC;
+
+ fec_active_long = fec_active;
+ return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE);
+}
+
+#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \
+ fec_stats->corrected_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_corrected_blocks_lane##idx); \
+ fec_stats->uncorrectable_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_uncorrectable_blocks_lane##idx); \
+})
+
+static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats,
+ u32 *ppcnt, u8 lanes)
+{
+ if (lanes > 3) { /* 4 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(3);
+ MLX5E_STATS_SET_FEC_BLOCK(2);
+ }
+ if (lanes > 1) /* 2 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(1);
+ if (lanes > 0) /* 1 lane */
+ MLX5E_STATS_SET_FEC_BLOCK(0);
+}
+
+static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt)
+{
+ fec_stats->corrected_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_corrected_blocks);
+ fec_stats->uncorrectable_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_uncorrectable_blocks);
+}
+
+static void fec_set_block_stats(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int mode = fec_active_mode(mdev);
+
+ if (mode == MLX5E_FEC_NOFEC)
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+ switch (mode) {
+ case MLX5E_FEC_RS_528_514:
+ case MLX5E_FEC_RS_544_514:
+ case MLX5E_FEC_LLRS_272_257_1:
+ fec_set_rs_stats(fec_stats, out);
+ return;
+ case MLX5E_FEC_FIRECODE:
+ fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev));
+ }
+}
+
+static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical,
+ sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+ fec_stats->corrected_bits.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt_phy_statistical,
+ phys_layer_statistical_cntrs,
+ phy_corrected_bits);
+}
+
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group))
+ return;
+
+ fec_set_corrected_bits_total(priv, fec_stats);
+ fec_set_block_stats(priv, fec_stats);
+}
+
#define PPORT_ETH_EXT_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
@@ -1424,40 +1917,19 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme)
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; }
-static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec)
-{
- return mlx5e_ipsec_get_count(priv);
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec)
-{
- return idx + mlx5e_ipsec_get_strings(priv,
- data + idx * ETH_GSTRING_LEN);
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec)
-{
- return idx + mlx5e_ipsec_get_stats(priv, data + idx);
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec)
-{
- mlx5e_ipsec_update_stats(priv);
-}
-
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls)
{
- return mlx5e_tls_get_count(priv);
+ return mlx5e_ktls_get_count(priv);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls)
{
- return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+ return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls)
{
- return idx + mlx5e_tls_get_stats(priv, data + idx);
+ return idx + mlx5e_ktls_get_stats(priv, data + idx);
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; }
@@ -1475,6 +1947,11 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_skbs) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_match_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
@@ -1492,6 +1969,31 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
+#ifdef CONFIG_PAGE_POOL_STATS
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) },
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_skip) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_ok) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_retry) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_skip) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_err) },
+#endif
};
static const struct counter_desc sq_stats_desc[] = {
@@ -1505,10 +2007,11 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
- { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
@@ -1587,6 +2090,105 @@ static const struct counter_desc ch_stats_desc[] = {
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
};
+static const struct counter_desc ptp_sq_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, packets) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, bytes) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, stopped) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, dropped) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, recover) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqes) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, wake) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
+static const struct counter_desc ptp_ch_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, events) },
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, poll) },
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, arm) },
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
+};
+
+static const struct counter_desc ptp_cq_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, cqe) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, err_cqe) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) },
+};
+
+static const struct counter_desc ptp_rq_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
+};
+
+static const struct counter_desc qos_sq_stats_desc[] = {
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
+#endif
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc)
#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
@@ -1594,10 +2196,138 @@ static const struct counter_desc ch_stats_desc[] = {
#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc)
#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc)
#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc)
+#define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc)
+#define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc)
+#define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc)
+#define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc)
+#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
+{
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb_max_qos_sqs);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos)
+{
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ u16 max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ int i, qid;
+
+ for (qid = 0; qid < max_qos_sqs; qid++)
+ for (i = 0; i < NUM_QOS_SQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ qos_sq_stats_desc[i].format, qid);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos)
+{
+ struct mlx5e_sq_stats **stats;
+ u16 max_qos_sqs;
+ int i, qid;
+
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ stats = READ_ONCE(priv->htb_qos_sq_stats);
+
+ for (qid = 0; qid < max_qos_sqs; qid++) {
+ struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]);
+
+ for (i = 0; i < NUM_QOS_SQ_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i);
+ }
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; }
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
+{
+ int num = NUM_PTP_CH_STATS;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return 0;
+
+ if (priv->tx_ptp_opened)
+ num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc;
+ if (priv->rx_ptp_opened)
+ num += NUM_PTP_RQ_STATS;
+
+ return num;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp)
+{
+ int i, tc;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return idx;
+
+ for (i = 0; i < NUM_PTP_CH_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ "%s", ptp_ch_stats_desc[i].format);
+
+ if (priv->tx_ptp_opened) {
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_SQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ptp_sq_stats_desc[i].format, tc);
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ptp_cq_stats_desc[i].format, tc);
+ }
+ if (priv->rx_ptp_opened) {
+ for (i = 0; i < NUM_PTP_RQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ptp_rq_stats_desc[i].format, MLX5E_PTP_CHANNEL_IX);
+ }
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp)
+{
+ int i, tc;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return idx;
+
+ for (i = 0; i < NUM_PTP_CH_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch,
+ ptp_ch_stats_desc, i);
+
+ if (priv->tx_ptp_opened) {
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_SQ_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc],
+ ptp_sq_stats_desc, i);
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc],
+ ptp_cq_stats_desc, i);
+ }
+ if (priv->rx_ptp_opened) {
+ for (i = 0; i < NUM_PTP_RQ_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq,
+ ptp_rq_stats_desc, i);
+ }
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ptp) { return; }
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels)
{
- int max_nch = priv->max_nch;
+ int max_nch = priv->stats_nch;
return (NUM_RQ_STATS * max_nch) +
(NUM_CH_STATS * max_nch) +
@@ -1611,7 +2341,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels)
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels)
{
bool is_xsk = priv->xsk.ever_used;
- int max_nch = priv->max_nch;
+ int max_nch = priv->stats_nch;
int i, j, tc;
for (i = 0; i < max_nch; i++)
@@ -1653,27 +2383,27 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels)
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels)
{
bool is_xsk = priv->xsk.ever_used;
- int max_nch = priv->max_nch;
+ int max_nch = priv->stats_nch;
int i, j, tc;
for (i = 0; i < max_nch; i++)
for (j = 0; j < NUM_CH_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch,
ch_stats_desc, j);
for (i = 0; i < max_nch; i++) {
for (j = 0; j < NUM_RQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq,
rq_stats_desc, j);
for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq,
xskrq_stats_desc, j);
for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq,
rq_xdpsq_stats_desc, j);
}
@@ -1681,17 +2411,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels)
for (i = 0; i < max_nch; i++)
for (j = 0; j < NUM_SQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc],
sq_stats_desc, j);
for (i = 0; i < max_nch; i++) {
for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq,
xsksq_stats_desc, j);
for (j = 0; j < NUM_XDPSQ_STATS; j++)
data[idx++] =
- MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq,
xdpsq_stats_desc, j);
}
@@ -1714,8 +2444,9 @@ MLX5E_DEFINE_STATS_GRP(pme, 0);
MLX5E_DEFINE_STATS_GRP(channels, 0);
MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0);
MLX5E_DEFINE_STATS_GRP(eth_ext, 0);
-static MLX5E_DEFINE_STATS_GRP(ipsec, 0);
static MLX5E_DEFINE_STATS_GRP(tls, 0);
+MLX5E_DEFINE_STATS_GRP(ptp, 0);
+static MLX5E_DEFINE_STATS_GRP(qos, 0);
/* The stats groups order is opposite to the update_stats() order calls */
mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
@@ -1731,10 +2462,17 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
&MLX5E_STATS_GRP(pcie),
&MLX5E_STATS_GRP(per_prio),
&MLX5E_STATS_GRP(pme),
- &MLX5E_STATS_GRP(ipsec),
+#ifdef CONFIG_MLX5_EN_IPSEC
+ &MLX5E_STATS_GRP(ipsec_sw),
+#endif
&MLX5E_STATS_GRP(tls),
&MLX5E_STATS_GRP(channels),
&MLX5E_STATS_GRP(per_port_buff_congest),
+ &MLX5E_STATS_GRP(ptp),
+ &MLX5E_STATS_GRP(qos),
+#ifdef CONFIG_MLX5_EN_MACSEC
+ &MLX5E_STATS_GRP(macsec_hw),
+#endif
};
unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 092b39ffa32a..9f781085be47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -51,6 +51,13 @@
#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
+
+#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
+
struct counter_desc {
char format[ETH_GSTRING_LEN];
size_t offset; /* Byte offset */
@@ -103,6 +110,22 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv);
void mlx5e_stats_update(struct mlx5e_priv *priv);
void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx);
void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data);
+void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv);
+
+void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
+ struct ethtool_pause_stats *pause_stats);
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats);
+
+void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_phy_stats *phy_stats);
+void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_mac_stats *mac_stats);
+void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_ctrl_stats *ctrl_stats);
+void mlx5e_stats_rmon_get(struct mlx5e_priv *priv,
+ struct ethtool_rmon_stats *rmon,
+ const struct ethtool_rmon_hist_range **ranges);
/* Concrete NIC Stats */
@@ -117,8 +140,16 @@ struct mlx5e_sw_stats {
u64 tx_tso_inner_bytes;
u64 tx_added_vlan_packets;
u64 tx_nop;
+ u64 tx_mpwqe_blks;
+ u64 tx_mpwqe_pkts;
u64 rx_lro_packets;
u64 rx_lro_bytes;
+ u64 rx_gro_packets;
+ u64 rx_gro_bytes;
+ u64 rx_gro_skbs;
+ u64 rx_gro_match_packets;
+ u64 rx_gro_large_hds;
+ u64 rx_mcast_packets;
u64 rx_ecn_mark;
u64 rx_removed_vlan_packets;
u64 rx_csum_unnecessary;
@@ -174,11 +205,22 @@ struct mlx5e_sw_stats {
u64 ch_aff_change;
u64 ch_force_irq;
u64 ch_eq_rearm;
-
+#ifdef CONFIG_PAGE_POOL_STATS
+ u64 rx_pp_alloc_fast;
+ u64 rx_pp_alloc_slow;
+ u64 rx_pp_alloc_slow_high_order;
+ u64 rx_pp_alloc_empty;
+ u64 rx_pp_alloc_refill;
+ u64 rx_pp_alloc_waive;
+ u64 rx_pp_recycle_cached;
+ u64 rx_pp_recycle_cache_full;
+ u64 rx_pp_recycle_ring;
+ u64 rx_pp_recycle_ring_full;
+ u64 rx_pp_recycle_released_ref;
+#endif
#ifdef CONFIG_MLX5_EN_TLS
u64 tx_tls_encrypted_packets;
u64 tx_tls_encrypted_bytes;
- u64 tx_tls_ctx;
u64 tx_tls_ooo;
u64 tx_tls_dump_packets;
u64 tx_tls_dump_bytes;
@@ -186,6 +228,17 @@ struct mlx5e_sw_stats {
u64 tx_tls_skip_no_sync_data;
u64 tx_tls_drop_no_sync_data;
u64 tx_tls_drop_bypass_req;
+
+ u64 rx_tls_decrypted_packets;
+ u64 rx_tls_decrypted_bytes;
+ u64 rx_tls_resync_req_pkt;
+ u64 rx_tls_resync_req_start;
+ u64 rx_tls_resync_req_end;
+ u64 rx_tls_resync_req_skip;
+ u64 rx_tls_resync_res_ok;
+ u64 rx_tls_resync_res_retry;
+ u64 rx_tls_resync_res_skip;
+ u64 rx_tls_err;
#endif
u64 rx_xsk_packets;
@@ -220,6 +273,10 @@ struct mlx5e_qcounter_stats {
u32 rx_if_down_packets;
};
+#define VNIC_ENV_GET(vnic_env_stats, c) \
+ MLX5_GET(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \
+ vport_env.c)
+
struct mlx5e_vnic_env_stats {
__be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
};
@@ -286,6 +343,12 @@ struct mlx5e_rq_stats {
u64 csum_none;
u64 lro_packets;
u64 lro_bytes;
+ u64 gro_packets;
+ u64 gro_bytes;
+ u64 gro_skbs;
+ u64 gro_match_packets;
+ u64 gro_large_hds;
+ u64 mcast_packets;
u64 ecn_mark;
u64 removed_vlan_packets;
u64 xdp_drop;
@@ -305,6 +368,31 @@ struct mlx5e_rq_stats {
u64 congst_umr;
u64 arfs_err;
u64 recover;
+#ifdef CONFIG_PAGE_POOL_STATS
+ u64 pp_alloc_fast;
+ u64 pp_alloc_slow;
+ u64 pp_alloc_slow_high_order;
+ u64 pp_alloc_empty;
+ u64 pp_alloc_refill;
+ u64 pp_alloc_waive;
+ u64 pp_recycle_cached;
+ u64 pp_recycle_cache_full;
+ u64 pp_recycle_ring;
+ u64 pp_recycle_ring_full;
+ u64 pp_recycle_released_ref;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_decrypted_packets;
+ u64 tls_decrypted_bytes;
+ u64 tls_resync_req_pkt;
+ u64 tls_resync_req_start;
+ u64 tls_resync_req_end;
+ u64 tls_resync_req_skip;
+ u64 tls_resync_res_ok;
+ u64 tls_resync_res_retry;
+ u64 tls_resync_res_skip;
+ u64 tls_err;
+#endif
};
struct mlx5e_sq_stats {
@@ -320,10 +408,11 @@ struct mlx5e_sq_stats {
u64 csum_partial_inner;
u64 added_vlan_packets;
u64 nop;
+ u64 mpwqe_blks;
+ u64 mpwqe_pkts;
#ifdef CONFIG_MLX5_EN_TLS
u64 tls_encrypted_packets;
u64 tls_encrypted_bytes;
- u64 tls_ctx;
u64 tls_ooo;
u64 tls_dump_packets;
u64 tls_dump_bytes;
@@ -363,6 +452,15 @@ struct mlx5e_ch_stats {
u64 eq_rearm;
};
+struct mlx5e_ptp_cq_stats {
+ u64 cqe;
+ u64 err_cqe;
+ u64 abort;
+ u64 abort_abs_diff_ns;
+ u64 resync_cqe;
+ u64 resync_event;
+};
+
struct mlx5e_stats {
struct mlx5e_sw_stats sw;
struct mlx5e_qcounter_stats qcnt;
@@ -390,5 +488,8 @@ extern MLX5E_DECLARE_STATS_GRP(per_prio);
extern MLX5E_DECLARE_STATS_GRP(pme);
extern MLX5E_DECLARE_STATS_GRP(channels);
extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest);
+extern MLX5E_DECLARE_STATS_GRP(ipsec_sw);
+extern MLX5E_DECLARE_STATS_GRP(ptp);
+extern MLX5E_DECLARE_STATS_GRP(macsec_hw);
#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 74091f72c9a8..5a6aa61ec82a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -31,405 +31,610 @@
*/
#include <net/flow_dissector.h>
+#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
-#include <net/tc_act/tc_mirred.h>
-#include <net/tc_act/tc_vlan.h>
-#include <net/tc_act/tc_tunnel_key.h>
-#include <net/tc_act/tc_pedit.h>
-#include <net/tc_act/tc_csum.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
+#include <net/bareudp.h>
+#include <net/bonding.h>
#include "en.h"
+#include "en/tc/post_act.h"
#include "en_rep.h"
+#include "en/rep/tc.h"
+#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
-#include "eswitch_offloads_chains.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
+#include "en/mapping.h"
+#include "en/tc_ct.h"
+#include "en/mod_hdr.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc/sample.h"
+#include "en/tc/act/act.h"
+#include "en/tc/post_meter.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
+#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
+#include <asm/div64.h>
+#include "lag/lag.h"
+#include "lag/mp.h"
-struct mlx5_nic_flow_attr {
- u32 action;
- u32 flow_tag;
- struct mlx5_modify_hdr *modify_hdr;
- u32 hairpin_tirn;
- u8 match_level;
- struct mlx5_flow_table *hairpin_ft;
- struct mlx5_fc *counter;
-};
+#define MLX5E_TC_TABLE_NUM_GROUPS 4
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
-
-enum {
- MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
- MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
- MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
- MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
- MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
- MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
- MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
- MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
- MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
-};
+struct mlx5e_tc_table {
+ /* Protects the dynamic assignment of the t parameter
+ * which is the nic tc root table.
+ */
+ struct mutex t_lock;
+ struct mlx5e_priv *priv;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_table *miss_t;
+ struct mlx5_fs_chains *chains;
+ struct mlx5e_post_act *post_act;
-#define MLX5E_TC_MAX_SPLITS 1
-
-/* Helper struct for accessing a struct containing list_head array.
- * Containing struct
- * |- Helper array
- * [0] Helper item 0
- * |- list_head item 0
- * |- index (0)
- * [1] Helper item 1
- * |- list_head item 1
- * |- index (1)
- * To access the containing struct from one of the list_head items:
- * 1. Get the helper item from the list_head item using
- * helper item =
- * container_of(list_head item, helper struct type, list_head field)
- * 2. Get the contining struct from the helper item and its index in the array:
- * containing struct =
- * container_of(helper item, containing struct type, helper field[index])
- */
-struct encap_flow_item {
- struct mlx5e_encap_entry *e; /* attached encap instance */
- struct list_head list;
- int index;
+ struct rhashtable ht;
+
+ struct mod_hdr_tbl mod_hdr;
+ struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
+ DECLARE_HASHTABLE(hairpin_tbl, 8);
+
+ struct notifier_block netdevice_nb;
+ struct netdev_net_notifier netdevice_nn;
+
+ struct mlx5_tc_ct_priv *ct;
+ struct mapping_ctx *mapping;
};
-struct mlx5e_tc_flow {
- struct rhash_head node;
- struct mlx5e_priv *priv;
- u64 cookie;
- unsigned long flags;
- struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
- /* Flow can be associated with multiple encap IDs.
- * The number of encaps is bounded by the number of supported
- * destinations.
+struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
+ [CHAIN_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+ .moffset = 0,
+ .mlen = 16,
+ },
+ [VPORT_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+ .moffset = 16,
+ .mlen = 16,
+ },
+ [TUNNEL_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
+ .moffset = 8,
+ .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
+ .soffset = MLX5_BYTE_OFF(fte_match_param,
+ misc_parameters_2.metadata_reg_c_1),
+ },
+ [ZONE_TO_REG] = zone_to_reg_ct,
+ [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
+ [CTSTATE_TO_REG] = ctstate_to_reg_ct,
+ [MARK_TO_REG] = mark_to_reg_ct,
+ [LABELS_TO_REG] = labels_to_reg_ct,
+ [FTEID_TO_REG] = fteid_to_reg_ct,
+ /* For NIC rules we store the restore metadata directly
+ * into reg_b that is passed to SW since we don't
+ * jump between steering domains.
*/
- struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5e_tc_flow *peer_flow;
- struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
- struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
- struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
- struct list_head hairpin; /* flows sharing the same hairpin */
- struct list_head peer; /* flows with peer flow */
- struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
- int tmp_efi_index;
- struct list_head tmp_list; /* temporary flow list used by neigh update */
- refcount_t refcnt;
- struct rcu_head rcu_head;
- struct completion init_done;
- union {
- struct mlx5_esw_flow_attr esw_attr[0];
- struct mlx5_nic_flow_attr nic_attr[0];
- };
+ [NIC_CHAIN_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
+ .moffset = 0,
+ .mlen = 16,
+ },
+ [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
+ [PACKET_COLOR_TO_REG] = packet_color_to_reg,
};
-struct mlx5e_tc_flow_parse_attr {
- const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
- struct net_device *filter_dev;
- struct mlx5_flow_spec spec;
- int num_mod_hdr_actions;
- int max_mod_hdr_actions;
- void *mod_hdr_actions;
- int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
-};
+struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
+{
+ struct mlx5e_tc_table *tc;
-#define MLX5E_TC_TABLE_NUM_GROUPS 4
-#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
+ tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
+ return tc ? tc : ERR_PTR(-ENOMEM);
+}
-struct mlx5e_hairpin {
- struct mlx5_hairpin *pair;
+void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
+{
+ kvfree(tc);
+}
- struct mlx5_core_dev *func_mdev;
- struct mlx5e_priv *func_priv;
- u32 tdn;
- u32 tirn;
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
+{
+ return tc->chains;
+}
- int num_channels;
- struct mlx5e_rqt indir_rqt;
- u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_ttc_table ttc;
-};
+/* To avoid false lock dependency warning set the tc_ht lock
+ * class different than the lock class of the ht being used when deleting
+ * last flow from a group and then deleting a group, we get into del_sw_flow_group()
+ * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
+ * it's different than the ht->mutex here.
+ */
+static struct lock_class_key tc_ht_lock_key;
-struct mlx5e_hairpin_entry {
- /* a node of a hash table which keeps all the hairpin entries */
- struct hlist_node hairpin_hlist;
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
+static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
- /* protects flows list */
- spinlock_t flows_lock;
- /* flows sharing the same hairpin */
- struct list_head flows;
- /* hpe's that were not fully initialized when dead peer update event
- * function traversed them.
- */
- struct list_head dead_peer_wait_list;
+void
+mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 val,
+ u32 mask)
+{
+ void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ u32 max_mask = GENMASK(match_len - 1, 0);
+ __be32 curr_mask_be, curr_val_be;
+ u32 curr_mask, curr_val;
- u16 peer_vhca_id;
- u8 prio;
- struct mlx5e_hairpin *hp;
- refcount_t refcnt;
- struct completion res_ready;
-};
+ fmask = headers_c + soffset;
+ fval = headers_v + soffset;
-struct mod_hdr_key {
- int num_actions;
- void *actions;
-};
+ memcpy(&curr_mask_be, fmask, 4);
+ memcpy(&curr_val_be, fval, 4);
-struct mlx5e_mod_hdr_entry {
- /* a node of a hash table which keeps all the mod_hdr entries */
- struct hlist_node mod_hdr_hlist;
+ curr_mask = be32_to_cpu(curr_mask_be);
+ curr_val = be32_to_cpu(curr_val_be);
- /* protects flows list */
- spinlock_t flows_lock;
- /* flows sharing the same mod_hdr entry */
- struct list_head flows;
+ //move to correct offset
+ WARN_ON(mask > max_mask);
+ mask <<= moffset;
+ val <<= moffset;
+ max_mask <<= moffset;
- struct mod_hdr_key key;
+ //zero val and mask
+ curr_mask &= ~max_mask;
+ curr_val &= ~max_mask;
- struct mlx5_modify_hdr *modify_hdr;
+ //add current to mask
+ curr_mask |= mask;
+ curr_val |= val;
- refcount_t refcnt;
- struct completion res_ready;
- int compl_result;
-};
+ //back to be32 and write
+ curr_mask_be = cpu_to_be32(curr_mask);
+ curr_val_be = cpu_to_be32(curr_val);
-#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+ memcpy(fmask, &curr_mask_be, 4);
+ memcpy(fval, &curr_val_be, 4);
-static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow);
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+}
-static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+void
+mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 *val,
+ u32 *mask)
{
- if (!flow || !refcount_inc_not_zero(&flow->refcnt))
- return ERR_PTR(-EINVAL);
- return flow;
+ void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ u32 max_mask = GENMASK(match_len - 1, 0);
+ __be32 curr_mask_be, curr_val_be;
+ u32 curr_mask, curr_val;
+
+ fmask = headers_c + soffset;
+ fval = headers_v + soffset;
+
+ memcpy(&curr_mask_be, fmask, 4);
+ memcpy(&curr_val_be, fval, 4);
+
+ curr_mask = be32_to_cpu(curr_mask_be);
+ curr_val = be32_to_cpu(curr_val_be);
+
+ *mask = (curr_mask >> moffset) & max_mask;
+ *val = (curr_val >> moffset) & max_mask;
}
-static void mlx5e_flow_put(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+int
+mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
{
- if (refcount_dec_and_test(&flow->refcnt)) {
- mlx5e_tc_del_flow(priv, flow);
- kfree_rcu(flow, rcu_head);
- }
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ char *modact;
+ int err;
+
+ modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
+
+ /* Firmware has 5bit length field and 0 means 32bits */
+ if (mlen == 32)
+ mlen = 0;
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field, mfield);
+ MLX5_SET(set_action_in, modact, offset, moffset);
+ MLX5_SET(set_action_in, modact, length, mlen);
+ MLX5_SET(set_action_in, modact, data, data);
+ err = mod_hdr_acts->num_actions;
+ mod_hdr_acts->num_actions++;
+
+ return err;
}
-static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+struct mlx5e_tc_int_port_priv *
+mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
{
- /* Complete all memory stores before setting bit. */
- smp_mb__before_atomic();
- set_bit(flag, &flow->flags);
-}
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
-#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
-static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
- unsigned long flag)
-{
- /* test_and_set_bit() provides all necessary barriers */
- return test_and_set_bit(flag, &flow->flags);
-}
+ return uplink_priv->int_port_priv;
+ }
-#define flow_flag_test_and_set(flow, flag) \
- __flow_flag_test_and_set(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
+ return NULL;
+}
-static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+struct mlx5e_flow_meters *
+mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
{
- /* Complete all memory stores before clearing bit. */
- smp_mb__before_atomic();
- clear_bit(flag, &flow->flags);
-}
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_priv *priv;
-#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
+ if (is_mdev_switchdev_mode(dev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ priv = netdev_priv(uplink_rpriv->netdev);
+ if (!uplink_priv->flow_meters)
+ uplink_priv->flow_meters =
+ mlx5e_flow_meters_init(priv,
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
+ if (!IS_ERR(uplink_priv->flow_meters))
+ return uplink_priv->flow_meters;
+ }
+
+ return NULL;
+}
-static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+static struct mlx5_tc_ct_priv *
+get_ct_priv(struct mlx5e_priv *priv)
{
- bool ret = test_bit(flag, &flow->flags);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
- /* Read fields of flow structure only after checking flags. */
- smp_mb__after_atomic();
- return ret;
+ return uplink_priv->ct_priv;
+ }
+
+ return tc->ct;
}
-#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
+static struct mlx5e_tc_psample *
+get_sample_priv(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->tc_psample;
+ }
-static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
+ return NULL;
+}
+
+static struct mlx5e_post_act *
+get_post_action(struct mlx5e_priv *priv)
{
- return flow_flag_test(flow, ESWITCH);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->post_act;
+ }
+
+ return tc->post_act;
}
-static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
+struct mlx5_flow_handle *
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
{
- return flow_flag_test(flow, FT);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (is_mdev_switchdev_mode(priv->mdev))
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}
-static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+void
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
{
- return flow_flag_test(flow, OFFLOADED);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+ return;
+ }
+
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}
-static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
+static bool
+is_flow_meter_action(struct mlx5_flow_attr *attr)
{
- return jhash(key->actions,
- key->num_actions * MLX5_MH_ACT_SZ, 0);
+ return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
+ (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER));
}
-static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
- struct mod_hdr_key *b)
+static int
+mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
{
- if (a->num_actions != b->num_actions)
- return 1;
+ struct mlx5e_post_act *post_act = get_post_action(priv);
+ struct mlx5e_post_meter_priv *post_meter;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
+ if (IS_ERR(meter)) {
+ mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
+ return PTR_ERR(meter);
+ }
+
+ ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
+ post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter,
+ meter->red_counter);
+ if (IS_ERR(post_meter)) {
+ mlx5_core_err(priv->mdev, "Failed to init post meter\n");
+ goto err_meter_init;
+ }
- return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
+ attr->meter_attr.meter = meter;
+ attr->meter_attr.post_meter = post_meter;
+ attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+
+err_meter_init:
+ mlx5e_tc_meter_put(meter);
+ return PTR_ERR(post_meter);
}
-static struct mod_hdr_tbl *
-get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
+static void
+mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
- &priv->fs.tc.mod_hdr;
+ mlx5e_post_meter_cleanup(attr->meter_attr.post_meter);
+ mlx5e_tc_meter_put(attr->meter_attr.meter);
}
-static struct mlx5e_mod_hdr_entry *
-mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
{
- struct mlx5e_mod_hdr_entry *mh, *found = NULL;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
- hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
- if (!cmp_mod_hdr_info(&mh->key, key)) {
- refcount_inc(&mh->refcnt);
- found = mh;
- break;
- }
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
+ &attr->parse_attr->mod_hdr_acts;
+
+ return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
+ spec, attr,
+ mod_hdr_acts);
}
- return found;
+ if (!is_mdev_switchdev_mode(priv->mdev))
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
+ return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
+
+ if (is_flow_meter_action(attr)) {
+ err = mlx5e_tc_add_flow_meter(priv, attr);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}
-static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
- struct mlx5e_mod_hdr_entry *mh,
- int namespace)
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
{
- struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
return;
- hash_del(&mh->mod_hdr_hlist);
- mutex_unlock(&tbl->lock);
+ }
+
+ if (!is_mdev_switchdev_mode(priv->mdev)) {
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
+ return;
+ }
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
+ mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
+ return;
+ }
- WARN_ON(!list_empty(&mh->flows));
- if (mh->compl_result > 0)
- mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
- kfree(mh);
+ if (attr->meter_attr.meter)
+ mlx5e_tc_del_flow_meter(attr);
}
-static int get_flow_name_space(struct mlx5e_tc_flow *flow)
+int
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
{
- return mlx5e_is_eswitch_flow(flow) ?
- MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+ int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
+
+ return ret < 0 ? ret : 0;
}
-static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct mlx5e_tc_flow_parse_attr *parse_attr)
+
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5e_tc_attr_to_reg type,
+ int act_id, u32 data)
{
- int num_actions, actions_size, namespace, err;
- struct mlx5e_mod_hdr_entry *mh;
- struct mod_hdr_tbl *tbl;
- struct mod_hdr_key key;
- u32 hash_key;
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ char *modact;
+
+ modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
+
+ /* Firmware has 5bit length field and 0 means 32bits */
+ if (mlen == 32)
+ mlen = 0;
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field, mfield);
+ MLX5_SET(set_action_in, modact, offset, moffset);
+ MLX5_SET(set_action_in, modact, length, mlen);
+ MLX5_SET(set_action_in, modact, data, data);
+}
- num_actions = parse_attr->num_mod_hdr_actions;
- actions_size = MLX5_MH_ACT_SZ * num_actions;
+struct mlx5e_hairpin {
+ struct mlx5_hairpin *pair;
- key.actions = parse_attr->mod_hdr_actions;
- key.num_actions = num_actions;
+ struct mlx5_core_dev *func_mdev;
+ struct mlx5e_priv *func_priv;
+ u32 tdn;
+ struct mlx5e_tir direct_tir;
- hash_key = hash_mod_hdr_info(&key);
+ int num_channels;
+ struct mlx5e_rqt indir_rqt;
+ struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5_ttc_table *ttc;
+};
- namespace = get_flow_name_space(flow);
- tbl = get_mod_hdr_table(priv, namespace);
+struct mlx5e_hairpin_entry {
+ /* a node of a hash table which keeps all the hairpin entries */
+ struct hlist_node hairpin_hlist;
- mutex_lock(&tbl->lock);
- mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
- if (mh) {
- mutex_unlock(&tbl->lock);
- wait_for_completion(&mh->res_ready);
+ /* protects flows list */
+ spinlock_t flows_lock;
+ /* flows sharing the same hairpin */
+ struct list_head flows;
+ /* hpe's that were not fully initialized when dead peer update event
+ * function traversed them.
+ */
+ struct list_head dead_peer_wait_list;
- if (mh->compl_result < 0) {
- err = -EREMOTEIO;
- goto attach_header_err;
- }
- goto attach_flow;
- }
+ u16 peer_vhca_id;
+ u8 prio;
+ struct mlx5e_hairpin *hp;
+ refcount_t refcnt;
+ struct completion res_ready;
+};
- mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
- if (!mh) {
- mutex_unlock(&tbl->lock);
- return -ENOMEM;
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+{
+ if (!flow || !refcount_inc_not_zero(&flow->refcnt))
+ return ERR_PTR(-EINVAL);
+ return flow;
+}
+
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
+{
+ if (refcount_dec_and_test(&flow->refcnt)) {
+ mlx5e_tc_del_flow(priv, flow);
+ kfree_rcu(flow, rcu_head);
}
+}
+
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, ESWITCH);
+}
- mh->key.actions = (void *)mh + sizeof(*mh);
- memcpy(mh->key.actions, key.actions, actions_size);
- mh->key.num_actions = num_actions;
- spin_lock_init(&mh->flows_lock);
- INIT_LIST_HEAD(&mh->flows);
- refcount_set(&mh->refcnt, 1);
- init_completion(&mh->res_ready);
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, FT);
+}
- hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
- mutex_unlock(&tbl->lock);
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, OFFLOADED);
+}
- mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
- mh->key.num_actions,
- mh->key.actions);
- if (IS_ERR(mh->modify_hdr)) {
- err = PTR_ERR(mh->modify_hdr);
- mh->compl_result = err;
- goto alloc_header_err;
- }
- mh->compl_result = 1;
- complete_all(&mh->res_ready);
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
+{
+ return mlx5e_is_eswitch_flow(flow) ?
+ MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+}
-attach_flow:
+static struct mod_hdr_tbl *
+get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
+ &esw->offloads.mod_hdr :
+ &tc->mod_hdr;
+}
+
+static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5e_mod_hdr_handle *mh;
+
+ mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
+ mlx5e_get_flow_namespace(flow),
+ &parse_attr->mod_hdr_acts);
+ if (IS_ERR(mh))
+ return PTR_ERR(mh);
+
+ modify_hdr = mlx5e_mod_hdr_get(mh);
+ flow->attr->modify_hdr = modify_hdr;
flow->mh = mh;
- spin_lock(&mh->flows_lock);
- list_add(&flow->mod_hdr, &mh->flows);
- spin_unlock(&mh->flows_lock);
- if (mlx5e_is_eswitch_flow(flow))
- flow->esw_attr->modify_hdr = mh->modify_hdr;
- else
- flow->nic_attr->modify_hdr = mh->modify_hdr;
return 0;
-
-alloc_header_err:
- complete_all(&mh->res_ready);
-attach_header_err:
- mlx5e_mod_hdr_put(priv, mh, namespace);
- return err;
}
static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
@@ -439,137 +644,139 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
if (!flow->mh)
return;
- spin_lock(&flow->mh->flows_lock);
- list_del(&flow->mod_hdr);
- spin_unlock(&flow->mh->flows_lock);
-
- mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
+ mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
+ flow->mh);
flow->mh = NULL;
}
static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
+ struct mlx5_core_dev *mdev;
struct net_device *netdev;
struct mlx5e_priv *priv;
- netdev = __dev_get_by_index(net, ifindex);
+ netdev = dev_get_by_index(net, ifindex);
+ if (!netdev)
+ return ERR_PTR(-ENODEV);
+
priv = netdev_priv(netdev);
- return priv->mdev;
+ mdev = priv->mdev;
+ dev_put(netdev);
+
+ /* Mirred tc action holds a refcount on the ifindex net_device (see
+ * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
+ * after dev_put(netdev), while we're in the context of adding a tc flow.
+ *
+ * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
+ * stored in a hairpin object, which exists until all flows, that refer to it, get
+ * removed.
+ *
+ * On the other hand, after a hairpin object has been created, the peer net_device may
+ * be removed/unbound while there are still some hairpin flows that are using it. This
+ * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
+ * NETDEV_UNREGISTER event of the peer net_device.
+ */
+ return mdev;
}
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
- u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
- void *tirc;
+ struct mlx5e_tir_builder *builder;
int err;
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
if (err)
- goto alloc_tdn_err;
-
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
- MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
- MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+ goto out;
- err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
+ mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
+ err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
if (err)
goto create_tir_err;
- return 0;
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
create_tir_err:
mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
-alloc_tdn_err:
- return err;
+
+ goto out;
}
static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
- mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
+ mlx5e_tir_destroy(&hp->direct_tir);
mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}
-static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
-{
- u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
- struct mlx5e_priv *priv = hp->func_priv;
- int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
-
- mlx5e_build_default_indir_rqt(indirection_rqt, sz,
- hp->num_channels);
-
- for (i = 0; i < sz; i++) {
- ix = i;
- if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
- ix = mlx5e_bits_invert(i, ilog2(sz));
- ix = indirection_rqt[ix];
- rqn = hp->pair->rqn[ix];
- MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
- }
-}
-
static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
- int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
struct mlx5e_priv *priv = hp->func_priv;
struct mlx5_core_dev *mdev = priv->mdev;
- void *rqtc;
- u32 *in;
+ struct mlx5e_rss_params_indir *indir;
+ int err;
- inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
+ indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
+ if (!indir)
return -ENOMEM;
- rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
-
- MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
- MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
-
- mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
-
- err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
- if (!err)
- hp->indir_rqt.enabled = true;
+ mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
+ err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
+ mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
+ indir);
- kvfree(in);
+ kvfree(indir);
return err;
}
static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
struct mlx5e_priv *priv = hp->func_priv;
- u32 in[MLX5_ST_SZ_DW(create_tir_in)];
- int tt, i, err;
- void *tirc;
+ struct mlx5e_rss_params_hash rss_hash;
+ enum mlx5_traffic_types tt, max_tt;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
+ struct mlx5e_rss_params_traffic_type rss_tt;
- memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
- tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ rss_tt = mlx5e_rss_get_default_tt_config(tt);
- MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
- MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
- MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
+ mlx5e_tir_builder_build_rqt(builder, hp->tdn,
+ mlx5e_rqt_get_rqtn(&hp->indir_rqt),
+ false);
+ mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
- err = mlx5_core_create_tir(hp->func_mdev, in,
- MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
+ err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
if (err) {
mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
goto err_destroy_tirs;
}
+
+ mlx5e_tir_builder_clear(builder);
}
- return 0;
-err_destroy_tirs:
- for (i = 0; i < tt; i++)
- mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
+out:
+ mlx5e_tir_builder_free(builder);
return err;
+
+err_destroy_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_tir_destroy(&hp->indir_tir[tt]);
+
+ goto out;
}
static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
@@ -577,7 +784,7 @@ static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
int tt;
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
+ mlx5e_tir_destroy(&hp->indir_tir[tt]);
}
static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
@@ -588,12 +795,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
memset(ttc_params, 0, sizeof(*ttc_params));
- ttc_params->any_tt_tirn = hp->tirn;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
+ ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_tir_get_tirn(&hp->direct_tir) :
+ mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+ }
- ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_TC_PRIO;
}
@@ -602,6 +813,7 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
struct mlx5e_priv *priv = hp->func_priv;
struct ttc_params ttc_params;
+ struct mlx5_ttc_table *ttc;
int err;
err = mlx5e_hairpin_create_indirect_rqt(hp);
@@ -613,30 +825,32 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
goto err_create_indirect_tirs;
mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
- err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
- if (err)
+ hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(hp->ttc)) {
+ err = PTR_ERR(hp->ttc);
goto err_create_ttc_table;
+ }
+ ttc = mlx5e_fs_get_ttc(priv->fs, false);
netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
- hp->num_channels, hp->ttc.ft.t->id);
+ hp->num_channels,
+ mlx5_get_ttc_flow_table(ttc)->id);
return 0;
err_create_ttc_table:
mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
- mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+ mlx5e_rqt_destroy(&hp->indir_rqt);
return err;
}
static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
- struct mlx5e_priv *priv = hp->func_priv;
-
- mlx5e_destroy_ttc_table(priv, &hp->ttc);
+ mlx5_destroy_ttc_table(hp->ttc);
mlx5e_hairpin_destroy_indirect_tirs(hp);
- mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+ mlx5e_rqt_destroy(&hp->indir_rqt);
}
static struct mlx5e_hairpin *
@@ -654,6 +868,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params
func_mdev = priv->mdev;
peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (IS_ERR(peer_mdev)) {
+ err = PTR_ERR(peer_mdev);
+ goto create_pair_err;
+ }
pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
if (IS_ERR(pair)) {
@@ -703,10 +921,11 @@ static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
u16 peer_vhca_id, u8 prio)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5e_hairpin_entry *hpe;
u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
- hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
+ hash_for_each_possible(tc->hairpin_tbl, hpe,
hairpin_hlist, hash_key) {
if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
refcount_inc(&hpe->refcnt);
@@ -720,11 +939,12 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
struct mlx5e_hairpin_entry *hpe)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
/* no more hairpin flows for us, release the hairpin pair */
- if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
+ if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
return;
hash_del(&hpe->hairpin_hlist);
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
if (!IS_ERR_OR_NULL(hpe->hp)) {
netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
@@ -780,6 +1000,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
int peer_ifindex = parse_attr->mirred_ifindex[0];
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
@@ -792,6 +1013,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
int err;
peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (IS_ERR(peer_mdev)) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
+ return PTR_ERR(peer_mdev);
+ }
+
if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
return -EOPNOTSUPP;
@@ -803,10 +1029,10 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
if (err)
return err;
- mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_lock(&tc->hairpin_tbl_lock);
hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
if (hpe) {
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
wait_for_completion(&hpe->res_ready);
if (IS_ERR(hpe->hp)) {
@@ -818,7 +1044,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
if (!hpe) {
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
return -ENOMEM;
}
@@ -830,11 +1056,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
refcount_set(&hpe->refcnt, 1);
init_completion(&hpe->res_ready);
- hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
+ hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
hash_hairpin_info(peer_id, match_prio));
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
- params.log_data_size = 15;
+ params.log_data_size = 16;
params.log_data_size = min_t(u8, params.log_data_size,
MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
params.log_data_size = max_t(u8, params.log_data_size,
@@ -862,16 +1088,17 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
}
netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
- hp->tirn, hp->pair->rqn[0],
+ mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
dev_name(hp->pair->peer_mdev->device),
hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
attach_flow:
if (hpe->hp->num_channels > 1) {
flow_flag_set(flow, HAIRPIN_RSS);
- flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+ flow->attr->nic_attr->hairpin_ft =
+ mlx5_get_ttc_flow_table(hpe->hp->ttc);
} else {
- flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
+ flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
}
flow->hpe = hpe;
@@ -901,209 +1128,380 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
flow->hpe = NULL;
}
-static int
-mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
- struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_fs_chains *nic_chains;
struct mlx5_flow_act flow_act = {
.action = attr->action,
.flags = FLOW_ACT_NO_APPEND,
};
- struct mlx5_fc *counter = NULL;
- int err, dest_ix = 0;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_table *ft;
+ int dest_ix = 0;
+ nic_chains = mlx5e_nic_chains(tc);
flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
- flow_context->flow_tag = attr->flow_tag;
-
- if (flow_flag_test(flow, HAIRPIN)) {
- err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
- if (err)
- return err;
+ flow_context->flow_tag = nic_attr->flow_tag;
- if (flow_flag_test(flow, HAIRPIN_RSS)) {
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[dest_ix].ft = attr->hairpin_ft;
- } else {
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest[dest_ix].tir_num = attr->hairpin_tirn;
- }
+ if (attr->dest_ft) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = attr->dest_ft;
+ dest_ix++;
+ } else if (nic_attr->hairpin_ft) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = nic_attr->hairpin_ft;
+ dest_ix++;
+ } else if (nic_attr->hairpin_tirn) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
dest_ix++;
} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[dest_ix].ft = priv->fs.vlan.ft.t;
+ if (attr->dest_chain) {
+ dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
+ attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+ if (IS_ERR(dest[dest_ix].ft))
+ return ERR_CAST(dest[dest_ix].ft);
+ } else {
+ dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
+ }
dest_ix++;
}
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(dev, true);
- if (IS_ERR(counter))
- return PTR_ERR(counter);
+ if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[dest_ix].counter_id = mlx5_fc_id(counter);
+ dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
dest_ix++;
- attr->counter = counter;
}
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_hdr = attr->modify_hdr;
- kfree(parse_attr->mod_hdr_actions);
- if (err)
- return err;
+
+ mutex_lock(&tc->t_lock);
+ if (IS_ERR_OR_NULL(tc->t)) {
+ /* Create the root table here if doesn't exist yet */
+ tc->t =
+ mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
+
+ if (IS_ERR(tc->t)) {
+ mutex_unlock(&tc->t_lock);
+ netdev_err(priv->netdev,
+ "Failed to create tc offload table\n");
+ rule = ERR_CAST(tc->t);
+ goto err_ft_get;
+ }
}
+ mutex_unlock(&tc->t_lock);
- mutex_lock(&priv->fs.tc.t_lock);
- if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
- struct mlx5_flow_table_attr ft_attr = {};
- int tc_grp_size, tc_tbl_size, tc_num_grps;
- u32 max_flow_counter;
+ if (attr->chain || attr->prio)
+ ft = mlx5_chains_get_table(nic_chains,
+ attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+ else
+ ft = attr->ft;
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+ if (IS_ERR(ft)) {
+ rule = ERR_CAST(ft);
+ goto err_ft_get;
+ }
- tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
- tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
- BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
- tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS;
+ rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act, dest, dest_ix);
+ if (IS_ERR(rule))
+ goto err_rule;
- ft_attr.prio = MLX5E_TC_PRIO;
- ft_attr.max_fte = tc_tbl_size;
- ft_attr.level = MLX5E_TC_FT_LEVEL;
- ft_attr.autogroup.max_num_groups = tc_num_grps;
- priv->fs.tc.t =
- mlx5_create_auto_grouped_flow_table(priv->fs.ns,
- &ft_attr);
- if (IS_ERR(priv->fs.tc.t)) {
- mutex_unlock(&priv->fs.tc.t_lock);
- NL_SET_ERR_MSG_MOD(extack,
- "Failed to create tc offload table\n");
- netdev_err(priv->netdev,
- "Failed to create tc offload table\n");
- return PTR_ERR(priv->fs.tc.t);
- }
+ return rule;
+
+err_rule:
+ if (attr->chain || attr->prio)
+ mlx5_chains_put_table(nic_chains,
+ attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+err_ft_get:
+ if (attr->dest_chain)
+ mlx5_chains_put_table(nic_chains,
+ attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+
+ return ERR_CAST(rule);
+}
+
+static int
+alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
+ struct mlx5_flow_attr *attr)
+
+{
+ struct mlx5_fc *counter;
+
+ counter = mlx5_fc_create(counter_dev, true);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
+
+ attr->counter = counter;
+ return 0;
+}
+
+static int
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_core_dev *dev = priv->mdev;
+ int err;
+
+ parse_attr = attr->parse_attr;
+
+ if (flow_flag_test(flow, HAIRPIN)) {
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
+ if (err)
+ return err;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = alloc_flow_attr_counter(dev, attr);
+ if (err)
+ return err;
}
- if (attr->match_level != MLX5_MATCH_NONE)
- parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
+ if (err)
+ return err;
+ }
- flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
- &flow_act, dest, dest_ix);
- mutex_unlock(&priv->fs.tc.t_lock);
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
+ attr, &parse_attr->mod_hdr_acts);
+ else
+ flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
+ attr);
return PTR_ERR_OR_ZERO(flow->rule[0]);
}
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_fs_chains *nic_chains;
+
+ nic_chains = mlx5e_nic_chains(tc);
+ mlx5_del_flow_rules(rule);
+
+ if (attr->chain || attr->prio)
+ mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+
+ if (attr->dest_chain)
+ mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+}
+
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
- struct mlx5_fc *counter = NULL;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_flow_attr *attr = flow->attr;
- counter = attr->counter;
- if (!IS_ERR_OR_NULL(flow->rule[0]))
- mlx5_del_flow_rules(flow->rule[0]);
- mlx5_fc_destroy(priv->mdev, counter);
+ flow_flag_clear(flow, OFFLOADED);
- mutex_lock(&priv->fs.tc.t_lock);
- if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
- mlx5_destroy_flow_table(priv->fs.tc.t);
- priv->fs.tc.t = NULL;
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
+ else if (!IS_ERR_OR_NULL(flow->rule[0]))
+ mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
+
+ /* Remove root table if no rules are left to avoid
+ * extra steering hops.
+ */
+ mutex_lock(&tc->t_lock);
+ if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
+ !IS_ERR_OR_NULL(tc->t)) {
+ mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
+ tc->t = NULL;
}
- mutex_unlock(&priv->fs.tc.t_lock);
+ mutex_unlock(&tc->t_lock);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(priv->mdev, attr->counter);
+
if (flow_flag_test(flow, HAIRPIN))
mlx5e_hairpin_flow_del(priv, flow);
-}
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index);
+ free_flow_post_acts(flow);
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct net_device *mirred_dev,
- int out_index,
- struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid);
+ kvfree(attr->parse_attr);
+ kfree(flow->attr);
+}
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct mlx5_flow_handle *rule;
- rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+ rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
+
if (IS_ERR(rule))
return rule;
- if (attr->split_count) {
+ if (attr->esw_attr->split_count) {
flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
- if (IS_ERR(flow->rule[1])) {
- mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
- return flow->rule[1];
- }
+ if (IS_ERR(flow->rule[1]))
+ goto err_rule1;
}
return rule;
+
+err_rule1:
+ mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
+ return flow->rule[1];
}
-static void
-mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
- struct mlx5e_tc_flow *flow,
- struct mlx5_esw_flow_attr *attr)
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
{
flow_flag_clear(flow, OFFLOADED);
- if (attr->split_count)
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
+ return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+
+ if (attr->esw_attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
- struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *slow_attr)
+ struct mlx5_flow_spec *spec)
{
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct mlx5e_mod_hdr_handle *mh = NULL;
+ struct mlx5_flow_attr *slow_attr;
struct mlx5_flow_handle *rule;
+ bool fwd_and_modify_cap;
+ u32 chain_mapping = 0;
+ int err;
- memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!slow_attr)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr->split_count = 0;
- slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ slow_attr->esw_attr->split_count = 0;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
+ if (!fwd_and_modify_cap)
+ goto skip_restore;
+ err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
+ if (err)
+ goto err_get_chain;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ CHAIN_TO_REG, chain_mapping);
+ if (err)
+ goto err_reg_set;
+
+ mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
+ MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
+ if (IS_ERR(mh)) {
+ err = PTR_ERR(mh);
+ goto err_attach;
+ }
+
+ slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);
+
+skip_restore:
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
- if (!IS_ERR(rule))
- flow_flag_set(flow, SLOW);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto err_offload;
+ }
+
+ flow->slow_mh = mh;
+ flow->chain_mapping = chain_mapping;
+ flow_flag_set(flow, SLOW);
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ kfree(slow_attr);
return rule;
+
+err_offload:
+ if (fwd_and_modify_cap)
+ mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
+err_attach:
+err_reg_set:
+ if (fwd_and_modify_cap)
+ mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
+err_get_chain:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ kfree(slow_attr);
+ return ERR_PTR(err);
}
-static void
-mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
- struct mlx5e_tc_flow *flow,
- struct mlx5_esw_flow_attr *slow_attr)
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow)
{
- memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+ struct mlx5_flow_attr *slow_attr;
+
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!slow_attr) {
+ mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
+ return;
+ }
+
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr->split_count = 0;
- slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ slow_attr->esw_attr->split_count = 0;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+ if (flow->slow_mh) {
+ slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ slow_attr->modify_hdr = mlx5e_mod_hdr_get(flow->slow_mh);
+ }
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
+ if (flow->slow_mh) {
+ mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), flow->slow_mh);
+ mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
+ flow->chain_mapping = 0;
+ flow->slow_mh = NULL;
+ }
flow_flag_clear(flow, SLOW);
+ kfree(slow_attr);
}
/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
@@ -1155,108 +1553,327 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
mutex_unlock(&uplink_priv->unready_flows_lock);
}
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
+{
+ struct mlx5_core_dev *out_mdev, *route_mdev;
+ struct mlx5e_priv *out_priv, *route_priv;
+
+ out_priv = netdev_priv(out_dev);
+ out_mdev = out_priv->mdev;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ if (out_mdev->coredev_type != MLX5_COREDEV_PF)
+ return false;
+
+ if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
+ route_mdev->coredev_type != MLX5_COREDEV_SF)
+ return false;
+
+ return mlx5e_same_hw_devs(out_priv, route_priv);
+}
+
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+{
+ struct mlx5e_priv *out_priv, *route_priv;
+ struct mlx5_devcom *devcom = NULL;
+ struct mlx5_core_dev *route_mdev;
+ struct mlx5_eswitch *esw;
+ u16 vhca_id;
+ int err;
+
+ out_priv = netdev_priv(out_dev);
+ esw = out_priv->mdev->priv.eswitch;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+ if (mlx5_lag_is_active(out_priv->mdev)) {
+ /* In lag case we may get devices from different eswitch instances.
+ * If we failed to get vport num, it means, mostly, that we on the wrong
+ * eswitch.
+ */
+ err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ if (err != -ENOENT)
+ return err;
+
+ devcom = out_priv->mdev->priv.devcom;
+ esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!esw)
+ return -ENODEV;
+ }
+
+ err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ if (devcom)
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return err;
+}
+
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+ struct mlx5_modify_hdr *mod_hdr;
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev,
+ mlx5e_get_flow_namespace(flow),
+ mod_hdr_acts->num_actions,
+ mod_hdr_acts->actions);
+ if (IS_ERR(mod_hdr))
+ return PTR_ERR(mod_hdr);
+
+ WARN_ON(attr->modify_hdr);
+ attr->modify_hdr = mod_hdr;
+
+ return 0;
+}
+
+static int
+set_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack,
+ bool *encap_valid,
+ bool *vf_tun)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct net_device *encap_dev = NULL;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *out_priv;
+ int out_index;
+ int err = 0;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return 0;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+ *encap_valid = true;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ struct net_device *out_dev;
+ int mirred_ifindex;
+
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
+ out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+ if (!out_dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+ err = -ENODEV;
+ goto out;
+ }
+ err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
+ extack, &encap_dev, encap_valid);
+ dev_put(out_dev);
+ if (err)
+ goto out;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ out_priv = netdev_priv(encap_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[out_index].rep = rpriv->rep;
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
+ }
+
+ if (*vf_tun && esw_attr->out_count > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+static void
+clean_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ bool *vf_tun)
+{
+ struct mlx5_esw_flow_attr *esw_attr;
+ int out_index;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ mlx5e_detach_encap(priv, flow, attr, out_index);
+ kfree(attr->parse_attr->tun_info[out_index]);
+ }
+}
+
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
- struct net_device *out_dev, *encap_dev = NULL;
- struct mlx5_fc *counter = NULL;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5e_priv *out_priv;
- bool encap_valid = true;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ bool vf_tun, encap_valid;
u32 max_prio, max_chain;
int err = 0;
- int out_index;
- if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) {
- NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
- return -EOPNOTSUPP;
- }
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
/* We check chain range only for tc flows.
* For ft flows, we checked attr->chain was originally 0 and set it to
* FDB_FT_CHAIN which is outside tc range.
* See mlx5e_rep_setup_ft_cb().
*/
- max_chain = mlx5_esw_chains_get_chain_range(esw);
+ max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
- NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
- return -EOPNOTSUPP;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested chain is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_out;
}
- max_prio = mlx5_esw_chains_get_prio_range(esw);
+ max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
if (attr->prio > max_prio) {
- NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
- return -EOPNOTSUPP;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested priority is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_out;
}
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
- int mirred_ifindex;
+ if (flow_flag_test(flow, TUN_RX)) {
+ err = mlx5e_attach_decap_route(priv, flow);
+ if (err)
+ goto err_out;
+
+ if (!attr->chain && esw_attr->int_port &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ /* If decap route device is internal port, change the
+ * source vport value in reg_c0 back to uplink just in
+ * case the rule performs goto chain > 0. If we have a miss
+ * on chain > 0 we want the metadata regs to hold the
+ * chain id so SW will resume handling of this packet
+ * from the proper chain.
+ */
+ u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
+ esw_attr->in_rep->vport);
+
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
+ metadata);
+ if (err)
+ goto err_out;
- if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
- continue;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+ }
- mirred_ifindex = parse_attr->mirred_ifindex[out_index];
- out_dev = __dev_get_by_index(dev_net(priv->netdev),
- mirred_ifindex);
- err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
- extack, &encap_dev, &encap_valid);
+ if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
+ err = mlx5e_attach_decap(priv, flow, extack);
if (err)
- return err;
+ goto err_out;
+ }
- out_priv = netdev_priv(encap_dev);
- rpriv = out_priv->ppriv;
- attr->dests[out_index].rep = rpriv->rep;
- attr->dests[out_index].mdev = out_priv->mdev;
+ if (netif_is_ovs_master(parse_attr->filter_dev)) {
+ struct mlx5e_tc_int_port *int_port;
+
+ if (attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Internal port rule is only supported on chain 0");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (attr->dest_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Internal port rule offload doesn't support goto action");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
+ parse_attr->filter_dev->ifindex,
+ flow_flag_test(flow, EGRESS) ?
+ MLX5E_TC_INT_PORT_EGRESS :
+ MLX5E_TC_INT_PORT_INGRESS);
+ if (IS_ERR(int_port)) {
+ err = PTR_ERR(int_port);
+ goto err_out;
+ }
+
+ esw_attr->int_port = int_port;
}
+ err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
+ if (err)
+ goto err_out;
+
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err)
- return err;
+ goto err_out;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- kfree(parse_attr->mod_hdr_actions);
- if (err)
- return err;
+ if (vf_tun) {
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
+ if (err)
+ goto err_out;
+ } else {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ if (err)
+ goto err_out;
+ }
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(attr->counter_dev, true);
- if (IS_ERR(counter))
- return PTR_ERR(counter);
-
- attr->counter = counter;
+ err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
+ if (err)
+ goto err_out;
}
/* we get here if one of the following takes place:
* (1) there's no error
* (2) there's an encap action and we don't have valid neigh
*/
- if (!encap_valid) {
- /* continue with goto slow path rule instead */
- struct mlx5_esw_flow_attr slow_attr;
-
- flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
- } else {
+ if (!encap_valid || flow_flag_test(flow, SLOW))
+ flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
+ else
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
- }
- if (IS_ERR(flow->rule[0]))
- return PTR_ERR(flow->rule[0]);
- else
- flow_flag_set(flow, OFFLOADED);
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ goto err_out;
+ }
+ flow_flag_set(flow, OFFLOADED);
return 0;
+
+err_out:
+ flow_flag_set(flow, FAILED);
+ return err;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
- struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
+ struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
void *headers_v = MLX5_ADDR_OF(fte_match_param,
spec->match_value,
misc_parameters_3);
@@ -1271,543 +1888,585 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- struct mlx5_esw_flow_attr slow_attr;
- int out_index;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ bool vf_tun;
+
+ esw_attr = attr->esw_attr;
+ mlx5e_put_flow_tunnel_id(flow);
- if (flow_flag_test(flow, NOT_READY)) {
+ if (flow_flag_test(flow, NOT_READY))
remove_unready_flow(flow);
- kvfree(attr->parse_attr);
- return;
- }
if (mlx5e_is_offloaded_flow(flow)) {
if (flow_flag_test(flow, SLOW))
- mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
else
mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
}
+ complete_all(&flow->del_hw_done);
if (mlx5_flow_has_geneve_opt(flow))
mlx5_geneve_tlv_option_del(priv->mdev->geneve);
mlx5_eswitch_del_vlan_action(esw, attr);
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
- if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
- mlx5e_detach_encap(priv, flow, out_index);
- kfree(attr->parse_attr->tun_info[out_index]);
- }
- kvfree(attr->parse_attr);
+ if (flow->decap_route)
+ mlx5e_detach_decap_route(priv, flow);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- mlx5e_detach_mod_hdr(priv, flow);
+ clean_encap_dests(priv, flow, attr, &vf_tun);
+
+ mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
+ if (vf_tun && attr->modify_hdr)
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ else
+ mlx5e_detach_mod_hdr(priv, flow);
+ }
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(attr->counter_dev, attr->counter);
+ mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
+
+ if (esw_attr->int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
+
+ if (esw_attr->dest_int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
+
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
+ mlx5e_detach_decap(priv, flow);
+
+ free_flow_post_acts(flow);
+
+ if (flow->attr->lag.count)
+ mlx5_lag_del_mpesw_rule(esw->dev);
+
+ kvfree(attr->esw_attr->rx_tun_attr);
+ kvfree(attr->parse_attr);
+ kfree(flow->attr);
}
-void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct list_head *flow_list)
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr slow_attr, *esw_attr;
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- struct mlx5e_tc_flow *flow;
- int err;
+ struct mlx5_flow_attr *attr;
- e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- e->encap_size, e->encap_header,
- MLX5_FLOW_NAMESPACE_FDB);
- if (IS_ERR(e->pkt_reformat)) {
- mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
- PTR_ERR(e->pkt_reformat));
- return;
- }
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(priv);
-
- list_for_each_entry(flow, flow_list, tmp_list) {
- bool all_flow_encaps_valid = true;
- int i;
+ attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
+ return attr->counter;
+}
- if (!mlx5e_is_offloaded_flow(flow))
- continue;
- esw_attr = flow->esw_attr;
- spec = &esw_attr->parse_attr->spec;
-
- esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
- esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
- /* Flow can be associated with multiple encap entries.
- * Before offloading the flow verify that all of them have
- * a valid neighbour.
- */
- for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
- if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
- continue;
- if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
- all_flow_encaps_valid = false;
- break;
- }
- }
- /* Do not offload flows with unresolved neighbors */
- if (!all_flow_encaps_valid)
- continue;
- /* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
- err);
- continue;
- }
+/* Iterate over tmp_list of flows attached to flow_list head. */
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
+{
+ struct mlx5e_tc_flow *flow, *tmp;
- mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
- flow->rule[0] = rule;
- /* was unset when slow path rule removed */
- flow_flag_set(flow, OFFLOADED);
- }
+ list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
+ mlx5e_flow_put(priv, flow);
}
-void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct list_head *flow_list)
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr slow_attr;
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- struct mlx5e_tc_flow *flow;
- int err;
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
- list_for_each_entry(flow, flow_list, tmp_list) {
- if (!mlx5e_is_offloaded_flow(flow))
- continue;
- spec = &flow->esw_attr->parse_attr->spec;
+ if (!flow_flag_test(flow, ESWITCH) ||
+ !flow_flag_test(flow, DUP))
+ return;
- /* update from encap rule to slow path rule */
- rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
- /* mark the flow's encap dest as non-valid */
- flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_del(&flow->peer);
+ mutex_unlock(&esw->offloads.peer_mutex);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
- err);
- continue;
- }
+ flow_flag_clear(flow, DUP);
- mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
- flow->rule[0] = rule;
- /* was unset when fast path rule removed */
- flow_flag_set(flow, OFFLOADED);
+ if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+ kfree(flow->peer_flow);
}
- /* we know that the encap is valid */
- e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
- mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ flow->peer_flow = NULL;
}
-static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- if (mlx5e_is_eswitch_flow(flow))
- return flow->esw_attr->counter;
- else
- return flow->nic_attr->counter;
-}
+ struct mlx5_core_dev *dev = flow->priv->mdev;
+ struct mlx5_devcom *devcom = dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw;
-/* Takes reference to all flows attached to encap and adds the flows to
- * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
- */
-void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
-{
- struct encap_flow_item *efi;
- struct mlx5e_tc_flow *flow;
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return;
- list_for_each_entry(efi, &e->flows, list) {
- flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
- if (IS_ERR(mlx5e_flow_get(flow)))
- continue;
- wait_for_completion(&flow->init_done);
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
- flow->tmp_efi_index = efi->index;
- list_add(&flow->tmp_list, flow_list);
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (mlx5e_is_eswitch_flow(flow)) {
+ mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ } else {
+ mlx5e_tc_del_nic_flow(priv, flow);
}
}
-/* Iterate over tmp_list of flows attached to flow_list head. */
-void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
+static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
{
- struct mlx5e_tc_flow *flow, *tmp;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_action *flow_action = &rule->action;
+ const struct flow_action_entry *act;
+ int i;
- list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
- mlx5e_flow_put(priv, flow);
+ if (chain)
+ return false;
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_GOTO:
+ return true;
+ case FLOW_ACTION_SAMPLE:
+ return true;
+ default:
+ continue;
+ }
+ }
+
+ return false;
}
-static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
- struct mlx5e_encap_entry *e)
+static int
+enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
+ struct flow_dissector_key_enc_opts *opts,
+ struct netlink_ext_ack *extack,
+ bool *dont_care)
{
- struct mlx5e_encap_entry *next = NULL;
+ struct geneve_opt *opt;
+ int off = 0;
-retry:
- rcu_read_lock();
+ *dont_care = true;
- /* find encap with non-zero reference counter value */
- for (next = e ?
- list_next_or_null_rcu(&nhe->encap_list,
- &e->encap_list,
- struct mlx5e_encap_entry,
- encap_list) :
- list_first_or_null_rcu(&nhe->encap_list,
- struct mlx5e_encap_entry,
- encap_list);
- next;
- next = list_next_or_null_rcu(&nhe->encap_list,
- &next->encap_list,
- struct mlx5e_encap_entry,
- encap_list))
- if (mlx5e_encap_take(next))
- break;
+ while (opts->len > off) {
+ opt = (struct geneve_opt *)&opts->data[off];
- rcu_read_unlock();
+ if (!(*dont_care) || opt->opt_class || opt->type ||
+ memchr_inv(opt->opt_data, 0, opt->length * 4)) {
+ *dont_care = false;
- /* release starting encap */
- if (e)
- mlx5e_encap_put(netdev_priv(e->out_dev), e);
- if (!next)
- return next;
+ if (opt->opt_class != htons(U16_MAX) ||
+ opt->type != U8_MAX) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Partial match of tunnel options in chain > 0 isn't supported");
+ netdev_warn(priv->netdev,
+ "Partial match of tunnel options in chain > 0 isn't supported");
+ return -EOPNOTSUPP;
+ }
+ }
- /* wait for encap to be fully initialized */
- wait_for_completion(&next->res_ready);
- /* continue searching if encap entry is not in valid state after completion */
- if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
- e = next;
- goto retry;
+ off += sizeof(struct geneve_opt) + opt->length * 4;
}
- return next;
+ return 0;
}
-void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+#define COPY_DISSECTOR(rule, diss_key, dst)\
+({ \
+ struct flow_rule *__rule = (rule);\
+ typeof(dst) __dst = dst;\
+\
+ memcpy(__dst,\
+ skb_flow_dissector_target(__rule->match.dissector,\
+ diss_key,\
+ __rule->match.key),\
+ sizeof(*__dst));\
+})
+
+static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct flow_cls_offload *f,
+ struct net_device *filter_dev)
{
- struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
- struct mlx5e_encap_entry *e = NULL;
- struct mlx5e_tc_flow *flow;
- struct mlx5_fc *counter;
- struct neigh_table *tbl;
- bool neigh_used = false;
- struct neighbour *n;
- u64 lastuse;
-
- if (m_neigh->family == AF_INET)
- tbl = &arp_tbl;
-#if IS_ENABLED(CONFIG_IPV6)
- else if (m_neigh->family == AF_INET6)
- tbl = ipv6_stub->nd_tbl;
-#endif
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
+ struct flow_match_enc_opts enc_opts_match;
+ struct tunnel_match_enc_opts tun_enc_opts;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct tunnel_match_key tunnel_key;
+ bool enc_opts_is_dont_care = true;
+ u32 tun_id, enc_opts_id = 0;
+ struct mlx5_eswitch *esw;
+ u32 value, mask;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ memset(&tunnel_key, 0, sizeof(tunnel_key));
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ &tunnel_key.enc_control);
+ if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ &tunnel_key.enc_ipv4);
else
- return;
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ &tunnel_key.enc_ipv6);
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
+ &tunnel_key.enc_tp);
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
+ &tunnel_key.enc_key_id);
+ tunnel_key.filter_ifindex = filter_dev->ifindex;
+
+ err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
+ if (err)
+ return err;
- /* mlx5e_get_next_valid_encap() releases previous encap before returning
- * next one.
- */
- while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
- struct mlx5e_priv *priv = netdev_priv(e->out_dev);
- struct encap_flow_item *efi, *tmp;
- struct mlx5_eswitch *esw;
- LIST_HEAD(flow_list);
-
- esw = priv->mdev->priv.eswitch;
- mutex_lock(&esw->offloads.encap_tbl_lock);
- list_for_each_entry_safe(efi, tmp, &e->flows, list) {
- flow = container_of(efi, struct mlx5e_tc_flow,
- encaps[efi->index]);
- if (IS_ERR(mlx5e_flow_get(flow)))
- continue;
- list_add(&flow->tmp_list, &flow_list);
-
- if (mlx5e_is_offloaded_flow(flow)) {
- counter = mlx5e_tc_get_counter(flow);
- lastuse = mlx5_fc_query_lastuse(counter);
- if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
- neigh_used = true;
- break;
- }
- }
- }
- mutex_unlock(&esw->offloads.encap_tbl_lock);
+ flow_rule_match_enc_opts(rule, &enc_opts_match);
+ err = enc_opts_is_dont_care_or_full_match(priv,
+ enc_opts_match.mask,
+ extack,
+ &enc_opts_is_dont_care);
+ if (err)
+ goto err_enc_opts;
- mlx5e_put_encap_flow_list(priv, &flow_list);
- if (neigh_used) {
- /* release current encap before breaking the loop */
- mlx5e_encap_put(priv, e);
- break;
- }
- }
+ if (!enc_opts_is_dont_care) {
+ memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
+ memcpy(&tun_enc_opts.key, enc_opts_match.key,
+ sizeof(*enc_opts_match.key));
+ memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
+ sizeof(*enc_opts_match.mask));
- trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+ err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
+ &tun_enc_opts, &enc_opts_id);
+ if (err)
+ goto err_enc_opts;
+ }
- if (neigh_used) {
- nhe->reported_lastuse = jiffies;
+ value = tun_id << ENC_OPTS_BITS | enc_opts_id;
+ mask = enc_opts_id ? TUNNEL_ID_MASK :
+ (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
- /* find the relevant neigh according to the cached device and
- * dst ip pair
- */
- n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
- if (!n)
- return;
+ if (attr->chain) {
+ mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
+ TUNNEL_TO_REG, value, mask);
+ } else {
+ mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+ err = mlx5e_tc_match_to_reg_set(priv->mdev,
+ mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
+ TUNNEL_TO_REG, value);
+ if (err)
+ goto err_set;
- neigh_event_send(n, NULL);
- neigh_release(n);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
+
+ flow->attr->tunnel_id = value;
+ return 0;
+
+err_set:
+ if (enc_opts_id)
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id);
+err_enc_opts:
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
+ return err;
}
-static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
{
- WARN_ON(!list_empty(&e->flows));
+ u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
+ u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5_eswitch *esw;
- if (e->compl_result > 0) {
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+ esw = flow->priv->mdev->priv.eswitch;
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ if (tun_id)
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
+ if (enc_opts_id)
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id);
+}
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+ struct flow_match_basic *match, bool outer,
+ void *headers_c, void *headers_v)
+{
+ bool ip_version_cap;
+
+ ip_version_cap = outer ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+
+ if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
+ (match->key->n_proto == htons(ETH_P_IP) ||
+ match->key->n_proto == htons(ETH_P_IPV6))) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
+ match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ ntohs(match->mask->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ntohs(match->key->n_proto));
}
-
- kfree(e->tun_info);
- kfree(e->encap_header);
- kfree_rcu(e, rcu);
}
-void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
- return;
- hash_del_rcu(&e->encap_hlist);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
+ void *headers_v;
+ u16 ethertype;
+ u8 ip_version;
- mlx5e_encap_dealloc(priv, e);
+ if (outer)
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+ else
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+
+ ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
+ /* Return ip_version converted from ethertype anyway */
+ if (!ip_version) {
+ ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+ if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
+ ip_version = 4;
+ else if (ethertype == ETH_P_IPV6)
+ ip_version = 6;
+ }
+ return ip_version;
}
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index)
+/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
+ * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
+ * +---------+----------------------------------------+
+ * |Arriving | Arriving Outer Header |
+ * | Inner +---------+---------+---------+----------+
+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
+ * +---------+---------+---------+---------+----------+
+ * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
+ * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
+ * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
+ * | CE | CE | CE | CE | CE |
+ * +---------+---------+---------+---------+----------+
+ *
+ * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
+ * the inner ip_ecn value before hardware decap action.
+ *
+ * Cells marked are changed from original inner packet ip_ecn value during decap, and
+ * so matching those values on inner ip_ecn before decap will fail.
+ *
+ * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
+ * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
+ * and such we can drop the inner ip_ecn=CE match.
+ */
+
+static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f,
+ bool *match_inner_ecn)
{
- struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ip match;
- /* flow wasn't fully initialized */
- if (!e)
- return;
+ *match_inner_ecn = true;
- mutex_lock(&esw->offloads.encap_tbl_lock);
- list_del(&flow->encaps[out_index].list);
- flow->encaps[out_index].e = NULL;
- if (!refcount_dec_and_test(&e->refcnt)) {
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- return;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ flow_rule_match_enc_ip(rule, &match);
+ outer_ecn_key = match.key->tos & INET_ECN_MASK;
+ outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
}
- hash_del_rcu(&e->encap_hlist);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- mlx5e_encap_dealloc(priv, e);
-}
-static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
-{
- struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+ flow_rule_match_ip(rule, &match);
+ inner_ecn_key = match.key->tos & INET_ECN_MASK;
+ inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
+ }
- if (!flow_flag_test(flow, ESWITCH) ||
- !flow_flag_test(flow, DUP))
- return;
+ if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
+ NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
- mutex_lock(&esw->offloads.peer_mutex);
- list_del(&flow->peer);
- mutex_unlock(&esw->offloads.peer_mutex);
+ if (!outer_ecn_mask) {
+ if (!inner_ecn_mask)
+ return 0;
- flow_flag_clear(flow, DUP);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev,
+ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
- if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
- mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
- kfree(flow->peer_flow);
+ if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev,
+ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
}
- flow->peer_flow = NULL;
-}
+ if (!inner_ecn_mask)
+ return 0;
-static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
-{
- struct mlx5_core_dev *dev = flow->priv->mdev;
- struct mlx5_devcom *devcom = dev->priv.devcom;
- struct mlx5_eswitch *peer_esw;
+ /* Both inner and outer have full mask on ecn */
- peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
- if (!peer_esw)
- return;
+ if (outer_ecn_key == INET_ECN_ECT_1) {
+ /* inner ecn might change by DECAP action */
- __mlx5e_tc_del_fdb_peer_flow(flow);
- mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-}
+ NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
+ netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
+ return -EOPNOTSUPP;
+ }
-static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
-{
- if (mlx5e_is_eswitch_flow(flow)) {
- mlx5e_tc_del_fdb_peer_flow(flow);
- mlx5e_tc_del_fdb_flow(priv, flow);
- } else {
- mlx5e_tc_del_nic_flow(priv, flow);
+ if (outer_ecn_key != INET_ECN_CE)
+ return 0;
+
+ if (inner_ecn_key != INET_ECN_CE) {
+ /* Can't happen in software, as packet ecn will be changed to CE after decap */
+ NL_SET_ERR_MSG_MOD(extack,
+ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
+ netdev_warn(priv->netdev,
+ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
+ return -EOPNOTSUPP;
}
-}
+ /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase,
+ * drop match on inner ecn
+ */
+ *match_inner_ecn = false;
+
+ return 0;
+}
static int parse_tunnel_attr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
- struct net_device *filter_dev, u8 *match_level)
+ struct net_device *filter_dev,
+ u8 *match_level,
+ bool *match_inner)
{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- outer_headers);
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers);
- struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ bool needs_mapping, sets_mapping;
int err;
- err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
- headers_c, headers_v, match_level);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack,
- "failed to parse tunnel attributes");
- return err;
+ if (!mlx5e_is_eswitch_flow(flow)) {
+ NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
+ return -EOPNOTSUPP;
}
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
- struct flow_match_control match;
- u16 addr_type;
+ needs_mapping = !!flow->attr->chain;
+ sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
+ *match_inner = !needs_mapping;
- flow_rule_match_enc_control(rule, &match);
- addr_type = match.key->addr_type;
+ if ((needs_mapping || sets_mapping) &&
+ !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Chains on tunnel devices isn't supported without register loopback support");
+ netdev_warn(priv->netdev,
+ "Chains on tunnel devices isn't supported without register loopback support");
+ return -EOPNOTSUPP;
+ }
- /* For tunnel addr_type used same key id`s as for non-tunnel */
- if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
- struct flow_match_ipv4_addrs match;
+ if (!flow->attr->chain) {
+ err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
+ match_level);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to parse tunnel attributes");
+ netdev_warn(priv->netdev,
+ "Failed to parse tunnel attributes");
+ return err;
+ }
- flow_rule_match_enc_ipv4_addrs(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
- ntohl(match.mask->src));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
- ntohl(match.key->src));
+ /* With mpls over udp we decapsulate using packet reformat
+ * object
+ */
+ if (!netif_is_bareudp(filter_dev))
+ flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ err = mlx5e_tc_set_attr_rx_tun(flow, spec);
+ if (err)
+ return err;
+ } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ struct mlx5_flow_spec *tmp_spec;
+
+ tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
+ if (!tmp_spec) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
+ netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
+ return -ENOMEM;
+ }
+ memcpy(tmp_spec, spec, sizeof(*tmp_spec));
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
- ntohl(match.mask->dst));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
- ntohl(match.key->dst));
-
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
- ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ETH_P_IP);
- } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
- struct flow_match_ipv6_addrs match;
-
- flow_rule_match_enc_ipv6_addrs(rule, &match);
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6));
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6));
-
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
- ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ETH_P_IPV6);
+ err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
+ if (err) {
+ kvfree(tmp_spec);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
+ netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
+ return err;
}
+ err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
+ kvfree(tmp_spec);
+ if (err)
+ return err;
}
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
- struct flow_match_ip match;
-
- flow_rule_match_enc_ip(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
- match.mask->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
- match.key->tos & 0x3);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
- match.mask->tos >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
- match.key->tos >> 2);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
- match.mask->ttl);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
- match.key->ttl);
+ if (!needs_mapping && !sets_mapping)
+ return 0;
- if (match.mask->ttl &&
- !MLX5_CAP_ESW_FLOWTABLE_FDB
- (priv->mdev,
- ft_field_support.outer_ipv4_ttl)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Matching on TTL is not supported");
- return -EOPNOTSUPP;
- }
+ return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
+}
- }
+static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ inner_headers);
+}
- /* Enforce DMAC when offloading incoming tunneled flows.
- * Flow counters require a match on the DMAC.
- */
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16), priv->netdev->dev_addr);
+static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ inner_headers);
+}
- /* let software handle IP fragments */
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+}
- return 0;
+static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
}
-static void *get_match_headers_criteria(u32 flags,
- struct mlx5_flow_spec *spec)
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
{
return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
- MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- inner_headers) :
- MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- outer_headers);
+ get_match_inner_headers_value(spec) :
+ get_match_outer_headers_value(spec);
}
-static void *get_match_headers_value(u32 flags,
- struct mlx5_flow_spec *spec)
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
{
return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
- MLX5_ADDR_OF(fte_match_param, spec->match_value,
- inner_headers) :
- MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers);
+ get_match_inner_headers_criteria(spec) :
+ get_match_outer_headers_criteria(spec);
}
static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
@@ -1822,9 +2481,12 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
return 0;
flow_rule_match_meta(rule, &match);
+ if (!match.mask->ingress_ifindex)
+ return 0;
+
if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
- return -EINVAL;
+ return -EOPNOTSUPP;
}
ingress_dev = __dev_get_by_index(dev_net(filter_dev),
@@ -1832,19 +2494,34 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
if (!ingress_dev) {
NL_SET_ERR_MSG_MOD(extack,
"Can't find the ingress port to match on");
- return -EINVAL;
+ return -ENOENT;
}
if (ingress_dev != filter_dev) {
NL_SET_ERR_MSG_MOD(extack,
"Can't match on the ingress filter port");
- return -EINVAL;
+ return -EOPNOTSUPP;
}
return 0;
}
+static bool skip_key_basic(struct net_device *filter_dev,
+ struct flow_cls_offload *f)
+{
+ /* When doing mpls over udp decap, the user needs to provide
+ * MPLS_UC as the protocol in order to be able to match on mpls
+ * label fields. However, the actual ethertype is IP so we want to
+ * avoid matching on this, otherwise we'll fail the match.
+ */
+ if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
+ return true;
+
+ return false;
+}
+
static int __parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
struct net_device *filter_dev,
@@ -1859,13 +2536,20 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
misc_parameters);
void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
+ void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_3);
+ void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_3);
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector *dissector = rule->match.dissector;
+ enum fs_flow_table_type fs_type;
+ bool match_inner_ecn = true;
u16 addr_type = 0;
u8 ip_proto = 0;
u8 *match_level;
int err;
+ fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
match_level = outer_match_level;
if (dissector->used_keys &
@@ -1885,41 +2569,52 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_TCP) |
BIT(FLOW_DISSECTOR_KEY_IP) |
+ BIT(FLOW_DISSECTOR_KEY_CT) |
BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
- BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+ BIT(FLOW_DISSECTOR_KEY_ICMP) |
+ BIT(FLOW_DISSECTOR_KEY_MPLS))) {
NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
- netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
- dissector->used_keys);
+ netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
+ dissector->used_keys);
return -EOPNOTSUPP;
}
if (mlx5e_get_tc_tun(filter_dev)) {
- if (parse_tunnel_attr(priv, spec, f, filter_dev,
- outer_match_level))
- return -EOPNOTSUPP;
+ bool match_inner = false;
- /* At this point, header pointers should point to the inner
- * headers, outer header were already set by parse_tunnel_attr
- */
- match_level = inner_match_level;
- headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
- spec);
- headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
- spec);
+ err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
+ outer_match_level, &match_inner);
+ if (err)
+ return err;
+
+ if (match_inner) {
+ /* header pointers should point to the inner headers
+ * if the packet was decapsulated already.
+ * outer headers are set by parse_tunnel_attr.
+ */
+ match_level = inner_match_level;
+ headers_c = get_match_inner_headers_criteria(spec);
+ headers_v = get_match_inner_headers_value(spec);
+ }
+
+ err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
+ if (err)
+ return err;
}
err = mlx5e_flower_parse_meta(filter_dev, f);
if (err)
return err;
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
+ !skip_key_basic(filter_dev, f)) {
struct flow_match_basic match;
flow_rule_match_basic(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
- ntohs(match.mask->n_proto));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ntohs(match.key->n_proto));
+ mlx5e_tc_set_ethertype(priv->mdev, &match,
+ match_level == outer_match_level,
+ headers_c, headers_v);
if (match.mask->n_proto)
*match_level = MLX5_MATCH_L2;
@@ -1967,6 +2662,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
match.key->vlan_priority);
*match_level = MLX5_MATCH_L2;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
+ match.mask->vlan_eth_type &&
+ MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
+ ft_field_support.outer_second_vid,
+ fs_type)) {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_cvlan_tag, 1);
+ spec->match_criteria_enable |=
+ MLX5_MATCH_MISC_PARAMETERS;
+ }
}
} else if (*match_level != MLX5_MATCH_NONE) {
/* cvlan_tag enabled in match criteria and
@@ -1984,6 +2690,13 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
if (match.mask->vlan_id ||
match.mask->vlan_priority ||
match.mask->vlan_tpid) {
+ if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
+ fs_type)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on CVLAN is not supported");
+ return -EOPNOTSUPP;
+ }
+
if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
MLX5_SET(fte_match_set_misc, misc_c,
outer_second_svlan_tag, 1);
@@ -2006,6 +2719,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
match.key->vlan_priority);
*match_level = MLX5_MATCH_L2;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
}
}
@@ -2039,8 +2753,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
addr_type = match.key->addr_type;
/* the HW doesn't support frag first/later */
- if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
+ if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
+ NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
return -EOPNOTSUPP;
+ }
if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
@@ -2119,10 +2835,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
struct flow_match_ip match;
flow_rule_match_ip(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
- match.mask->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
- match.key->tos & 0x3);
+ if (match_inner_ecn) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
+ }
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
match.mask->tos >> 2);
@@ -2200,6 +2918,64 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
if (match.mask->flags)
*match_level = MLX5_MATCH_L4;
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
+ struct flow_match_icmp match;
+
+ flow_rule_match_icmp(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_ICMP)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Match on Flex protocols for ICMP is not supported");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
+ match.mask->type);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
+ match.key->type);
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
+ match.mask->code);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
+ match.key->code);
+ break;
+ case IPPROTO_ICMPV6:
+ if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_ICMPV6)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Match on Flex protocols for ICMPV6 is not supported");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
+ match.mask->type);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
+ match.key->type);
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
+ match.mask->code);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
+ match.key->code);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Code and type matching only with ICMP and ICMPv6");
+ netdev_err(priv->netdev,
+ "Code and type matching only with ICMP and ICMPv6\n");
+ return -EINVAL;
+ }
+ if (match.mask->code || match.mask->type) {
+ *match_level = MLX5_MATCH_L4;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+ }
+ }
+ /* Currently supported only for MPLS over UDP */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
+ !netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on MPLS is supported only for MPLS over UDP");
+ netdev_err(priv->netdev,
+ "Matching on MPLS is supported only for MPLS over UDP\n");
+ return -EOPNOTSUPP;
+ }
return 0;
}
@@ -2222,8 +2998,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
inner_match_level = MLX5_MATCH_NONE;
outer_match_level = MLX5_MATCH_NONE;
- err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
- &outer_match_level);
+ err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
+ &inner_match_level, &outer_match_level);
non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
outer_match_level : inner_match_level;
@@ -2242,59 +3018,11 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
}
}
- if (is_eswitch_flow) {
- flow->esw_attr->inner_match_level = inner_match_level;
- flow->esw_attr->outer_match_level = outer_match_level;
- } else {
- flow->nic_attr->match_level = non_tunnel_match_level;
- }
-
- return err;
-}
+ flow->attr->inner_match_level = inner_match_level;
+ flow->attr->outer_match_level = outer_match_level;
-struct pedit_headers {
- struct ethhdr eth;
- struct vlan_hdr vlan;
- struct iphdr ip4;
- struct ipv6hdr ip6;
- struct tcphdr tcp;
- struct udphdr udp;
-};
-struct pedit_headers_action {
- struct pedit_headers vals;
- struct pedit_headers masks;
- u32 pedits;
-};
-
-static int pedit_header_offsets[] = {
- [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
- [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
- [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
- [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
- [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
-};
-
-#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
-
-static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
- struct pedit_headers_action *hdrs)
-{
- u32 *curr_pmask, *curr_pval;
-
- curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
- curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
-
- if (*curr_pmask & mask) /* disallow acting twice on the same location */
- goto out_err;
-
- *curr_pmask |= mask;
- *curr_pval |= (val & mask);
-
- return 0;
-
-out_err:
- return -EOPNOTSUPP;
+ return err;
}
struct mlx5_fields {
@@ -2373,6 +3101,7 @@ static struct mlx5_fields fields[] = {
OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
+ OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
@@ -2383,40 +3112,48 @@ static struct mlx5_fields fields[] = {
OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
};
-/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
- * max from the SW pedit action. On success, attr->num_mod_hdr_actions
- * says how many HW actions were actually parsed.
- */
-static int offload_pedit_fields(struct pedit_headers_action *hdrs,
+static unsigned long mask_to_le(unsigned long mask, int size)
+{
+ __be32 mask_be32;
+ __be16 mask_be16;
+
+ if (size == 32) {
+ mask_be32 = (__force __be32)(mask);
+ mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
+ } else if (size == 16) {
+ mask_be32 = (__force __be32)(mask);
+ mask_be16 = *(__be16 *)&mask_be32;
+ mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+ }
+
+ return mask;
+}
+
+static int offload_pedit_fields(struct mlx5e_priv *priv,
+ int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
- int i, action_size, nactions, max_actions, first, last, next_z;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
void *headers_c, *headers_v, *action, *vals_p;
u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ unsigned long mask, field_mask;
+ int i, first, last, next_z;
struct mlx5_fields *f;
- unsigned long mask;
- __be32 mask_be32;
- __be16 mask_be16;
u8 cmd;
- headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
- headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
+ mod_acts = &parse_attr->mod_hdr_acts;
+ headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
set_masks = &hdrs[0].masks;
add_masks = &hdrs[1].masks;
set_vals = &hdrs[0].vals;
add_vals = &hdrs[1].vals;
- action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
- action = parse_attr->mod_hdr_actions +
- parse_attr->num_mod_hdr_actions * action_size;
-
- max_actions = parse_attr->max_mod_hdr_actions;
- nactions = parse_attr->num_mod_hdr_actions;
-
for (i = 0; i < ARRAY_SIZE(fields); i++) {
bool skip;
@@ -2437,14 +3174,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
if (s_mask && a_mask) {
NL_SET_ERR_MSG_MOD(extack,
"can't set and add to the same HW field");
- printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
- return -EOPNOTSUPP;
- }
-
- if (nactions == max_actions) {
- NL_SET_ERR_MSG_MOD(extack,
- "too many pedit actions, can't offload");
- printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
+ netdev_warn(priv->netdev,
+ "mlx5: can't set and add to the same HW field (%x)\n",
+ f->field);
return -EOPNOTSUPP;
}
@@ -2475,13 +3207,7 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
if (skip)
continue;
- if (f->field_bsize == 32) {
- mask_be32 = *(__be32 *)&mask;
- mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
- } else if (f->field_bsize == 16) {
- mask_be16 = *(__be16 *)&mask;
- mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
- }
+ mask = mask_to_le(mask, f->field_bsize);
first = find_first_bit(&mask, f->field_bsize);
next_z = find_next_zero_bit(&mask, f->field_bsize, first);
@@ -2489,20 +3215,32 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
if (first < next_z && next_z < last) {
NL_SET_ERR_MSG_MOD(extack,
"rewrite of few sub-fields isn't supported");
- printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
- mask);
+ netdev_warn(priv->netdev,
+ "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
+ mask);
return -EOPNOTSUPP;
}
+ action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
+ if (IS_ERR(action)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "too many pedit actions, can't offload");
+ mlx5_core_warn(priv->mdev,
+ "mlx5: parsed %d pedit actions, can't do more\n",
+ mod_acts->num_actions);
+ return PTR_ERR(action);
+ }
+
MLX5_SET(set_action_in, action, action_type, cmd);
MLX5_SET(set_action_in, action, field, f->field);
if (cmd == MLX5_ACTION_TYPE_SET) {
int start;
+ field_mask = mask_to_le(f->field_mask, f->field_bsize);
+
/* if field is bit sized it can start not from first bit */
- start = find_first_bit((unsigned long *)&f->field_mask,
- f->field_bsize);
+ start = find_first_bit(&field_mask, f->field_bsize);
MLX5_SET(set_action_in, action, offset, first - start);
/* length is num of bits to be written, zero means length of 32 */
@@ -2516,158 +3254,57 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
else if (f->field_bsize == 8)
MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
- action += action_size;
- nactions++;
+ ++mod_acts->num_actions;
}
- parse_attr->num_mod_hdr_actions = nactions;
- return 0;
-}
-
-static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
- int namespace)
-{
- if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
- return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
- else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
- return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
-}
-
-static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
- struct pedit_headers_action *hdrs,
- int namespace,
- struct mlx5e_tc_flow_parse_attr *parse_attr)
-{
- int nkeys, action_size, max_actions;
-
- nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
- hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
- action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
-
- max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
- /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
- max_actions = min(max_actions, nkeys * 16);
-
- parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
- if (!parse_attr->mod_hdr_actions)
- return -ENOMEM;
-
- parse_attr->max_mod_hdr_actions = max_actions;
return 0;
}
static const struct pedit_headers zero_masks = {};
-static int parse_tc_pedit_action(struct mlx5e_priv *priv,
- const struct flow_action_entry *act, int namespace,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- struct netlink_ext_ack *extack)
+static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
{
- u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
- int err = -EOPNOTSUPP;
- u32 mask, val, offset;
- u8 htype;
-
- htype = act->mangle.htype;
- err = -EOPNOTSUPP; /* can't be all optimistic */
-
- if (htype == FLOW_ACT_MANGLE_UNSPEC) {
- NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
- goto out_err;
- }
+ struct pedit_headers *cmd_masks;
+ u8 cmd;
- if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
- NL_SET_ERR_MSG_MOD(extack,
- "The pedit offload action is not supported");
- goto out_err;
+ for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
+ cmd_masks = &parse_attr->hdrs[cmd].masks;
+ if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+ NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
+ netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
+ print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
+ 16, 1, cmd_masks, sizeof(zero_masks), true);
+ return -EOPNOTSUPP;
+ }
}
- mask = act->mangle.mask;
- val = act->mangle.val;
- offset = act->mangle.offset;
-
- err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
- if (err)
- goto out_err;
-
- hdrs[cmd].pedits++;
-
return 0;
-out_err:
- return err;
}
static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
u32 *action_flags,
struct netlink_ext_ack *extack)
{
- struct pedit_headers *cmd_masks;
int err;
- u8 cmd;
- if (!parse_attr->mod_hdr_actions) {
- err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
- if (err)
- goto out_err;
- }
-
- err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
- if (err < 0)
+ err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
+ if (err)
goto out_dealloc_parsed_actions;
- for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
- cmd_masks = &hdrs[cmd].masks;
- if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
- NL_SET_ERR_MSG_MOD(extack,
- "attempt to offload an unsupported field");
- netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
- print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
- 16, 1, cmd_masks, sizeof(zero_masks), true);
- err = -EOPNOTSUPP;
- goto out_dealloc_parsed_actions;
- }
- }
+ err = verify_offload_pedit_fields(priv, parse_attr, extack);
+ if (err)
+ goto out_dealloc_parsed_actions;
return 0;
out_dealloc_parsed_actions:
- kfree(parse_attr->mod_hdr_actions);
-out_err:
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
return err;
}
-static bool csum_offload_supported(struct mlx5e_priv *priv,
- u32 action,
- u32 update_flags,
- struct netlink_ext_ack *extack)
-{
- u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
- TCA_CSUM_UPDATE_FLAG_UDP;
-
- /* The HW recalcs checksums only if re-writing headers */
- if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
- NL_SET_ERR_MSG_MOD(extack,
- "TC csum action is only offloaded with pedit");
- netdev_warn(priv->netdev,
- "TC csum action is only offloaded with pedit\n");
- return false;
- }
-
- if (update_flags & ~prot_flags) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload TC csum action for some header/s");
- netdev_warn(priv->netdev,
- "can't offload TC csum action for some header/s - flags %#x\n",
- update_flags);
- return false;
- }
-
- return true;
-}
-
struct ip_ttl_word {
__u8 ttl;
__u8 protocol;
@@ -2680,7 +3317,10 @@ struct ipv6_hoplimit_word {
__u8 hop_limit;
};
-static bool is_action_keys_supported(const struct flow_action_entry *act)
+static bool
+is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
+ bool *modify_ip_header, bool *modify_tuple,
+ struct netlink_ext_ack *extack)
{
u32 mask, offset;
u8 htype;
@@ -2699,7 +3339,16 @@ static bool is_action_keys_supported(const struct flow_action_entry *act)
if (offset != offsetof(struct iphdr, ttl) ||
ttl_word->protocol ||
ttl_word->check) {
- return true;
+ *modify_ip_header = true;
+ }
+
+ if (offset >= offsetof(struct iphdr, saddr))
+ *modify_tuple = true;
+
+ if (ct_flow && *modify_tuple) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of ipv4 address with action ct");
+ return false;
}
} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
struct ipv6_hoplimit_word *hoplimit_word =
@@ -2708,49 +3357,109 @@ static bool is_action_keys_supported(const struct flow_action_entry *act)
if (offset != offsetof(struct ipv6hdr, payload_len) ||
hoplimit_word->payload_len ||
hoplimit_word->nexthdr) {
- return true;
+ *modify_ip_header = true;
+ }
+
+ if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
+ *modify_tuple = true;
+
+ if (ct_flow && *modify_tuple) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of ipv6 address with action ct");
+ return false;
+ }
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
+ htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
+ *modify_tuple = true;
+ if (ct_flow) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of transport header ports with action ct");
+ return false;
}
}
- return false;
+
+ return true;
}
-static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
+static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
+ bool ct_flow, struct netlink_ext_ack *extack,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec)
+{
+ if (!modify_tuple || ct_clear)
+ return true;
+
+ if (ct_flow) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload tuple modification with non-clear ct()");
+ netdev_info(priv->netdev,
+ "can't offload tuple modification with non-clear ct()");
+ return false;
+ }
+
+ /* Add ct_state=-trk match so it will be offloaded for non ct flows
+ * (or after clear action), as otherwise, since the tuple is changed,
+ * we can't restore ct state
+ */
+ if (mlx5_tc_ct_add_no_trk_match(spec)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload tuple modification with ct matches and no ct(clear) action");
+ netdev_info(priv->netdev,
+ "can't offload tuple modification with ct matches and no ct(clear) action");
+ return false;
+ }
+
+ return true;
+}
+
+static bool modify_header_match_supported(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
struct flow_action *flow_action,
- u32 actions,
+ u32 actions, bool ct_flow,
+ bool ct_clear,
struct netlink_ext_ack *extack)
{
const struct flow_action_entry *act;
- bool modify_ip_header;
+ bool modify_ip_header, modify_tuple;
+ void *headers_c;
void *headers_v;
u16 ethertype;
u8 ip_proto;
int i;
- headers_v = get_match_headers_value(actions, spec);
+ headers_c = mlx5e_get_match_headers_criteria(actions, spec);
+ headers_v = mlx5e_get_match_headers_value(actions, spec);
ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
/* for non-IP we only re-write MACs, so we're okay */
- if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
+ if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
+ ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
goto out_ok;
modify_ip_header = false;
+ modify_tuple = false;
flow_action_for_each(i, act, flow_action) {
if (act->id != FLOW_ACTION_MANGLE &&
act->id != FLOW_ACTION_ADD)
continue;
- if (is_action_keys_supported(act)) {
- modify_ip_header = true;
- break;
- }
+ if (!is_action_keys_supported(act, ct_flow,
+ &modify_ip_header,
+ &modify_tuple, extack))
+ return false;
}
+ if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
+ priv, spec))
+ return false;
+
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
if (modify_ip_header && ip_proto != IPPROTO_TCP &&
ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
NL_SET_ERR_MSG_MOD(extack,
"can't offload re-write of non TCP/UDP");
- pr_info("can't offload re-write of ip proto %d\n", ip_proto);
+ netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
+ ip_proto);
return false;
}
@@ -2758,838 +3467,655 @@ out_ok:
return true;
}
-static bool actions_match_supported(struct mlx5e_priv *priv,
- struct flow_action *flow_action,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+static bool
+actions_match_supported_fdb(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
- u32 actions;
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ bool ct_flow, ct_clear;
- if (mlx5e_is_eswitch_flow(flow))
- actions = flow->esw_attr->action;
- else
- actions = flow->nic_attr->action;
+ ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
- if (flow_flag_test(flow, EGRESS) &&
- !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
- (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
- (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
+ if (esw_attr->split_count && ct_flow &&
+ !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
+ /* All registers used by ct are cleared when using
+ * split rules.
+ */
+ NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
return false;
+ }
- if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return modify_header_match_supported(&parse_attr->spec,
- flow_action, actions,
- extack);
+ if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "current firmware doesn't support split rule for port mirroring");
+ netdev_warn_once(priv->netdev,
+ "current firmware doesn't support split rule for port mirroring\n");
+ return false;
+ }
return true;
}
-static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+static bool
+actions_match_supported(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ u32 actions,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
- struct mlx5_core_dev *fmdev, *pmdev;
- u64 fsystem_guid, psystem_guid;
-
- fmdev = priv->mdev;
- pmdev = peer_priv->mdev;
-
- fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
- psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
+ bool ct_flow, ct_clear;
- return (fsystem_guid == psystem_guid);
-}
+ ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
-static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
- const struct flow_action_entry *act,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- u32 *action, struct netlink_ext_ack *extack)
-{
- u16 mask16 = VLAN_VID_MASK;
- u16 val16 = act->vlan.vid & VLAN_VID_MASK;
- const struct flow_action_entry pedit_act = {
- .id = FLOW_ACTION_MANGLE,
- .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
- .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
- .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
- .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
- };
- u8 match_prio_mask, match_prio_val;
- void *headers_c, *headers_v;
- int err;
+ if (!(actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
+ return false;
+ }
- headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
- headers_v = get_match_headers_value(*action, &parse_attr->spec);
+ if (!(~actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
+ return false;
+ }
- if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
- MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
- NL_SET_ERR_MSG_MOD(extack,
- "VLAN rewrite action must have VLAN protocol match");
- return -EOPNOTSUPP;
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
+ return false;
}
- match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
- match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
- if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Changing VLAN prio is not supported");
- return -EOPNOTSUPP;
+ if (!(~actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
+ return false;
}
- err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr,
- hdrs, NULL);
- *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
+ return false;
+ }
- return err;
-}
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
+ actions, ct_flow, ct_clear, extack))
+ return false;
-static int
-add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- u32 *action, struct netlink_ext_ack *extack)
-{
- const struct flow_action_entry prio_tag_act = {
- .vlan.vid = 0,
- .vlan.prio =
- MLX5_GET(fte_match_set_lyr_2_4,
- get_match_headers_value(*action,
- &parse_attr->spec),
- first_prio) &
- MLX5_GET(fte_match_set_lyr_2_4,
- get_match_headers_criteria(*action,
- &parse_attr->spec),
- first_prio),
- };
+ if (mlx5e_is_eswitch_flow(flow) &&
+ !actions_match_supported_fdb(priv, parse_attr, flow, extack))
+ return false;
- return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
- &prio_tag_act, parse_attr, hdrs, action,
- extack);
+ return true;
}
-static int parse_tc_nic_actions(struct mlx5e_priv *priv,
- struct flow_action *flow_action,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
- struct pedit_headers_action hdrs[2] = {};
- const struct flow_action_entry *act;
- u32 action = 0;
- int err, i;
+ return priv->mdev == peer_priv->mdev;
+}
- if (!flow_action_has_entries(flow_action))
- return -EINVAL;
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+{
+ struct mlx5_core_dev *fmdev, *pmdev;
+ u64 fsystem_guid, psystem_guid;
- attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ fmdev = priv->mdev;
+ pmdev = peer_priv->mdev;
- flow_action_for_each(i, act, flow_action) {
- switch (act->id) {
- case FLOW_ACTION_ACCEPT:
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- case FLOW_ACTION_DROP:
- action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- if (MLX5_CAP_FLOWTABLE(priv->mdev,
- flow_table_properties_nic_receive.flow_counter))
- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- case FLOW_ACTION_MANGLE:
- case FLOW_ACTION_ADD:
- err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
- parse_attr, hdrs, extack);
- if (err)
- return err;
+ fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
+ psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- break;
- case FLOW_ACTION_VLAN_MANGLE:
- err = add_vlan_rewrite_action(priv,
- MLX5_FLOW_NAMESPACE_KERNEL,
- act, parse_attr, hdrs,
- &action, extack);
- if (err)
- return err;
+ return (fsystem_guid == psystem_guid);
+}
- break;
- case FLOW_ACTION_CSUM:
- if (csum_offload_supported(priv, action,
- act->csum_flags,
- extack))
- break;
+static int
+actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
- return -EOPNOTSUPP;
- case FLOW_ACTION_REDIRECT: {
- struct net_device *peer_dev = act->dev;
-
- if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
- same_hw_devs(priv, netdev_priv(peer_dev))) {
- parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
- flow_flag_set(flow, HAIRPIN);
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "device is not on same HW, can't offload");
- netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
- peer_dev->name);
- return -EINVAL;
- }
- }
- break;
- case FLOW_ACTION_MARK: {
- u32 mark = act->mark;
+ if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
+ !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
+ return 0;
- if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
- NL_SET_ERR_MSG_MOD(extack,
- "Bad flow mark - only 16 bit is supported");
- return -EINVAL;
- }
+ ns_type = mlx5e_get_flow_namespace(flow);
- attr->flow_tag = mark;
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- }
- break;
- default:
- NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
- return -EOPNOTSUPP;
- }
- }
+ err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
+ if (err)
+ return err;
- if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
- hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
- err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
- parse_attr, hdrs, &action, extack);
- if (err)
- return err;
- /* in case all pedit actions are skipped, remove the MOD_HDR
- * flag.
- */
- if (parse_attr->num_mod_hdr_actions == 0) {
- action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- kfree(parse_attr->mod_hdr_actions);
- }
- }
+ if (parse_attr->mod_hdr_acts.num_actions > 0)
+ return 0;
- attr->action = action;
- if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
- return -EOPNOTSUPP;
+ /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
- return 0;
-}
+ if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
+ return 0;
-struct encap_key {
- const struct ip_tunnel_key *ip_tun_key;
- struct mlx5e_tc_tunnel *tc_tunnel;
-};
+ if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+ (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
+ attr->esw_attr->split_count = 0;
-static inline int cmp_encap_info(struct encap_key *a,
- struct encap_key *b)
-{
- return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
- a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
+ return 0;
}
-static inline int hash_encap_info(struct encap_key *key)
+static struct mlx5_flow_attr*
+mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
{
- return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
- key->tc_tunnel->tunnel_type);
-}
-
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ u32 attr_sz = ns_to_attr_sz(ns_type);
+ struct mlx5_flow_attr *attr2;
-static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
- struct net_device *peer_netdev)
-{
- struct mlx5e_priv *peer_priv;
+ attr2 = mlx5_alloc_flow_attr(ns_type);
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
+ if (!attr2 || !parse_attr) {
+ kvfree(parse_attr);
+ kfree(attr2);
+ return NULL;
+ }
- peer_priv = netdev_priv(peer_netdev);
+ memcpy(attr2, attr, attr_sz);
+ INIT_LIST_HEAD(&attr2->list);
+ parse_attr->filter_dev = attr->parse_attr->filter_dev;
+ attr2->action = 0;
+ attr2->flags = 0;
+ attr2->parse_attr = parse_attr;
+ attr2->dest_chain = 0;
+ attr2->dest_ft = NULL;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ attr2->esw_attr->out_count = 0;
+ attr2->esw_attr->split_count = 0;
+ }
- return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
- mlx5e_eswitch_rep(priv->netdev) &&
- mlx5e_eswitch_rep(peer_netdev) &&
- same_hw_devs(priv, peer_priv));
+ return attr2;
}
-
-
-bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+static struct mlx5_core_dev *
+get_flow_counter_dev(struct mlx5e_tc_flow *flow)
{
- return refcount_inc_not_zero(&e->refcnt);
+ return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
}
-static struct mlx5e_encap_entry *
-mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
- uintptr_t hash_key)
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_encap_entry *e;
- struct encap_key e_key;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_attr *attr;
+ int i;
- hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
- encap_hlist, hash_key) {
- e_key.ip_tun_key = &e->tun_info->key;
- e_key.tc_tunnel = e->tunnel;
- if (!cmp_encap_info(&e_key, key) &&
- mlx5e_encap_take(e))
- return e;
+ list_for_each_entry(attr, &flow->attrs, list) {
+ esw_attr = attr->esw_attr;
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
+ return attr;
+ }
}
return NULL;
}
-static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
+void
+mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
{
- size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr;
- return kmemdup(tun_info, tun_size, GFP_KERNEL);
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
+
+ mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
+ }
}
-static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- int out_index,
- struct mlx5e_encap_entry *e,
- struct netlink_ext_ack *extack)
+static void
+free_flow_post_acts(struct mlx5e_tc_flow *flow)
{
- int i;
+ struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr, *tmp;
+ bool vf_tun;
- for (i = 0; i < out_index; i++) {
- if (flow->encaps[i].e != e)
- continue;
- NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
- netdev_err(priv->netdev, "can't duplicate encap action\n");
- return true;
- }
+ list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
- return false;
+ if (attr->post_act_handle)
+ mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
+
+ clean_encap_dests(flow->priv, flow, attr, &vf_tun);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(counter_dev, attr->counter);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
+ if (attr->modify_hdr)
+ mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
+ }
+
+ list_del(&attr->list);
+ kvfree(attr->parse_attr);
+ kfree(attr);
+ }
}
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct net_device *mirred_dev,
- int out_index,
- struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid)
+int
+mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
- const struct ip_tunnel_info *tun_info;
- struct encap_key key;
- struct mlx5e_encap_entry *e;
- unsigned short family;
- uintptr_t hash_key;
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr;
int err = 0;
- parse_attr = attr->parse_attr;
- tun_info = parse_attr->tun_info[out_index];
- family = ip_tunnel_info_af(tun_info);
- key.ip_tun_key = &tun_info->key;
- key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
- if (!key.tc_tunnel) {
- NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
- return -EOPNOTSUPP;
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
+
+ err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
+ if (err)
+ break;
}
- hash_key = hash_encap_info(&key);
+ return err;
+}
- mutex_lock(&esw->offloads.encap_tbl_lock);
- e = mlx5e_encap_get(priv, &key, hash_key);
+/* TC filter rule HW translation:
+ *
+ * +---------------------+
+ * + ft prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * |
+ * | if multi table action
+ * |
+ * v
+ * +---------------------+
+ * + post act ft |<----.
+ * + match fte id | | split on multi table action
+ * + do actions |-----'
+ * +---------------------+
+ * |
+ * |
+ * v
+ * Do rest of the actions after last multi table action.
+ */
+static int
+alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr, *next_attr = NULL;
+ struct mlx5e_post_act_handle *handle;
+ bool vf_tun, encap_valid = true;
+ int err;
- /* must verify if encap is valid or not */
- if (e) {
- /* Check that entry was not already attached to this flow */
- if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
- err = -EOPNOTSUPP;
- goto out_err;
+ /* This is going in reverse order as needed.
+ * The first entry is the last attribute.
+ */
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (!next_attr) {
+ /* Set counter action on last post act rule. */
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ } else {
+ err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
+ if (err)
+ goto out_free;
}
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- wait_for_completion(&e->res_ready);
+ /* Don't add post_act rule for first attr (last in the list).
+ * It's being handled by the caller.
+ */
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
- if (e->compl_result < 0) {
- err = -EREMOTEIO;
- goto out_err;
- }
- goto attach_flow;
- }
+ err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun);
+ if (err)
+ goto out_free;
- e = kzalloc(sizeof(*e), GFP_KERNEL);
- if (!e) {
- err = -ENOMEM;
- goto out_err;
- }
+ if (!encap_valid)
+ flow_flag_set(flow, SLOW);
- refcount_set(&e->refcnt, 1);
- init_completion(&e->res_ready);
+ err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
+ if (err)
+ goto out_free;
- tun_info = dup_tun_info(tun_info);
- if (!tun_info) {
- err = -ENOMEM;
- goto out_err_init;
- }
- e->tun_info = tun_info;
- err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
- if (err)
- goto out_err_init;
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
+ if (err)
+ goto out_free;
+ }
- INIT_LIST_HEAD(&e->flows);
- hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
+ if (err)
+ goto out_free;
+ }
- if (family == AF_INET)
- err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
- else if (family == AF_INET6)
- err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+ handle = mlx5e_tc_post_act_add(post_act, attr);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_free;
+ }
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
- complete_all(&e->res_ready);
- if (err) {
- e->compl_result = err;
- goto out_err;
+ attr->post_act_handle = handle;
+ next_attr = attr;
}
- e->compl_result = 1;
-attach_flow:
- flow->encaps[out_index].e = e;
- list_add(&flow->encaps[out_index].list, &e->flows);
- flow->encaps[out_index].index = out_index;
- *encap_dev = e->out_dev;
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
- attr->dests[out_index].pkt_reformat = e->pkt_reformat;
- attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
- *encap_valid = true;
- } else {
- *encap_valid = false;
- }
- mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (flow_flag_test(flow, SLOW))
+ goto out;
- return err;
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err)
+ goto out_free;
-out_err:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- if (e)
- mlx5e_encap_put(priv, e);
- return err;
+out:
+ return 0;
-out_err_init:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- kfree(tun_info);
- kfree(e);
+out_free:
+ free_flow_post_acts(flow);
return err;
}
-static int parse_tc_vlan_action(struct mlx5e_priv *priv,
- const struct flow_action_entry *act,
- struct mlx5_esw_flow_attr *attr,
- u32 *action)
+static int
+parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action)
{
- u8 vlan_idx = attr->total_vlan;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_action flow_action_reorder;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5_flow_attr *attr = flow->attr;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_priv *priv = flow->priv;
+ struct flow_action_entry *act, **_act;
+ struct mlx5e_tc_act *tc_act;
+ int err, i;
- if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
- return -EOPNOTSUPP;
+ flow_action_reorder.num_entries = flow_action->num_entries;
+ flow_action_reorder.entries = kcalloc(flow_action->num_entries,
+ sizeof(flow_action), GFP_KERNEL);
+ if (!flow_action_reorder.entries)
+ return -ENOMEM;
- switch (act->id) {
- case FLOW_ACTION_VLAN_POP:
- if (vlan_idx) {
- if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
- MLX5_FS_VLAN_DEPTH))
- return -EOPNOTSUPP;
+ mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
- } else {
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ ns_type = mlx5e_get_flow_namespace(flow);
+ list_add(&attr->list, &flow->attrs);
+
+ flow_action_for_each(i, _act, &flow_action_reorder) {
+ act = *_act;
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act) {
+ NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
+ err = -EOPNOTSUPP;
+ goto out_free;
}
- break;
- case FLOW_ACTION_VLAN_PUSH:
- attr->vlan_vid[vlan_idx] = act->vlan.vid;
- attr->vlan_prio[vlan_idx] = act->vlan.prio;
- attr->vlan_proto[vlan_idx] = act->vlan.proto;
- if (!attr->vlan_proto[vlan_idx])
- attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
-
- if (vlan_idx) {
- if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
- MLX5_FS_VLAN_DEPTH))
- return -EOPNOTSUPP;
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
- } else {
- if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
- (act->vlan.proto != htons(ETH_P_8021Q) ||
- act->vlan.prio))
- return -EOPNOTSUPP;
+ if (!tc_act->can_offload(parse_state, act, i, attr)) {
+ err = -EOPNOTSUPP;
+ goto out_free;
+ }
+
+ err = tc_act->parse_action(parse_state, act, priv, attr);
+ if (err)
+ goto out_free;
+
+ parse_state->actions |= attr->action;
- *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ /* Split attr for multi table act if not the last act. */
+ if (tc_act->is_multi_table_act &&
+ tc_act->is_multi_table_act(priv, act, attr) &&
+ i < flow_action_reorder.num_entries - 1) {
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ if (err)
+ goto out_free;
+
+ attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ list_add(&attr->list, &flow->attrs);
}
- break;
- default:
+ }
+
+ kfree(flow_action_reorder.entries);
+
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ if (err)
+ goto out_free_post_acts;
+
+ err = alloc_flow_post_acts(flow, extack);
+ if (err)
+ goto out_free_post_acts;
+
+ return 0;
+
+out_free:
+ kfree(flow_action_reorder.entries);
+out_free_post_acts:
+ free_flow_post_acts(flow);
+
+ return err;
+}
+
+static int
+flow_action_supported(struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ if (!flow_action_has_entries(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
return -EINVAL;
}
- attr->total_vlan = vlan_idx + 1;
+ if (!flow_action_hw_stats_check(flow_action, extack,
+ FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
+ return -EOPNOTSUPP;
+ }
return 0;
}
-static int add_vlan_push_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
- struct net_device **out_dev,
- u32 *action)
+static int
+parse_tc_nic_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
- struct net_device *vlan_dev = *out_dev;
- struct flow_action_entry vlan_act = {
- .id = FLOW_ACTION_VLAN_PUSH,
- .vlan.vid = vlan_dev_vlan_id(vlan_dev),
- .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
- .vlan.prio = 0,
- };
+ struct mlx5e_tc_act_parse_state *parse_state;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
int err;
- err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+ err = flow_action_supported(flow_action, extack);
if (err)
return err;
- *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
- dev_get_iflink(vlan_dev));
- if (is_vlan_dev(*out_dev))
- err = add_vlan_push_action(priv, attr, out_dev, action);
+ attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ parse_attr = attr->parse_attr;
+ parse_state = &parse_attr->parse_state;
+ mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
+ parse_state->ct_priv = get_ct_priv(priv);
- return err;
+ err = parse_tc_actions(parse_state, flow_action);
+ if (err)
+ return err;
+
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
+ if (err)
+ return err;
+
+ if (!actions_match_supported(priv, flow_action, parse_state->actions,
+ parse_attr, flow, extack))
+ return -EOPNOTSUPP;
+
+ return 0;
}
-static int add_vlan_pop_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
- u32 *action)
+static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
{
- int nest_level = attr->parse_attr->filter_dev->lower_level;
- struct flow_action_entry vlan_act = {
- .id = FLOW_ACTION_VLAN_POP,
- };
- int err = 0;
+ struct mlx5e_priv *peer_priv;
- while (nest_level--) {
- err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
- if (err)
- return err;
- }
+ peer_priv = netdev_priv(peer_netdev);
- return err;
+ return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
+ mlx5e_eswitch_vf_rep(priv->netdev) &&
+ mlx5e_eswitch_vf_rep(peer_netdev) &&
+ mlx5e_same_hw_devs(priv, peer_priv));
}
-bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
- struct net_device *out_dev)
+static bool same_hw_reps(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
{
- if (is_merged_eswitch_dev(priv, out_dev))
- return true;
+ struct mlx5e_priv *peer_priv;
- return mlx5e_eswitch_rep(out_dev) &&
- same_hw_devs(priv, netdev_priv(out_dev));
+ peer_priv = netdev_priv(peer_netdev);
+
+ return mlx5e_eswitch_rep(priv->netdev) &&
+ mlx5e_eswitch_rep(peer_netdev) &&
+ mlx5e_same_hw_devs(priv, peer_priv);
}
-static bool is_duplicated_output_device(struct net_device *dev,
- struct net_device *out_dev,
- int *ifindexes, int if_count,
- struct netlink_ext_ack *extack)
+static bool is_lag_dev(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
{
- int i;
+ return ((mlx5_lag_is_sriov(priv->mdev) ||
+ mlx5_lag_is_multipath(priv->mdev)) &&
+ same_hw_reps(priv, peer_netdev));
+}
- for (i = 0; i < if_count; i++) {
- if (ifindexes[i] == out_dev->ifindex) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't duplicate output to same device");
- netdev_err(dev, "can't duplicate output to same device: %s\n",
- out_dev->name);
- return true;
- }
- }
+static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ if (same_hw_reps(priv, out_dev) &&
+ MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ return true;
return false;
}
-static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
- struct flow_action *flow_action,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev)
{
- struct pedit_headers_action hdrs[2] = {};
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- const struct ip_tunnel_info *info = NULL;
- int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
- bool ft_flow = mlx5e_is_ft_flow(flow);
- const struct flow_action_entry *act;
- int err, i, if_count = 0;
- bool encap = false;
- u32 action = 0;
+ if (is_merged_eswitch_vfs(priv, out_dev))
+ return true;
- if (!flow_action_has_entries(flow_action))
- return -EINVAL;
+ if (is_multiport_eligible(priv, out_dev))
+ return true;
- flow_action_for_each(i, act, flow_action) {
- switch (act->id) {
- case FLOW_ACTION_DROP:
- action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- case FLOW_ACTION_MANGLE:
- case FLOW_ACTION_ADD:
- err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
- parse_attr, hdrs, extack);
- if (err)
- return err;
+ if (is_lag_dev(priv, out_dev))
+ return true;
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- attr->split_count = attr->out_count;
- break;
- case FLOW_ACTION_CSUM:
- if (csum_offload_supported(priv, action,
- act->csum_flags, extack))
- break;
+ return mlx5e_eswitch_rep(out_dev) &&
+ same_port_devs(priv, netdev_priv(out_dev));
+}
- return -EOPNOTSUPP;
- case FLOW_ACTION_REDIRECT:
- case FLOW_ACTION_MIRRED: {
- struct mlx5e_priv *out_priv;
- struct net_device *out_dev;
-
- out_dev = act->dev;
- if (!out_dev) {
- /* out_dev is NULL when filters with
- * non-existing mirred device are replayed to
- * the driver.
- */
- return -EINVAL;
- }
+int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type,
+ u32 *action,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5e_tc_int_port_priv *int_port_priv;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_int_port *dest_int_port;
+ int err;
- if (ft_flow && out_dev == priv->netdev) {
- /* Ignore forward to self rules generated
- * by adding both mlx5 devs to the flow table
- * block on a normal nft offload setup.
- */
- return -EOPNOTSUPP;
- }
+ parse_attr = attr->parse_attr;
+ int_port_priv = mlx5e_get_int_port_priv(priv);
- if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't support more output ports, can't offload forwarding");
- pr_err("can't support more than %d output ports, can't offload forwarding\n",
- attr->out_count);
- return -EOPNOTSUPP;
- }
+ dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
+ if (IS_ERR(dest_int_port))
+ return PTR_ERR(dest_int_port);
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- if (encap) {
- parse_attr->mirred_ifindex[attr->out_count] =
- out_dev->ifindex;
- parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
- if (!parse_attr->tun_info[attr->out_count])
- return -ENOMEM;
- encap = false;
- attr->dests[attr->out_count].flags |=
- MLX5_ESW_DEST_ENCAP;
- attr->out_count++;
- /* attr->dests[].rep is resolved when we
- * handle encap
- */
- } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- struct net_device *uplink_upper;
-
- if (is_duplicated_output_device(priv->netdev,
- out_dev,
- ifindexes,
- if_count,
- extack))
- return -EOPNOTSUPP;
-
- ifindexes[if_count] = out_dev->ifindex;
- if_count++;
-
- rcu_read_lock();
- uplink_upper =
- netdev_master_upper_dev_get_rcu(uplink_dev);
- if (uplink_upper &&
- netif_is_lag_master(uplink_upper) &&
- uplink_upper == out_dev)
- out_dev = uplink_dev;
- rcu_read_unlock();
-
- if (is_vlan_dev(out_dev)) {
- err = add_vlan_push_action(priv, attr,
- &out_dev,
- &action);
- if (err)
- return err;
- }
-
- if (is_vlan_dev(parse_attr->filter_dev)) {
- err = add_vlan_pop_action(priv, attr,
- &action);
- if (err)
- return err;
- }
-
- if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "devices are not on same switch HW, can't offload forwarding");
- pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
- return -EOPNOTSUPP;
- }
-
- out_priv = netdev_priv(out_dev);
- rpriv = out_priv->ppriv;
- attr->dests[attr->out_count].rep = rpriv->rep;
- attr->dests[attr->out_count].mdev = out_priv->mdev;
- attr->out_count++;
- } else if (parse_attr->filter_dev != priv->netdev) {
- /* All mlx5 devices are called to configure
- * high level device filters. Therefore, the
- * *attempt* to install a filter on invalid
- * eswitch should not trigger an explicit error
- */
- return -EINVAL;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "devices are not on same switch HW, can't offload forwarding");
- pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
- return -EINVAL;
- }
- }
- break;
- case FLOW_ACTION_TUNNEL_ENCAP:
- info = act->tunnel;
- if (info)
- encap = true;
- else
- return -EOPNOTSUPP;
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
+ mlx5e_tc_int_port_get_metadata(dest_int_port));
+ if (err) {
+ mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
+ return err;
+ }
- break;
- case FLOW_ACTION_VLAN_PUSH:
- case FLOW_ACTION_VLAN_POP:
- if (act->id == FLOW_ACTION_VLAN_PUSH &&
- (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
- /* Replace vlan pop+push with vlan modify */
- action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
- err = add_vlan_rewrite_action(priv,
- MLX5_FLOW_NAMESPACE_FDB,
- act, parse_attr, hdrs,
- &action, extack);
- } else {
- err = parse_tc_vlan_action(priv, act, attr, &action);
- }
- if (err)
- return err;
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- attr->split_count = attr->out_count;
- break;
- case FLOW_ACTION_VLAN_MANGLE:
- err = add_vlan_rewrite_action(priv,
- MLX5_FLOW_NAMESPACE_FDB,
- act, parse_attr, hdrs,
- &action, extack);
- if (err)
- return err;
+ esw_attr->dest_int_port = dest_int_port;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
- attr->split_count = attr->out_count;
- break;
- case FLOW_ACTION_TUNNEL_DECAP:
- action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
- break;
- case FLOW_ACTION_GOTO: {
- u32 dest_chain = act->chain_index;
- u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
+ /* Forward to root fdb for matching against the new source vport */
+ attr->dest_chain = 0;
- if (ft_flow) {
- NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
- return -EOPNOTSUPP;
- }
- if (dest_chain <= attr->chain) {
- NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
- return -EOPNOTSUPP;
- }
- if (dest_chain > max_chain) {
- NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
- return -EOPNOTSUPP;
- }
- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- attr->dest_chain = dest_chain;
- break;
- }
- default:
- NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
- return -EOPNOTSUPP;
- }
- }
+ return 0;
+}
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
- action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
- /* For prio tag mode, replace vlan pop with rewrite vlan prio
- * tag rewrite.
- */
- action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
- err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
- &action, extack);
- if (err)
- return err;
- }
+static int
+parse_tc_fdb_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_act_parse_state *parse_state;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct net_device *filter_dev;
+ int err;
- if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
- hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
- err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
- parse_attr, hdrs, &action, extack);
- if (err)
- return err;
- /* in case all pedit actions are skipped, remove the MOD_HDR
- * flag. we might have set split_count either by pedit or
- * pop/push. if there is no pop/push either, reset it too.
- */
- if (parse_attr->num_mod_hdr_actions == 0) {
- action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- kfree(parse_attr->mod_hdr_actions);
- if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
- (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
- attr->split_count = 0;
- }
- }
+ err = flow_action_supported(flow_action, extack);
+ if (err)
+ return err;
- attr->action = action;
- if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
- return -EOPNOTSUPP;
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ filter_dev = parse_attr->filter_dev;
+ parse_state = &parse_attr->parse_state;
+ mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
+ parse_state->ct_priv = get_ct_priv(priv);
- if (attr->dest_chain) {
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
- NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
- return -EOPNOTSUPP;
- }
- attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- }
+ err = parse_tc_actions(parse_state, flow_action);
+ if (err)
+ return err;
- if (!(attr->action &
- (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
- NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action");
+ /* Forward to/from internal port can only have 1 dest */
+ if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
+ esw_attr->out_count > 1) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Rules with internal port can have only one destination");
return -EOPNOTSUPP;
}
- if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ /* Forward from tunnel/internal port to internal port is not supported */
+ if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
+ esw_attr->dest_int_port) {
NL_SET_ERR_MSG_MOD(extack,
- "current firmware doesn't support split rule for port mirroring");
- netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
+ "Forwarding from tunnel/internal port to internal port is not supported");
return -EOPNOTSUPP;
}
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
+ if (err)
+ return err;
+
+ if (!actions_match_supported(priv, flow_action, parse_state->actions,
+ parse_attr, flow, extack))
+ return -EOPNOTSUPP;
+
return 0;
}
@@ -3622,37 +4148,54 @@ static const struct rhashtable_params tc_ht_params = {
static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
unsigned long flags)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5e_rep_priv *rpriv;
if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- return &uplink_rpriv->uplink_priv.tc_ht;
+ rpriv = priv->ppriv;
+ return &rpriv->tc_ht;
} else /* NIC offload */
- return &priv->fs.tc.ht;
+ return &tc->ht;
}
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
flow_flag_test(flow, INGRESS);
bool act_is_encap = !!(attr->action &
MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
- bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
+ bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
MLX5_DEVCOM_ESW_OFFLOADS);
if (!esw_paired)
return false;
- if ((mlx5_lag_is_sriov(attr->in_mdev) ||
- mlx5_lag_is_multipath(attr->in_mdev)) &&
+ if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
+ mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
(is_rep_ingress || act_is_encap))
return true;
return false;
}
+struct mlx5_flow_attr *
+mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
+{
+ u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
+ sizeof(struct mlx5_esw_flow_attr) :
+ sizeof(struct mlx5_nic_flow_attr);
+ struct mlx5_flow_attr *attr;
+
+ attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
+ if (!attr)
+ return attr;
+
+ INIT_LIST_HEAD(&attr->list);
+ return attr;
+}
+
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct flow_cls_offload *f, unsigned long flow_flags,
@@ -3660,25 +4203,34 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct mlx5e_tc_flow **__flow)
{
struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr;
struct mlx5e_tc_flow *flow;
- int out_index, err;
+ int err = -ENOMEM;
+ int out_index;
- flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
- if (!parse_attr || !flow) {
- err = -ENOMEM;
+ if (!parse_attr || !flow)
goto err_free;
- }
- flow->cookie = f->cookie;
flow->flags = flow_flags;
+ flow->cookie = f->cookie;
flow->priv = priv;
+
+ attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
+ if (!attr)
+ goto err_free;
+
+ flow->attr = attr;
+
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
INIT_LIST_HEAD(&flow->encaps[out_index].list);
- INIT_LIST_HEAD(&flow->mod_hdr);
INIT_LIST_HEAD(&flow->hairpin);
+ INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
+ INIT_LIST_HEAD(&flow->attrs);
refcount_set(&flow->refcnt, 1);
init_completion(&flow->init_done);
+ init_completion(&flow->del_hw_done);
*__flow = flow;
*__parse_attr = parse_attr;
@@ -3692,7 +4244,17 @@ err_free:
}
static void
-mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
+mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct flow_cls_offload *f)
+{
+ attr->parse_attr = parse_attr;
+ attr->chain = f->common.chain_index;
+ attr->prio = f->common.prio;
+}
+
+static void
+mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct flow_cls_offload *f,
@@ -3700,10 +4262,9 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
struct mlx5_core_dev *in_mdev)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
- esw_attr->parse_attr = parse_attr;
- esw_attr->chain = f->common.chain_index;
- esw_attr->prio = f->common.prio;
+ mlx5e_flow_attr_init(attr, parse_attr, f);
esw_attr->in_rep = in_rep;
esw_attr->in_mdev = in_mdev;
@@ -3724,6 +4285,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_core_dev *in_mdev)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
@@ -3737,7 +4299,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
goto out;
parse_attr->filter_dev = filter_dev;
- mlx5e_flow_esw_attr_init(flow->esw_attr,
+ mlx5e_flow_esw_attr_init(flow->attr,
priv, parse_attr,
f, in_rep, in_mdev);
@@ -3746,21 +4308,39 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
+ /* actions validation depends on parsing the ct matches first */
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
+ &flow->attr->ct_attr, extack);
+ if (err)
+ goto err_free;
+
+ /* always set IP version for indirect table handling */
+ flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
+
err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
+ if (flow->attr->lag.count) {
+ err = mlx5_lag_add_mpesw_rule(esw->dev);
+ if (err)
+ goto err_free;
+ }
+
err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
complete_all(&flow->init_done);
if (err) {
if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
- goto err_free;
+ goto err_lag;
add_unready_flow(flow);
}
return flow;
+err_lag:
+ if (flow->attr->lag.count)
+ mlx5_lag_del_mpesw_rule(esw->dev);
err_free:
mlx5e_flow_put(priv, flow);
out:
@@ -3773,6 +4353,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
{
struct mlx5e_priv *priv = flow->priv, *peer_priv;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_rep_priv *peer_urpriv;
@@ -3792,15 +4373,15 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
* original flow and packets redirected from uplink use the
* peer mdev.
*/
- if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
in_mdev = peer_priv->mdev;
else
in_mdev = priv->mdev;
- parse_attr = flow->esw_attr->parse_attr;
+ parse_attr = flow->attr->parse_attr;
peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
parse_attr->filter_dev,
- flow->esw_attr->in_rep, in_mdev);
+ attr->in_rep, in_mdev);
if (IS_ERR(peer_flow)) {
err = PTR_ERR(peer_flow);
goto out;
@@ -3864,9 +4445,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow;
int attr_size, err;
- /* multi-chain not supported for NIC rules */
- if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+ return -EOPNOTSUPP;
+ } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
return -EOPNOTSUPP;
+ }
flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
attr_size = sizeof(struct mlx5_nic_flow_attr);
@@ -3876,28 +4460,35 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
goto out;
parse_attr->filter_dev = filter_dev;
+ mlx5e_flow_attr_init(flow->attr, parse_attr, f);
+
err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
f, filter_dev);
if (err)
goto err_free;
- err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
+ &flow->attr->ct_attr, extack);
+ if (err)
+ goto err_free;
+
+ err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
- err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+ err = mlx5e_tc_add_nic_flow(priv, flow, extack);
if (err)
goto err_free;
flow_flag_set(flow, OFFLOADED);
- kvfree(parse_attr);
*__flow = flow;
return 0;
err_free:
+ flow_flag_set(flow, FAILED);
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
- kvfree(parse_attr);
out:
return err;
}
@@ -3928,41 +4519,75 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
return err;
}
+static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
+ struct mlx5e_rep_priv *rpriv)
+{
+ /* Offloaded flow rule is allowed to duplicate on non-uplink representor
+ * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
+ * function is called from NIC mode.
+ */
+ return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct flow_cls_offload *f, unsigned long flags)
{
struct netlink_ext_ack *extack = f->common.extack;
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_tc_flow *flow;
int err = 0;
+ if (!mlx5_esw_hold(priv->mdev))
+ return -EBUSY;
+
+ mlx5_esw_get(priv->mdev);
+
rcu_read_lock();
flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
- rcu_read_unlock();
if (flow) {
+ /* Same flow rule offloaded to non-uplink representor sharing tc block,
+ * just return 0.
+ */
+ if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
+ goto rcu_unlock;
+
NL_SET_ERR_MSG_MOD(extack,
"flow cookie already exists, ignoring");
netdev_warn_once(priv->netdev,
"flow cookie %lx already exists, ignoring\n",
f->cookie);
err = -EEXIST;
- goto out;
+ goto rcu_unlock;
}
+rcu_unlock:
+ rcu_read_unlock();
+ if (flow)
+ goto out;
trace_mlx5e_configure_flower(f);
err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
if (err)
goto out;
+ /* Flow rule offloaded to non-uplink representor sharing tc block,
+ * set the flow's owner dev.
+ */
+ if (is_flow_rule_duplicate_allowed(dev, rpriv))
+ flow->orig_dev = dev;
+
err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
if (err)
goto err_free;
+ mlx5_esw_release(priv->mdev);
return 0;
err_free:
mlx5e_flow_put(priv, flow);
out:
+ mlx5_esw_put(priv->mdev);
+ mlx5_esw_release(priv->mdev);
return err;
}
@@ -4002,6 +4627,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
trace_mlx5e_delete_flower(f);
mlx5e_flow_put(priv, flow);
+ mlx5_esw_put(priv->mdev);
return 0;
errout:
@@ -4034,7 +4660,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
goto errout;
}
- if (mlx5e_is_offloaded_flow(flow)) {
+ if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
counter = mlx5e_tc_get_counter(flow);
if (!counter)
goto errout;
@@ -4068,20 +4694,21 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
no_peer_counter:
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
- flow_stats_update(&f->stats, bytes, packets, lastuse);
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
trace_mlx5e_stats_flower(f);
errout:
mlx5e_flow_put(priv, flow);
return err;
}
-static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
+static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
struct netlink_ext_ack *extack)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch *esw;
+ u32 rate_mbps = 0;
u16 vport_num;
- u32 rate_mbps;
int err;
vport_num = rpriv->rep->vport;
@@ -4098,14 +4725,46 @@ static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
* Moreover, if rate is non zero we choose to configure to a minimum of
* 1 mbit/sec.
*/
- rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
- err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+ if (rate) {
+ rate = (rate * BITS_PER_BYTE) + 500000;
+ do_div(rate, 1000000);
+ rate_mbps = max_t(u32, rate, 1);
+ }
+
+ err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
if (err)
NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
return err;
}
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when exceed action is not drop");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
+ !flow_action_is_last_entry(action, act)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is ok, but action is not last");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.peakrate_bytes_ps ||
+ act->police.avrate || act->police.overhead) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when peakrate/avrate/overhead is configured");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct netlink_ext_ack *extack)
@@ -4125,9 +4784,24 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
+ if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
+ return -EOPNOTSUPP;
+ }
+
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_POLICE:
+ if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is not continue");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx5e_policer_validate(flow_action, act, extack);
+ if (err)
+ return err;
+
err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
if (err)
return err;
@@ -4176,33 +4850,35 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
rpriv->prev_vf_vport_stats = cur_stats;
- flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
+ flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
+ FLOW_ACTION_HW_STATS_DELAYED);
}
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
struct mlx5e_priv *peer_priv)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
struct mlx5e_hairpin_entry *hpe, *tmp;
LIST_HEAD(init_wait_list);
u16 peer_vhca_id;
int bkt;
- if (!same_hw_devs(priv, peer_priv))
+ if (!mlx5e_same_hw_devs(priv, peer_priv))
return;
peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
- mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
- hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
+ mutex_lock(&tc->hairpin_tbl_lock);
+ hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
if (refcount_inc_not_zero(&hpe->refcnt))
list_add(&hpe->dead_peer_wait_list, &init_wait_list);
- mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
wait_for_completion(&hpe->res_ready);
if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
- hpe->hp->pair->peer_gone = true;
+ mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
mlx5e_hairpin_put(priv, hpe);
}
@@ -4212,7 +4888,6 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- struct mlx5e_flow_steering *fs;
struct mlx5e_priv *peer_priv;
struct mlx5e_tc_table *tc;
struct mlx5e_priv *priv;
@@ -4223,8 +4898,7 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
- fs = container_of(tc, struct mlx5e_flow_steering, tc);
- priv = container_of(fs, struct mlx5e_priv, fs);
+ priv = tc->priv;
peer_priv = netdev_priv(ndev);
if (priv == peer_priv ||
!(priv->netdev->features & NETIF_F_HW_TC))
@@ -4235,21 +4909,108 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
+{
+ int tc_grp_size, tc_tbl_size;
+ u32 max_flow_counter;
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
+
+ tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
+ BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
+
+ return tc_tbl_size;
+}
+
+static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_flow_table **ft = &tc->miss_t;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
+ ft_attr.max_fte = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_TC_MISS_LEVEL;
+ ft_attr.prio = 0;
+ ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
+
+ *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(*ft)) {
+ err = PTR_ERR(*ft);
+ netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
+ }
+
+ return err;
+}
+
+static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+
+ mlx5_destroy_flow_table(tc->miss_t);
+}
+
int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
- struct mlx5e_tc_table *tc = &priv->fs.tc;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mapping_ctx *chains_mapping;
+ struct mlx5_chains_attr attr = {};
+ u64 mapping_id;
int err;
+ mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
mutex_init(&tc->t_lock);
- mutex_init(&tc->mod_hdr.lock);
- hash_init(tc->mod_hdr.hlist);
mutex_init(&tc->hairpin_tbl_lock);
hash_init(tc->hairpin_tbl);
+ tc->priv = priv;
err = rhashtable_init(&tc->ht, &tc_ht_params);
if (err)
return err;
+ lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
+
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
+ if (IS_ERR(chains_mapping)) {
+ err = PTR_ERR(chains_mapping);
+ goto err_mapping;
+ }
+ tc->mapping = chains_mapping;
+
+ err = mlx5e_tc_nic_create_miss_table(priv);
+ if (err)
+ goto err_chains;
+
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
+ attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+ attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
+ attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
+ attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
+ attr.default_ft = tc->miss_t;
+ attr.mapping = chains_mapping;
+
+ tc->chains = mlx5_chains_create(dev, &attr);
+ if (IS_ERR(tc->chains)) {
+ err = PTR_ERR(tc->chains);
+ goto err_miss;
+ }
+
+ tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
+ tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
+ MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
+
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
err = register_netdevice_notifier_dev_net(priv->netdev,
&tc->netdevice_nb,
@@ -4257,8 +5018,21 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
if (err) {
tc->netdevice_nb.notifier_call = NULL;
mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
+ goto err_reg;
}
+ return 0;
+
+err_reg:
+ mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
+ mlx5_chains_destroy(tc->chains);
+err_miss:
+ mlx5e_tc_nic_destroy_miss_table(priv);
+err_chains:
+ mapping_destroy(chains_mapping);
+err_mapping:
+ rhashtable_destroy(&tc->ht);
return err;
}
@@ -4273,35 +5047,134 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
- struct mlx5e_tc_table *tc = &priv->fs.tc;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
if (tc->netdevice_nb.notifier_call)
unregister_netdevice_notifier_dev_net(priv->netdev,
&tc->netdevice_nb,
&tc->netdevice_nn);
- mutex_destroy(&tc->mod_hdr.lock);
+ mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
mutex_destroy(&tc->hairpin_tbl_lock);
- rhashtable_destroy(&tc->ht);
+ rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
if (!IS_ERR_OR_NULL(tc->t)) {
- mlx5_destroy_flow_table(tc->t);
+ mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
tc->t = NULL;
}
mutex_destroy(&tc->t_lock);
+
+ mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
+ mapping_destroy(tc->mapping);
+ mlx5_chains_destroy(tc->chains);
+ mlx5e_tc_nic_destroy_miss_table(priv);
}
-int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
{
- return rhashtable_init(tc_ht, &tc_ht_params);
+ int err;
+
+ err = rhashtable_init(tc_ht, &tc_ht_params);
+ if (err)
+ return err;
+
+ lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
+
+ return 0;
}
-void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
+void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
{
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}
+int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+ const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
+ struct mlx5e_rep_priv *rpriv;
+ struct mapping_ctx *mapping;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ u64 mapping_id;
+ int err = 0;
+
+ rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+ priv = netdev_priv(rpriv->netdev);
+ esw = priv->mdev->priv.eswitch;
+
+ uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
+ MLX5_FLOW_NAMESPACE_FDB);
+ uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
+ esw_chains(esw),
+ &esw->offloads.mod_hdr,
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
+
+ uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
+
+ uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
+
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+ sizeof(struct tunnel_match_key),
+ TUNNEL_INFO_BITS_MASK, true);
+
+ if (IS_ERR(mapping)) {
+ err = PTR_ERR(mapping);
+ goto err_tun_mapping;
+ }
+ uplink_priv->tunnel_mapping = mapping;
+
+ /* Two last values are reserved for stack devices slow path table mark
+ * and bridge ingress push mark.
+ */
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
+ if (IS_ERR(mapping)) {
+ err = PTR_ERR(mapping);
+ goto err_enc_opts_mapping;
+ }
+ uplink_priv->tunnel_enc_opts_mapping = mapping;
+
+ uplink_priv->encap = mlx5e_tc_tun_init(priv);
+ if (IS_ERR(uplink_priv->encap)) {
+ err = PTR_ERR(uplink_priv->encap);
+ goto err_register_fib_notifier;
+ }
+
+ return 0;
+
+err_register_fib_notifier:
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+err_enc_opts_mapping:
+ mapping_destroy(uplink_priv->tunnel_mapping);
+err_tun_mapping:
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+ mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ netdev_warn(priv->netdev,
+ "Failed to initialize tc (eswitch), err: %d", err);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
+ return err;
+}
+
+void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+ mlx5e_tc_tun_cleanup(uplink_priv->encap);
+
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+ mapping_destroy(uplink_priv->tunnel_mapping);
+
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+ mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
+}
+
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
@@ -4331,3 +5204,90 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
}
mutex_unlock(&rpriv->unready_flows_lock);
}
+
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower,
+ unsigned long flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS);
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!priv->netdev || !netif_device_present(priv->netdev))
+ return -EOPNOTSUPP;
+
+ if (mlx5e_is_uplink_rep(priv))
+ flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
+ else
+ flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain = 0, chain_tag, reg_b, zone_restore_id;
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
+ struct mlx5_mapped_obj mapped_obj;
+ struct tc_skb_ext *tc_skb_ext;
+ struct mlx5e_tc_table *tc;
+ int err;
+
+ reg_b = be32_to_cpu(cqe->ft_metadata);
+ tc = mlx5e_fs_get_tc(priv->fs);
+ chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
+
+ err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find chain for chain tag: %d, err: %d\n",
+ chain_tag, err);
+ return false;
+ }
+
+ if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+ chain = mapped_obj.chain;
+ tc_skb_ext = tc_skb_ext_alloc(skb);
+ if (WARN_ON(!tc_skb_ext))
+ return false;
+
+ tc_skb_ext->chain = chain;
+
+ zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
+ ESW_ZONE_ID_MASK;
+
+ if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
+ zone_restore_id))
+ return false;
+ } else {
+ netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
+ return false;
+ }
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 262cdb7b69b1..48241317a535 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -34,11 +34,140 @@
#define __MLX5_EN_TC_H__
#include <net/pkt_cls.h>
+#include "en.h"
+#include "eswitch.h"
+#include "en/tc_ct.h"
+#include "en/tc_tun.h"
+#include "en/tc/int_port.h"
+#include "en/tc/meter.h"
+#include "en_rep.h"
#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
#ifdef CONFIG_MLX5_ESWITCH
+#define NIC_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\
+ sizeof(struct mlx5_nic_flow_attr))
+#define ESW_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\
+ sizeof(struct mlx5_esw_flow_attr))
+#define ns_to_attr_sz(ns) (((ns) == MLX5_FLOW_NAMESPACE_FDB) ?\
+ ESW_FLOW_ATTR_SZ :\
+ NIC_FLOW_ATTR_SZ)
+
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
+
+struct mlx5e_tc_update_priv {
+ struct net_device *fwd_dev;
+};
+
+struct mlx5_nic_flow_attr {
+ u32 flow_tag;
+ u32 hairpin_tirn;
+ struct mlx5_flow_table *hairpin_ft;
+};
+
+struct mlx5_flow_attr {
+ u32 action;
+ struct mlx5_fc *counter;
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_ct_attr ct_attr;
+ struct mlx5e_sample_attr sample_attr;
+ struct mlx5e_meter_attr meter_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ u32 chain;
+ u16 prio;
+ u32 dest_chain;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_table *dest_ft;
+ u8 inner_match_level;
+ u8 outer_match_level;
+ u8 ip_version;
+ u8 tun_ip_version;
+ int tunnel_id; /* mapped tunnel id */
+ u32 flags;
+ u32 exe_aso_type;
+ struct list_head list;
+ struct mlx5e_post_act_handle *post_act_handle;
+ struct {
+ /* Indicate whether the parsed flow should be counted for lag mode decision
+ * making
+ */
+ bool count;
+ } lag;
+ /* keep this union last */
+ union {
+ struct mlx5_esw_flow_attr esw_attr[0];
+ struct mlx5_nic_flow_attr nic_attr[0];
+ };
+};
+
+enum {
+ MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0),
+ MLX5_ATTR_FLAG_SLOW_PATH = BIT(1),
+ MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2),
+ MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3),
+ MLX5_ATTR_FLAG_SAMPLE = BIT(4),
+ MLX5_ATTR_FLAG_ACCEPT = BIT(5),
+ MLX5_ATTR_FLAG_CT = BIT(6),
+};
+
+/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
+static inline bool
+mlx5e_tc_attr_flags_skip(u32 attr_flags)
+{
+ return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT);
+}
+
+struct mlx5_rx_tun_attr {
+ u16 decap_vport;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } src_ip; /* Valid if decap_vport is not zero */
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip; /* Valid if decap_vport is not zero */
+ u32 vni;
+};
+
+#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16
+#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0)
+
+#define MLX5E_TC_MAX_INT_PORT_NUM (8)
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+struct tunnel_match_key {
+ struct flow_dissector_key_control enc_control;
+ struct flow_dissector_key_keyid enc_key_id;
+ struct flow_dissector_key_ports enc_tp;
+ struct flow_dissector_key_ip enc_ip;
+ union {
+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
+ };
+
+ int filter_ifindex;
+};
+
+struct tunnel_match_enc_opts {
+ struct flow_dissector_key_enc_opts key;
+ struct flow_dissector_key_enc_opts mask;
+};
+
+/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
+ * Upper TUNNEL_INFO_BITS for general tunnel info.
+ * Lower ENC_OPTS_BITS bits for enc_opts.
+ */
+#define TUNNEL_INFO_BITS 12
+#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
+#define ENC_OPTS_BITS 11
+#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
+#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
+#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
+
enum {
MLX5E_TC_FLAG_INGRESS_BIT,
MLX5E_TC_FLAG_EGRESS_BIT,
@@ -50,11 +179,11 @@ enum {
#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)
-int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
-void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv);
+void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv);
-int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
-void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
+int mlx5e_tc_ht_init(struct rhashtable *tc_ht);
+void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht);
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct flow_cls_offload *f, unsigned long flags);
@@ -82,18 +211,137 @@ bool mlx5e_encap_take(struct mlx5e_encap_entry *e);
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e);
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list);
-void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
struct mlx5e_neigh_hash_entry;
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e);
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
-
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+enum mlx5e_tc_attr_to_reg {
+ CHAIN_TO_REG,
+ VPORT_TO_REG,
+ TUNNEL_TO_REG,
+ CTSTATE_TO_REG,
+ ZONE_TO_REG,
+ ZONE_RESTORE_TO_REG,
+ MARK_TO_REG,
+ LABELS_TO_REG,
+ FTEID_TO_REG,
+ NIC_CHAIN_TO_REG,
+ NIC_ZONE_RESTORE_TO_REG,
+ PACKET_COLOR_TO_REG,
+};
+
+struct mlx5e_tc_attr_to_reg_mapping {
+ int mfield; /* rewrite field */
+ int moffset; /* bit offset of mfield */
+ int mlen; /* bits to rewrite/match */
+
+ int soffset; /* byte offset of spec for match */
+};
+
+extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[];
+
+#define MLX5_REG_MAPPING_MOFFSET(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].moffset)
+#define MLX5_REG_MAPPING_MBITS(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].mlen)
+#define MLX5_REG_MAPPING_MASK(reg_id) (GENMASK(mlx5e_tc_attr_to_reg_mappings[reg_id].mlen - 1, 0))
+
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
struct net_device *out_dev);
+int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data);
+
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5e_tc_attr_to_reg type,
+ int act_id, u32 data);
+
+void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data,
+ u32 mask);
+
+void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 *data,
+ u32 *mask);
+
+int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data);
+
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+ struct flow_match_basic *match, bool outer,
+ void *headers_c, void *headers_v);
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv);
+
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5_flow_handle *
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev);
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev,
+ u16 *vport);
+
+int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type,
+ u32 *action,
+ int out_index);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int mlx5e_tc_ht_init(struct rhashtable *tc_ht) { return 0; }
+static inline void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) {}
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5_flow_attr *mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type);
+
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
@@ -102,6 +350,42 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv,
{
return 0;
}
+
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
+#endif
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+struct mlx5e_tc_table *mlx5e_tc_table_alloc(void);
+void mlx5e_tc_table_free(struct mlx5e_tc_table *tc);
+static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain, reg_b;
+
+ reg_b = be32_to_cpu(cqe->ft_metadata);
+
+ if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ESW_ZONE_ID_BITS))
+ return false;
+
+ chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
+ if (chain)
+ return true;
+#endif
+
+ return false;
+}
+
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; }
+static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {}
+static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
+{ return false; }
+static inline bool
+mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
+{ return true; }
#endif
#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index ee60383adc5b..f7897ddb29c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -38,8 +38,10 @@
#include "en/txrx.h"
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
-#include "en_accel/ktls.h"
-#include "lib/clock.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/macsec.h"
+#include "en/ptp.h"
+#include <net/ipv6.h>
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
@@ -53,49 +55,6 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
}
}
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
-{
- int dscp_cp = 0;
-
- if (skb->protocol == htons(ETH_P_IP))
- dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
- else if (skb->protocol == htons(ETH_P_IPV6))
- dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
-
- return priv->dcbx_dp.dscp2prio[dscp_cp];
-}
-#endif
-
-u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- int txq_ix = netdev_pick_tx(dev, skb, NULL);
- struct mlx5e_priv *priv = netdev_priv(dev);
- u16 num_channels;
- int up = 0;
-
- if (!netdev_get_num_tc(dev))
- return txq_ix;
-
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
- up = mlx5e_get_dscp_up(priv, skb);
- else
-#endif
- if (skb_vlan_tag_present(skb))
- up = skb_vlan_tag_get_prio(skb);
-
- /* txq_ix can be larger than num_channels since
- * dev->num_real_tx_queues = num_channels * num_tc
- */
- num_channels = priv->channels.params.num_channels;
- if (txq_ix >= num_channels)
- txq_ix = priv->txq2sq[txq_ix]->ch_ix;
-
- return priv->channel_tc2realtxq[txq_ix][up];
-}
-
static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
@@ -134,21 +93,36 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
return min_t(u16, hlen, skb_headlen(skb));
}
+#define MLX5_UNSAFE_MEMCPY_DISCLAIMER \
+ "This copy has been bounds-checked earlier in " \
+ "mlx5i_sq_calc_wqe_attr() and intentionally " \
+ "crosses a flex array boundary. Since it is " \
+ "performance sensitive, splitting the copy is " \
+ "undesirable."
+
static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
{
struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
int cpy1_sz = 2 * ETH_ALEN;
int cpy2_sz = ihs - cpy1_sz;
- memcpy(vhdr, skb->data, cpy1_sz);
+ memcpy(&vhdr->addrs, skb->data, cpy1_sz);
vhdr->h_vlan_proto = skb->vlan_proto;
vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
- memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
+ unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto,
+ skb->data + cpy1_sz,
+ cpy2_sz,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
}
static inline void
-mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel,
+ struct mlx5_wqe_eth_seg *eseg)
{
+ if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg)))
+ return;
+
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
if (skb->encapsulation) {
@@ -159,27 +133,41 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct
eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
sq->stats->csum_partial++;
}
+#ifdef CONFIG_MLX5_EN_TLS
+ } else if (unlikely(accel && accel->tls.tls_tisn)) {
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+#endif
} else
sq->stats->csum_none++;
}
+/* Returns the number of header bytes that we plan
+ * to inline later in the transmit descriptor
+ */
static inline u16
-mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
{
struct mlx5e_sq_stats *stats = sq->stats;
u16 ihs;
+ *hopbyhop = 0;
if (skb->encapsulation) {
- ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
+ ihs = skb_inner_tcp_all_headers(skb);
stats->tso_inner_packets++;
stats->tso_inner_bytes += skb->len - ihs;
} else {
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
- else
- ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ } else {
+ ihs = skb_tcp_all_headers(skb);
+ if (ipv6_has_hopopt_jumbo(skb)) {
+ *hopbyhop = sizeof(struct hop_jumbo_hdr);
+ ihs -= sizeof(struct hop_jumbo_hdr);
+ }
+ }
stats->tso_packets++;
- stats->tso_bytes += skb->len - ihs;
+ stats->tso_bytes += skb->len - ihs - *hopbyhop;
}
return ihs;
@@ -234,130 +222,253 @@ dma_unmap_wqe_err:
return -ENOMEM;
}
+struct mlx5e_tx_attr {
+ u32 num_bytes;
+ u16 headlen;
+ u16 ihs;
+ __be16 mss;
+ u16 insz;
+ u8 opcode;
+ u8 hopbyhop;
+};
+
+struct mlx5e_tx_wqe_attr {
+ u16 ds_cnt;
+ u16 ds_cnt_inl;
+ u16 ds_cnt_ids;
+ u8 num_wqebbs;
+};
+
+static u8
+mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel)
+{
+ u8 mode;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ if (accel && accel->tls.tls_tisn)
+ return MLX5_INLINE_MODE_TCP_UDP;
+#endif
+
+ mode = sq->min_inline_mode;
+
+ if (skb_vlan_tag_present(skb) &&
+ test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
+ mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
+
+ return mode;
+}
+
+static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel,
+ struct mlx5e_tx_attr *attr)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+
+ if (skb_is_gso(skb)) {
+ int hopbyhop;
+ u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_LSO,
+ .mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
+ .ihs = ihs,
+ .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
+ .headlen = skb_headlen(skb) - ihs - hopbyhop,
+ .hopbyhop = hopbyhop,
+ };
+
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
+ u16 ihs = mlx5e_calc_min_inline(mode, skb);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_SEND,
+ .mss = cpu_to_be16(0),
+ .ihs = ihs,
+ .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
+ .headlen = skb_headlen(skb) - ihs,
+ };
+
+ stats->packets++;
+ }
+
+ attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
+ stats->bytes += attr->num_bytes;
+}
+
+static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
+{
+ u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
+ u16 ds_cnt_inl = 0;
+ u16 ds_cnt_ids = 0;
+
+ /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */
+
+ if (attr->insz)
+ ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
+ MLX5_SEND_WQE_DS);
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ if (skb_vlan_tag_present(skb))
+ inl += VLAN_HLEN;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS))
+ netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl,
+ (u16)MLX5E_MAX_TX_INLINE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .ds_cnt_ids = ds_cnt_ids,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
+
+static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+}
+
+static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+}
+
+static void mlx5e_tx_flush(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ /* Must not be called when a MPWQE session is active but empty. */
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+
+ wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
+ const struct mlx5e_tx_attr *attr,
+ const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
bool xmit_more)
{
struct mlx5_wq_cyc *wq = &sq->wq;
bool send_doorbell;
- wi->num_bytes = num_bytes;
- wi->num_dma = num_dma;
- wi->num_wqebbs = num_wqebbs;
- wi->skb = skb;
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = skb,
+ .num_bytes = attr->num_bytes,
+ .num_dma = num_dma,
+ .num_wqebbs = wqe_attr->num_wqebbs,
+ .num_fifo_pkts = 0,
+ };
- cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ mlx5e_tx_skb_update_hwts_flags(skb);
sq->pc += wi->num_wqebbs;
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) {
- netif_tx_stop_queue(sq->txq);
- sq->stats->stopped++;
+
+ mlx5e_tx_check_stop(sq);
+
+ if (unlikely(sq->ptpsq)) {
+ mlx5e_skb_cb_hwtstamp_init(skb);
+ mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
+ if (!netif_tx_queue_stopped(sq->txq) &&
+ !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+ skb_get(skb);
}
- send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes,
- xmit_more);
+ send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
if (send_doorbell)
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}
-netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
+static void
+mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
+ struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_eth_seg *eseg;
struct mlx5_wqe_data_seg *dseg;
struct mlx5e_tx_wqe_info *wi;
-
+ u16 ihs = attr->ihs;
+ struct ipv6hdr *h6;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 headlen, ihs, contig_wqebbs_room;
- u16 ds_cnt, ds_cnt_inl = 0;
- u8 num_wqebbs, opcode;
- u32 num_bytes;
int num_dma;
- __be16 mss;
-
- /* Calc ihs and ds cnt, no writes to wqe yet */
- ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
- if (skb_is_gso(skb)) {
- opcode = MLX5_OPCODE_LSO;
- mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
- ihs = mlx5e_tx_get_gso_ihs(sq, skb);
- num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
- stats->packets += skb_shinfo(skb)->gso_segs;
- } else {
- u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb);
- opcode = MLX5_OPCODE_SEND;
- mss = 0;
- ihs = mlx5e_calc_min_inline(mode, skb);
- num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
- stats->packets++;
- }
-
- stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
- headlen = skb->len - ihs - skb->data_len;
- ds_cnt += !!headlen;
- ds_cnt += skb_shinfo(skb)->nr_frags;
-
- if (ihs) {
- ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN;
-
- ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
- ds_cnt += ds_cnt_inl;
- }
-
- num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room < num_wqebbs)) {
-#ifdef CONFIG_MLX5_EN_IPSEC
- struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
-#endif
-#ifdef CONFIG_MLX5_EN_TLS
- struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
-#endif
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
-#ifdef CONFIG_MLX5_EN_IPSEC
- wqe->eth = cur_eth;
-#endif
-#ifdef CONFIG_MLX5_EN_TLS
- wqe->ctrl = cur_ctrl;
-#endif
- }
-
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
eseg = &wqe->eth;
dseg = wqe->data;
-#if IS_ENABLED(CONFIG_GENEVE)
- if (skb->encapsulation)
- mlx5e_tx_tunnel_accel(skb, eseg);
-#endif
- mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
-
- eseg->mss = mss;
+ eseg->mss = attr->mss;
if (ihs) {
- eseg->inline_hdr.sz = cpu_to_be16(ihs);
- if (skb_vlan_tag_present(skb)) {
- ihs -= VLAN_HLEN;
- mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs);
+ u8 *start = eseg->inline_hdr.start;
+
+ if (unlikely(attr->hopbyhop)) {
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ if (skb_vlan_tag_present(skb)) {
+ mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6));
+ ihs += VLAN_HLEN;
+ h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr));
+ } else {
+ unsafe_memcpy(start, skb->data,
+ ETH_HLEN + sizeof(*h6),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ h6 = (struct ipv6hdr *)(start + ETH_HLEN);
+ }
+ h6->nexthdr = IPPROTO_TCP;
+ /* Copy the TCP header after the IPv6 one */
+ memcpy(h6 + 1,
+ skb->data + ETH_HLEN + sizeof(*h6) +
+ sizeof(struct hop_jumbo_hdr),
+ tcp_hdrlen(skb));
+ /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
+ } else if (skb_vlan_tag_present(skb)) {
+ mlx5e_insert_vlan(start, skb, ihs);
+ ihs += VLAN_HLEN;
stats->added_vlan_packets++;
} else {
- memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ attr->ihs,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
}
- dseg += ds_cnt_inl;
+ eseg->inline_hdr.sz |= cpu_to_be16(ihs);
+ dseg += wqe_attr->ds_cnt_inl;
} else if (skb_vlan_tag_present(skb)) {
eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
@@ -366,54 +477,304 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->added_vlan_packets++;
}
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ dseg += wqe_attr->ds_cnt_ids;
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop,
+ attr->headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
- mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
- num_dma, wi, cseg, xmit_more);
+ mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);
- return NETDEV_TX_OK;
+ return;
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
+}
- return NETDEV_TX_OK;
+static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
+{
+ return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
+ !attr->insz && !mlx5e_macsec_skb_is_offload(skb);
+}
+
+static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+
+ /* Assumes the session is already running and has at least one packet. */
+ return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+}
+
+static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ net_prefetchw(wqe->data);
+
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = 0,
+ };
+
+ memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+
+ sq->stats->mpwqe_blks++;
+}
+
+static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
+{
+ return sq->mpwqe.wqe;
+}
+
+static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg;
+
+ dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+
+ session->pkt_count++;
+ session->bytes_count += txd->len;
+
+ dseg->addr = cpu_to_be64(txd->dma_addr);
+ dseg->byte_count = cpu_to_be32(txd->len);
+ dseg->lkey = sq->mkey_be;
+ session->ds_count++;
+
+ sq->stats->mpwqe_pkts++;
+}
+
+static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ u8 ds_count = session->ds_count;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_tx_wqe_info *wi;
+ u16 pi;
+
+ cseg = &session->wqe->ctrl;
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = NULL,
+ .num_bytes = session->bytes_count,
+ .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
+ .num_dma = session->pkt_count,
+ .num_fifo_pkts = session->pkt_count,
+ };
+
+ sq->pc += wi->num_wqebbs;
+
+ session->wqe = NULL;
+
+ mlx5e_tx_check_stop(sq);
+
+ return cseg;
+}
+
+static void
+mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_xmit_data txd;
+
+ txd.data = skb->data;
+ txd.len = skb->len;
+
+ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+ goto err_unmap;
+
+ if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
+ mlx5e_tx_mpwqe_session_complete(sq);
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ }
+
+ sq->stats->xmit_more += xmit_more;
+
+ mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
+ mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
+ mlx5e_tx_mpwqe_add_dseg(sq, &txd);
+ mlx5e_tx_skb_update_hwts_flags(skb);
+
+ if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
+ /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
+ /* Might stop the queue, but we were asked to ring the doorbell anyway. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ }
+
+ return;
+
+err_unmap:
+ mlx5e_dma_unmap_wqe_err(sq, 1);
+ sq->stats->dropped++;
+ dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
+}
+
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
+{
+ /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
+ if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
+ mlx5e_tx_mpwqe_session_complete(sq);
+}
+
+static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ if (ptpsq->ts_cqe_ctr_mask && unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ eseg->flow_table_metadata = cpu_to_be32(ptpsq->skb_fifo_pc &
+ ptpsq->ts_cqe_ctr_mask);
+}
+
+static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
+ struct mlx5_wqe_eth_seg *eseg, u16 ihs)
+{
+ mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
+ mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
+ if (unlikely(sq->ptpsq))
+ mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg);
}
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_accel_tx_state accel = {};
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
struct mlx5e_tx_wqe *wqe;
struct mlx5e_txqsq *sq;
u16 pi;
+ /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
+ * queue being changed is disabled, and smp_wmb guarantees that the
+ * changes are visible before mlx5e_xmit tries to read from txq2sq. It
+ * guarantees that the value of txq2sq[qid] doesn't change while
+ * mlx5e_xmit is running on queue number qid. smb_wmb is paired with
+ * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
+ */
sq = priv->txq2sq[skb_get_queue_mapping(skb)];
- wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+ if (unlikely(!sq)) {
+ /* Two cases when sq can be NULL:
+ * 1. The HTB node is registered, and mlx5e_select_queue
+ * selected its queue ID, but the SQ itself is not yet created.
+ * 2. HTB SQ creation failed. Similar to the previous case, but
+ * the SQ won't be created.
+ */
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
- /* might send skbs and update wqe and pi */
- skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
- if (unlikely(!skb))
+ /* May send SKBs and WQEs. */
+ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
return NETDEV_TX_OK;
- return mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
+ mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);
+
+ if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
+ if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
+ struct mlx5_wqe_eth_seg eseg = {};
+
+ mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs);
+ mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
+ return NETDEV_TX_OK;
+ }
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+ }
+
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+
+ /* May update the WQE, but may not post other WQEs. */
+ mlx5e_accel_tx_finish(sq, wqe, &accel,
+ (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
+ mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs);
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
+
+ return NETDEV_TX_OK;
+}
+
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
+{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth);
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
+}
+
+static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ int i;
+
+ for (i = 0; i < wi->num_dma; i++) {
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+}
+
+static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ struct skb_shared_hwtstamps hwts = {};
+ u64 ts = get_cqe_ts(cqe);
+
+ hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
+ if (sq->ptpsq)
+ mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
+ hwts.hwtstamp, sq->ptpsq->cq_stats);
+ else
+ skb_tstamp_tx(skb, &hwts);
+ }
+
+ napi_consume_skb(skb, napi_budget);
}
-static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
- struct mlx5_err_cqe *err_cqe)
+static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ struct mlx5_cqe64 *cqe, int napi_budget)
{
- struct mlx5_cqwq *wq = &sq->cq.wq;
- u32 ci;
+ int i;
- ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+ for (i = 0; i < wi->num_fifo_pkts; i++) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo);
- netdev_err(sq->channel->netdev,
- "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
- sq->cq.mcq.cqn, ci, sq->sqn,
- get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
- err_cqe->syndrome, err_cqe->vendor_err_synd);
- mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
+ mlx5e_consume_skb(sq, skb, cqe, napi_budget);
+ }
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
@@ -461,52 +822,42 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
- struct sk_buff *skb;
- int j;
-
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
- if (unlikely(!skb)) {
- mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
- sqcc += wi->num_wqebbs;
- continue;
- }
+ sqcc += wi->num_wqebbs;
- if (unlikely(skb_shinfo(skb)->tx_flags &
- SKBTX_HW_TSTAMP)) {
- struct skb_shared_hwtstamps hwts = {};
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
- hwts.hwtstamp =
- mlx5_timecounter_cyc2time(sq->clock,
- get_cqe_ts(cqe));
- skb_tstamp_tx(skb, &hwts);
+ npkts++;
+ nbytes += wi->num_bytes;
+ continue;
}
- for (j = 0; j < wi->num_dma; j++) {
- struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, dma_fifo_cc++);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
+ &dma_fifo_cc)))
+ continue;
- mlx5e_tx_dma_unmap(sq->pdev, dma);
- }
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
- npkts++;
- nbytes += wi->num_bytes;
- sqcc += wi->num_wqebbs;
- napi_consume_skb(skb, napi_budget);
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
} while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
&sq->state)) {
- mlx5e_dump_error_cqe(sq,
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
(struct mlx5_err_cqe *)cqe);
mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
- queue_work(cq->channel->priv->wq,
- &sq->recover_work);
+ queue_work(cq->priv->wq, &sq->recover_work);
}
stats->cqe_err++;
}
@@ -527,6 +878,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
if (netif_tx_queue_stopped(sq->txq) &&
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+ mlx5e_ptpsq_fifo_has_room(sq) &&
!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netif_tx_wake_queue(sq->txq);
stats->wake++;
@@ -535,14 +887,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
+static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++)
+ dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo));
+}
+
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
struct mlx5e_tx_wqe_info *wi;
- struct sk_buff *skb;
- u32 dma_fifo_cc;
- u16 sqcc;
- u16 ci;
- int i;
+ u32 dma_fifo_cc, nbytes = 0;
+ u16 ci, sqcc, npkts = 0;
sqcc = sq->cc;
dma_fifo_cc = sq->dma_fifo_cc;
@@ -550,27 +907,34 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
while (sqcc != sq->pc) {
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
- if (!skb) {
- mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
- sqcc += wi->num_wqebbs;
+ sqcc += wi->num_wqebbs;
+
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ dev_kfree_skb_any(wi->skb);
+
+ npkts++;
+ nbytes += wi->num_bytes;
continue;
}
- for (i = 0; i < wi->num_dma; i++) {
- struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, dma_fifo_cc++);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
+ continue;
- mlx5e_tx_dma_unmap(sq->pdev, dma);
- }
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
- dev_kfree_skb_any(skb);
- sqcc += wi->num_wqebbs;
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
}
sq->dma_fifo_cc = dma_fifo_cc;
sq->cc = sqcc;
+
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
}
#ifdef CONFIG_MLX5_CORE_IPOIB
@@ -583,11 +947,34 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}
-netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5_av *av, u32 dqpn, u32 dqkey,
- bool xmit_more)
+static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
+ u16 ds_cnt_inl = 0;
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
+
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
+
+void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
+{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
struct mlx5i_tx_wqe *wqe;
struct mlx5_wqe_datagram_seg *datagram;
@@ -597,53 +984,17 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 headlen, ihs, pi, contig_wqebbs_room;
- u16 ds_cnt, ds_cnt_inl = 0;
- u8 num_wqebbs, opcode;
- u32 num_bytes;
int num_dma;
- __be16 mss;
+ u16 pi;
- /* Calc ihs and ds cnt, no writes to wqe yet */
- ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
- if (skb_is_gso(skb)) {
- opcode = MLX5_OPCODE_LSO;
- mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
- ihs = mlx5e_tx_get_gso_ihs(sq, skb);
- num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
- stats->packets += skb_shinfo(skb)->gso_segs;
- } else {
- u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb);
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
- opcode = MLX5_OPCODE_SEND;
- mss = 0;
- ihs = mlx5e_calc_min_inline(mode, skb);
- num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
- stats->packets++;
- }
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
- stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
- headlen = skb->len - ihs - skb->data_len;
- ds_cnt += !!headlen;
- ds_cnt += skb_shinfo(skb)->nr_frags;
-
- if (ihs) {
- ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
- ds_cnt += ds_cnt_inl;
- }
-
- num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room < num_wqebbs)) {
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- }
-
- mlx5i_sq_fetch_wqe(sq, &wqe, pi);
-
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
@@ -653,29 +1004,50 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);
- mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
-
- eseg->mss = mss;
-
- if (ihs) {
- memcpy(eseg->inline_hdr.start, skb->data, ihs);
- eseg->inline_hdr.sz = cpu_to_be16(ihs);
- dseg += ds_cnt_inl;
+ mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg);
+
+ eseg->mss = attr.mss;
+
+ if (attr.ihs) {
+ if (unlikely(attr.hopbyhop)) {
+ struct ipv6hdr *h6;
+
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ ETH_HLEN + sizeof(*h6),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN);
+ h6->nexthdr = IPPROTO_TCP;
+ /* Copy the TCP header after the IPv6 one */
+ unsafe_memcpy(h6 + 1,
+ skb->data + ETH_HLEN + sizeof(*h6) +
+ sizeof(struct hop_jumbo_hdr),
+ tcp_hdrlen(skb),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
+ } else {
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ attr.ihs,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ }
+ eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
+ dseg += wqe_attr.ds_cnt_inl;
}
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop,
+ attr.headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
- mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
- num_dma, wi, cseg, xmit_more);
+ mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);
- return NETDEV_TX_OK;
+ return;
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
-
- return NETDEV_TX_OK;
+ mlx5e_tx_flush(sq);
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 257a7c9f7a14..9a458a5d9853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -31,20 +31,19 @@
*/
#include <linux/irq.h>
+#include <net/xdp_sock_drv.h>
#include "en.h"
+#include "en/txrx.h"
#include "en/xdp.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
+#include "en_accel/ktls_txrx.h"
static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
{
int current_cpu = smp_processor_id();
- const struct cpumask *aff;
- struct irq_data *idata;
- idata = irq_desc_get_irq_data(c->irq_desc);
- aff = irq_data_get_affinity_mask(idata);
- return cpumask_test_cpu(current_cpu, aff);
+ return cpumask_test_cpu(current_cpu, c->aff_mask);
}
static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
@@ -77,30 +76,47 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
struct mlx5e_tx_wqe *nopwqe;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+ sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_NOP,
+ .num_wqebbs = 1,
+ };
+
nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
}
static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq)
{
+ bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool);
bool busy_xsk = false, xsk_rx_alloc_err;
- /* Handle the race between the application querying need_wakeup and the
- * driver setting it:
- * 1. Update need_wakeup both before and after the TX. If it goes to
- * "yes", it can only happen with the first update.
- * 2. If the application queried need_wakeup before we set it, the
- * packets will be transmitted anyway, even w/o a wakeup.
- * 3. Give a chance to clear need_wakeup after new packets were queued
- * for TX.
+ /* If SQ is empty, there are no TX completions to trigger NAPI, so set
+ * need_wakeup. Do it before queuing packets for TX to avoid race
+ * condition with userspace.
*/
- mlx5e_xsk_update_tx_wakeup(xsksq);
+ if (need_wakeup && xsksq->pc == xsksq->cc)
+ xsk_set_tx_need_wakeup(xsksq->xsk_pool);
busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
- mlx5e_xsk_update_tx_wakeup(xsksq);
+ /* If we queued some packets for TX, no need for wakeup anymore. */
+ if (need_wakeup && xsksq->pc != xsksq->cc)
+ xsk_clear_tx_need_wakeup(xsksq->xsk_pool);
- xsk_rx_alloc_err = xskrq->post_wqes(xskrq);
- busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err);
+ /* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it
+ * before refilling to avoid race condition with userspace.
+ */
+ if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq))
+ xsk_set_rx_need_wakeup(xskrq->xsk_pool);
+ xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ xskrq);
+ /* Ask for wakeup if WQ is not full after refill. */
+ if (!need_wakeup)
+ busy_xsk |= xsk_rx_alloc_err;
+ else if (xsk_rx_alloc_err)
+ xsk_set_rx_need_wakeup(xskrq->xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(xskrq->xsk_pool);
return busy_xsk;
}
@@ -111,20 +127,40 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
napi);
struct mlx5e_ch_stats *ch_stats = c->stats;
struct mlx5e_xdpsq *xsksq = &c->xsksq;
+ struct mlx5e_txqsq __rcu **qos_sqs;
struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
- bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
bool aff_change = false;
bool busy_xsk = false;
bool busy = false;
int work_done = 0;
+ u16 qos_sqs_size;
+ bool xsk_open;
int i;
+ rcu_read_lock();
+
+ qos_sqs = rcu_dereference(c->qos_sqs);
+
+ xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
ch_stats->poll++;
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
+ if (unlikely(qos_sqs)) {
+ smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */
+ qos_sqs_size = READ_ONCE(c->qos_sqs_size);
+
+ for (i = 0; i < qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
+
+ if (sq)
+ busy |= mlx5e_poll_tx_cq(&sq->cq, budget);
+ }
+ }
+
busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp)
@@ -141,10 +177,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
}
mlx5e_poll_ico_cq(&c->icosq.cq);
-
- busy |= rq->post_wqes(rq);
+ if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
+ /* Don't clear the flag if nothing was polled to prevent
+ * queueing more WQEs and overflowing the async ICOSQ.
+ */
+ clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state);
+
+ /* Keep after async ICOSQ CQ poll */
+ if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget)))
+ busy |= mlx5e_ktls_rx_handle_resync_list(c, budget);
+
+ busy |= INDIRECT_CALL_2(rq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ rq);
if (xsk_open) {
- mlx5e_poll_ico_cq(&c->xskicosq.cq);
busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq);
}
@@ -152,8 +199,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
busy |= busy_xsk;
if (busy) {
- if (likely(mlx5e_channel_no_affinity_change(c)))
- return budget;
+ if (likely(mlx5e_channel_no_affinity_change(c))) {
+ work_done = budget;
+ goto out;
+ }
ch_stats->aff_change++;
aff_change = true;
if (budget && work_done == budget)
@@ -161,7 +210,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
}
if (unlikely(!napi_complete_done(napi, work_done)))
- return work_done;
+ goto out;
ch_stats->arm++;
@@ -169,16 +218,26 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_handle_tx_dim(&c->sq[i]);
mlx5e_cq_arm(&c->sq[i].cq);
}
+ if (unlikely(qos_sqs)) {
+ for (i = 0; i < qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
+
+ if (sq) {
+ mlx5e_handle_tx_dim(sq);
+ mlx5e_cq_arm(&sq->cq);
+ }
+ }
+ }
mlx5e_handle_rx_dim(rq);
mlx5e_cq_arm(&rq->cq);
mlx5e_cq_arm(&c->icosq.cq);
+ mlx5e_cq_arm(&c->async_icosq.cq);
mlx5e_cq_arm(&c->xdpsq.cq);
if (xsk_open) {
mlx5e_handle_rx_dim(xskrq);
- mlx5e_cq_arm(&c->xskicosq.cq);
mlx5e_cq_arm(&xsksq->cq);
mlx5e_cq_arm(&xskrq->cq);
}
@@ -188,6 +247,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
ch_stats->force_irq++;
}
+out:
+ rcu_read_unlock();
+
return work_done;
}
@@ -197,14 +259,13 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
napi_schedule(cq->napi);
cq->event_ctr++;
- cq->channel->stats->events++;
+ cq->ch_stats->events++;
}
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event)
{
struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
- struct mlx5e_channel *c = cq->channel;
- struct net_device *netdev = c->netdev;
+ struct net_device *netdev = cq->netdev;
netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n",
__func__, mcq->cqn, event);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index cccea3a8eddd..a0242dc15741 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1,42 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/interrupt.h>
#include <linux/notifier.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eq.h>
-#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
@@ -46,6 +17,8 @@
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
+#include "mlx5_irq.h"
+#include "devlink.h"
enum {
MLX5_EQE_OWNER_INIT_VAL = 0x1,
@@ -85,6 +58,11 @@ struct mlx5_eq_table {
struct mutex lock; /* sync async eqs creations */
int num_comp_eqs;
struct mlx5_irq_table *irq_table;
+ struct mlx5_irq **comp_irqs;
+ struct mlx5_irq *ctrl_irq;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -102,12 +80,11 @@ struct mlx5_eq_table {
static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
- u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};
MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
MLX5_SET(destroy_eq_in, in, eq_number, eqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, destroy_eq, in);
}
/* caller must eventually call mlx5_cq_put on the returned cq */
@@ -191,6 +168,31 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
return count_eqe;
}
+static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery,
+ unsigned long *flags)
+ __acquires(&eq->lock)
+{
+ if (!recovery)
+ spin_lock(&eq->lock);
+ else
+ spin_lock_irqsave(&eq->lock, *flags);
+}
+
+static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery,
+ unsigned long *flags)
+ __releases(&eq->lock)
+{
+ if (!recovery)
+ spin_unlock(&eq->lock);
+ else
+ spin_unlock_irqrestore(&eq->lock, *flags);
+}
+
+enum async_eq_nb_action {
+ ASYNC_EQ_IRQ_HANDLER = 0,
+ ASYNC_EQ_RECOVER = 1,
+};
+
static int mlx5_eq_async_int(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -200,11 +202,16 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
struct mlx5_eq_table *eqt;
struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
+ unsigned long flags;
int num_eqes = 0;
+ bool recovery;
dev = eq->dev;
eqt = dev->priv.eq_table;
+ recovery = action == ASYNC_EQ_RECOVER;
+ mlx5_eq_async_int_lock(eq_async, recovery, &flags);
+
eqe = next_eqe_sw(eq);
if (!eqe)
goto out;
@@ -225,8 +232,19 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
out:
eq_update_ci(eq, 1);
+ mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
- return 0;
+ return unlikely(recovery) ? num_eqes : 0;
+}
+
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
+ int eqes;
+
+ eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
+ if (eqes)
+ mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
}
static void init_eq_buf(struct mlx5_eq *eq)
@@ -234,7 +252,7 @@ static void init_eq_buf(struct mlx5_eq *eq)
struct mlx5_eqe *eqe;
int i;
- for (i = 0; i < eq->nent; i++) {
+ for (i = 0; i < eq_get_size(eq); i++) {
eqe = get_eqe(eq, i);
eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
}
@@ -244,11 +262,13 @@ static int
create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct mlx5_eq_param *param)
{
+ u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
+ u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
struct mlx5_priv *priv = &dev->priv;
- u8 vecidx = param->irq_index;
__be64 *pas;
+ u16 vecidx;
void *eqc;
int inlen;
u32 *in;
@@ -260,16 +280,21 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
- eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
- err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
+
+ err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
+ &eq->frag_buf, dev->priv.numa_node);
if (err)
return err;
+ mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
init_eq_buf(eq);
+ eq->irq = param->irq;
+ vecidx = mlx5_irq_get_index(eq->irq);
+
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
- MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+ MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
@@ -278,7 +303,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
}
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
- mlx5_fill_page_array(&eq->buf, pas);
+ mlx5_fill_page_frag_array(&eq->frag_buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
@@ -289,11 +314,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
param->mask[i]);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
- MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
+ MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
MLX5_SET(eqc, eqc, intr, vecidx);
MLX5_SET(eqc, eqc, log_page_size,
- eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
if (err)
@@ -319,7 +344,7 @@ err_in:
kvfree(in);
err_buf:
- mlx5_buf_free(dev, &eq->buf);
+ mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
}
@@ -336,10 +361,9 @@ err_buf:
int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct notifier_block *nb)
{
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
- err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+ err = mlx5_irq_attach_nb(eq->irq, nb);
if (!err)
eq_update_ci(eq, 1);
@@ -358,9 +382,7 @@ EXPORT_SYMBOL(mlx5_eq_enable);
void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct notifier_block *nb)
{
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-
- mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+ mlx5_irq_detach_nb(eq->irq, nb);
}
EXPORT_SYMBOL(mlx5_eq_disable);
@@ -374,10 +396,8 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
- synchronize_irq(eq->irqn);
-
- mlx5_buf_free(dev, &eq->buf);
+ mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
}
@@ -418,7 +438,8 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
struct mlx5_eq_table *eq_table;
int i;
- eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+ eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL,
+ dev->priv.numa_node);
if (!eq_table)
return -ENOMEM;
@@ -430,7 +451,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
- eq_table->irq_table = dev->priv.irq_table;
+ eq_table->irq_table = mlx5_irq_table_get(dev);
return 0;
}
@@ -449,14 +470,7 @@ static int create_async_eq(struct mlx5_core_dev *dev,
int err;
mutex_lock(&eq_table->lock);
- /* Async EQs must share irq index 0 */
- if (param->irq_index != 0) {
- err = -EINVAL;
- goto unlock;
- }
-
err = create_map_eq(dev, eq, param);
-unlock:
mutex_unlock(&eq_table->lock);
return err;
}
@@ -558,6 +572,12 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
async_event_mask |=
(1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
+ if (MLX5_CAP_GEN_MAX(dev, vhca_state))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE);
+
+ if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE);
+
mask[0] = async_event_mask;
if (MLX5_CAP_GEN(dev, event_cap))
@@ -571,6 +591,7 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
int err;
eq->irq_nb.notifier_call = mlx5_eq_async_int;
+ spin_lock_init(&eq->lock);
err = create_async_eq(dev, &eq->core, param);
if (err) {
@@ -597,29 +618,52 @@ static void cleanup_async_eq(struct mlx5_core_dev *dev,
name, err);
}
+static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_NUM_ASYNC_EQE;
+}
static int create_async_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_eq_param param = {};
int err;
+ /* All the async_eqs are using single IRQ, request one IRQ and share its
+ * index among all the async_eqs of this device.
+ */
+ table->ctrl_irq = mlx5_ctrl_irq_request(dev);
+ if (IS_ERR(table->ctrl_irq))
+ return PTR_ERR(table->ctrl_irq);
+
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
param = (struct mlx5_eq_param) {
- .irq_index = 0,
+ .irq = table->ctrl_irq,
.nent = MLX5_NUM_CMD_EQE,
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
};
+ mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
if (err)
goto err1;
mlx5_cmd_use_events(dev);
+ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
param = (struct mlx5_eq_param) {
- .irq_index = 0,
- .nent = MLX5_NUM_ASYNC_EQE,
+ .irq = table->ctrl_irq,
+ .nent = async_eq_depth_devlink_param_get(dev),
};
gather_async_events_mask(dev, param.mask);
@@ -628,7 +672,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
goto err2;
param = (struct mlx5_eq_param) {
- .irq_index = 0,
+ .irq = table->ctrl_irq,
.nent = /* TODO: sriov max_vf + */ 1,
.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
};
@@ -645,7 +689,9 @@ err2:
mlx5_cmd_use_polling(dev);
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
err1:
+ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
return err;
}
@@ -655,9 +701,12 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->pages_eq, "pages");
cleanup_async_eq(dev, &table->async_eq, "async");
+ mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
mlx5_cmd_use_polling(dev);
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
}
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
@@ -682,12 +731,14 @@ struct mlx5_eq *
mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq_param *param)
{
- struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+ struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL,
+ dev->priv.numa_node);
int err;
if (!eq)
return ERR_PTR(-ENOMEM);
+ param->irq = dev->priv.eq_table->ctrl_irq;
err = create_async_eq(dev, eq, param);
if (err) {
kvfree(eq);
@@ -718,10 +769,11 @@ EXPORT_SYMBOL(mlx5_eq_destroy_generic);
struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
{
u32 ci = eq->cons_index + cc;
+ u32 nent = eq_get_size(eq);
struct mlx5_eqe *eqe;
- eqe = get_eqe(eq, ci & (eq->nent - 1));
- eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+ eqe = get_eqe(eq, ci & (nent - 1));
+ eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
/* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
@@ -746,6 +798,54 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ if (mlx5_core_is_sf(dev))
+ mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+ else
+ mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
+ kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs = table->num_comp_eqs;
+ u16 *cpus;
+ int ret;
+ int i;
+
+ ncomp_eqs = table->num_comp_eqs;
+ table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+ if (!table->comp_irqs)
+ return -ENOMEM;
+ if (mlx5_core_is_sf(dev)) {
+ ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+ }
+
+ cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
+ if (!cpus) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
+ for (i = 0; i < ncomp_eqs; i++)
+ cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ kfree(cpus);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+
+free_irqs:
+ kfree(table->comp_irqs);
+ return ret;
+}
+
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -760,6 +860,22 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
tasklet_disable(&eq->tasklet_ctx.task);
kfree(eq);
}
+ comp_irqs_release(dev);
+}
+
+static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_COMP_EQ_SIZE;
}
static int create_comp_eqs(struct mlx5_core_dev *dev)
@@ -771,14 +887,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
int err;
int i;
+ ncomp_eqs = comp_irqs_request(dev);
+ if (ncomp_eqs < 0)
+ return ncomp_eqs;
INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_eqs = table->num_comp_eqs;
- nent = MLX5_COMP_EQ_SIZE;
+ nent = comp_eq_depth_devlink_param_get(dev);
+
for (i = 0; i < ncomp_eqs; i++) {
- int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
struct mlx5_eq_param param = {};
- eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node);
if (!eq) {
err = -ENOMEM;
goto clean;
@@ -787,24 +905,21 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
INIT_LIST_HEAD(&eq->tasklet_ctx.list);
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
+ tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);
eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
- .irq_index = vecidx,
+ .irq = table->comp_irqs[i],
.nent = nent,
};
+
err = create_map_eq(dev, &eq->core, &param);
- if (err) {
- kfree(eq);
- goto clean;
- }
+ if (err)
+ goto clean_eq;
err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
if (err) {
destroy_unmap_eq(dev, &eq->core);
- kfree(eq);
- goto clean;
+ goto clean_eq;
}
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
@@ -812,15 +927,18 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
list_add_tail(&eq->list, &table->comp_eqs_list);
}
+ table->num_comp_eqs = ncomp_eqs;
return 0;
+clean_eq:
+ kfree(eq);
clean:
destroy_comp_eqs(dev);
return err;
}
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
- unsigned int *irqn)
+static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_eq_comp *eq, *n;
@@ -829,8 +947,10 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
if (i++ == vector) {
- *eqn = eq->core.eqn;
- *irqn = eq->core.irqn;
+ if (irqn)
+ *irqn = eq->core.irqn;
+ if (eqn)
+ *eqn = eq->core.eqn;
err = 0;
break;
}
@@ -838,8 +958,18 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
return err;
}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
+{
+ return vector2eqnirqn(dev, vector, eqn, NULL);
+}
EXPORT_SYMBOL(mlx5_vector2eqn);
+int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
+{
+ return vector2eqnirqn(dev, vector, NULL, irqn);
+}
+
unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
{
return dev->priv.eq_table->num_comp_eqs;
@@ -849,17 +979,23 @@ EXPORT_SYMBOL(mlx5_comp_vectors_count);
struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
- int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int i = 0;
- return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
- vecidx);
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector)
+ break;
+ }
+
+ return mlx5_irq_get_affinity_mask(eq->core.irq);
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
- return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
+ return dev->priv.eq_table->rmap;
}
#endif
@@ -876,23 +1012,84 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
return ERR_PTR(-ENOENT);
}
+static void clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+ free_irq_cpu_rmap(eq_table->rmap);
+#endif
+}
+
+static int set_rmap(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+ int vecidx;
+
+ eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+ if (!eq_table->rmap) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+ goto err_out;
+ }
+
+ for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
+ err = irq_cpu_rmap_add(eq_table->rmap,
+ pci_irq_vector(mdev->pdev, vecidx));
+ if (err) {
+ mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+ err);
+ goto err_irq_cpu_rmap_add;
+ }
+ }
+ return 0;
+
+err_irq_cpu_rmap_add:
+ clear_rmap(mdev);
+err_out:
+#endif
+ return err;
+}
+
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
}
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+#define MLX5_MAX_ASYNC_EQS 4
+#else
+#define MLX5_MAX_ASYNC_EQS 3
+#endif
+
int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int max_eqs_sf;
int err;
eq_table->num_comp_eqs =
- mlx5_irq_get_num_comp(eq_table->irq_table);
+ min_t(int,
+ mlx5_irq_table_get_num_comp(eq_table->irq_table),
+ num_eqs - MLX5_MAX_ASYNC_EQS);
+ if (mlx5_core_is_sf(dev)) {
+ max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
+ mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
+ eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
+ max_eqs_sf);
+ }
err = create_async_eqs(dev);
if (err) {
@@ -900,6 +1097,18 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
goto err_async_eqs;
}
+ if (!mlx5_core_is_sf(dev)) {
+ /* rmap is a mapping between irq number and queue number.
+ * each irq can be assign only to a single rmap.
+ * since SFs share IRQs, rmap mapping cannot function correctly
+ * for irqs that are shared for different core/netdev RX rings.
+ * Hence we don't allow netdev rmap for SFs
+ */
+ err = set_rmap(dev);
+ if (err)
+ goto err_rmap;
+ }
+
err = create_comp_eqs(dev);
if (err) {
mlx5_core_err(dev, "Failed to create completion EQs\n");
@@ -908,6 +1117,9 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
return 0;
err_comp_eqs:
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
+err_rmap:
destroy_async_eqs(dev);
err_async_eqs:
return err;
@@ -915,6 +1127,8 @@ err_async_eqs:
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile
new file mode 100644
index 000000000000..c78512eed8d7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
new file mode 100644
index 000000000000..60a73990017c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_egress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+ esw_acl_egress_vlan_destroy(vport);
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) {
+ mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
+ vport->egress.legacy.drop_rule = NULL;
+ }
+}
+
+static int esw_acl_egress_lgcy_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_group *drop_grp;
+ u32 *flow_group_in;
+ int err = 0;
+
+ err = esw_acl_egress_vlan_grp_create(esw, vport);
+ if (err)
+ return err;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ drop_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(drop_grp)) {
+ err = PTR_ERR(drop_grp);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto drop_grp_err;
+ }
+
+ vport->egress.legacy.drop_grp = drop_grp;
+ kvfree(flow_group_in);
+ return 0;
+
+drop_grp_err:
+ kvfree(flow_group_in);
+alloc_err:
+ esw_acl_egress_vlan_grp_destroy(vport);
+ return err;
+}
+
+static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_grp)) {
+ mlx5_destroy_flow_group(vport->egress.legacy.drop_grp);
+ vport->egress.legacy.drop_grp = NULL;
+ }
+ esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_fc *drop_counter = NULL;
+ struct mlx5_flow_act flow_act = {};
+ /* The egress acl table contains 2 rules:
+ * 1)Allow traffic with vlan_tag=vst_vlan_id
+ * 2)Drop all other traffic.
+ */
+ int table_size = 2;
+ int dest_num = 0;
+ int err = 0;
+
+ if (vport->egress.legacy.drop_counter) {
+ drop_counter = vport->egress.legacy.drop_counter;
+ } else if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) {
+ drop_counter = mlx5_fc_create(esw->dev, false);
+ if (IS_ERR(drop_counter)) {
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule counter err(%ld)\n",
+ vport->vport, PTR_ERR(drop_counter));
+ drop_counter = NULL;
+ }
+ vport->egress.legacy.drop_counter = drop_counter;
+ }
+
+ esw_acl_egress_lgcy_rules_destroy(vport);
+
+ if (!vport->info.vlan && !vport->info.qos) {
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ return 0;
+ }
+
+ if (!vport->egress.acl) {
+ vport->egress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ table_size);
+ if (IS_ERR(vport->egress.acl)) {
+ err = PTR_ERR(vport->egress.acl);
+ vport->egress.acl = NULL;
+ goto out;
+ }
+
+ err = esw_acl_egress_lgcy_groups_create(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ /* Allowed vlan rule */
+ err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+ if (err)
+ goto out;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /* Attach egress drop flow counter */
+ if (drop_counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter);
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->egress.legacy.drop_rule =
+ mlx5_add_flow_rules(vport->egress.acl, NULL,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->egress.legacy.drop_rule)) {
+ err = PTR_ERR(vport->egress.legacy.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.legacy.drop_rule = NULL;
+ goto out;
+ }
+
+ return err;
+
+out:
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ return err;
+}
+
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ goto clean_drop_counter;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
+
+ esw_acl_egress_lgcy_rules_destroy(vport);
+ esw_acl_egress_lgcy_groups_destroy(vport);
+ esw_acl_egress_table_destroy(vport);
+
+clean_drop_counter:
+ if (vport->egress.legacy.drop_counter) {
+ mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
+ vport->egress.legacy.drop_counter = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
new file mode 100644
index 000000000000..2e504c7461c6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.fwd_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.fwd_rule);
+ vport->egress.offloads.fwd_rule = NULL;
+}
+
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.bounce_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+ vport->egress.offloads.bounce_rule = NULL;
+}
+
+static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest)
+{
+ struct mlx5_flow_act flow_act = {};
+ int err = 0;
+
+ esw_debug(esw->dev, "vport(%d) configure egress acl rule fwd2vport(%d)\n",
+ vport->vport, fwd_dest->vport.num);
+
+ /* Delete the old egress forward-to-vport rule if any */
+ esw_acl_egress_ofld_fwd2vport_destroy(vport);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ vport->egress.offloads.fwd_rule =
+ mlx5_add_flow_rules(vport->egress.acl, NULL,
+ &flow_act, fwd_dest, 1);
+ if (IS_ERR(vport->egress.offloads.fwd_rule)) {
+ err = PTR_ERR(vport->egress.offloads.fwd_rule);
+ esw_warn(esw->dev,
+ "vport(%d) failed to add fwd2vport acl rule err(%d)\n",
+ vport->vport, err);
+ vport->egress.offloads.fwd_rule = NULL;
+ }
+
+ return err;
+}
+
+static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest)
+{
+ int err = 0;
+ int action;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+ /* For prio tag mode, there is only 1 FTEs:
+ * 1) prio tag packets - pop the prio tag VLAN, allow
+ * Unmatched traffic is allowed by default
+ */
+ esw_debug(esw->dev,
+ "vport[%d] configure prio tag egress rules\n", vport->vport);
+
+ action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ action |= fwd_dest ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ /* prio tag vlan rule - pop it so vport receives untagged packets */
+ err = esw_egress_acl_vlan_create(esw, vport, fwd_dest, 0, action);
+ if (err)
+ goto prio_err;
+ }
+
+ if (fwd_dest) {
+ err = esw_acl_egress_ofld_fwd2vport_create(esw, vport, fwd_dest);
+ if (err)
+ goto fwd_err;
+ }
+
+ return 0;
+
+fwd_err:
+ esw_acl_egress_vlan_destroy(vport);
+prio_err:
+ return err;
+}
+
+static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
+{
+ esw_acl_egress_vlan_destroy(vport);
+ esw_acl_egress_ofld_fwd2vport_destroy(vport);
+ esw_acl_egress_ofld_bounce_rule_destroy(vport);
+}
+
+static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fwd_grp;
+ u32 *flow_group_in;
+ u32 flow_index = 0;
+ int ret = 0;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+ ret = esw_acl_egress_vlan_grp_create(esw, vport);
+ if (ret)
+ return ret;
+
+ flow_index++;
+ }
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(esw))
+ goto out;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ ret = -ENOMEM;
+ goto fwd_grp_err;
+ }
+
+ /* This group holds 1 FTE to forward all packets to other vport
+ * when bond vports is supported.
+ */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+ fwd_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(fwd_grp)) {
+ ret = PTR_ERR(fwd_grp);
+ esw_warn(esw->dev,
+ "Failed to create vport[%d] egress fwd2vport flow group, err(%d)\n",
+ vport->vport, ret);
+ kvfree(flow_group_in);
+ goto fwd_grp_err;
+ }
+ vport->egress.offloads.fwd_grp = fwd_grp;
+ kvfree(flow_group_in);
+ return 0;
+
+fwd_grp_err:
+ esw_acl_egress_vlan_grp_destroy(vport);
+out:
+ return ret;
+}
+
+static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.fwd_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
+ vport->egress.offloads.fwd_grp = NULL;
+ }
+
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+ vport->egress.offloads.bounce_grp = NULL;
+ }
+
+ esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num);
+}
+
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int table_size = 0;
+ int err;
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(esw) &&
+ !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
+ if (!esw_acl_egress_needed(esw, vport->vport))
+ return 0;
+
+ esw_acl_egress_ofld_rules_destroy(vport);
+
+ if (mlx5_esw_acl_egress_fwd2vport_supported(esw))
+ table_size++;
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ table_size++;
+ vport->egress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
+ if (IS_ERR(vport->egress.acl)) {
+ err = PTR_ERR(vport->egress.acl);
+ vport->egress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_egress_ofld_groups_create(esw, vport);
+ if (err)
+ goto group_err;
+
+ esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport);
+
+ err = esw_acl_egress_ofld_rules_create(esw, vport, NULL);
+ if (err)
+ goto rules_err;
+
+ return 0;
+
+rules_err:
+ esw_acl_egress_ofld_groups_destroy(vport);
+group_err:
+ esw_acl_egress_table_destroy(vport);
+ return err;
+}
+
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport)
+{
+ esw_acl_egress_ofld_rules_destroy(vport);
+ esw_acl_egress_ofld_groups_destroy(vport);
+ esw_acl_egress_table_destroy(vport);
+}
+
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+ u16 passive_vport_num)
+{
+ struct mlx5_vport *passive_vport = mlx5_eswitch_get_vport(esw, passive_vport_num);
+ struct mlx5_vport *active_vport = mlx5_eswitch_get_vport(esw, active_vport_num);
+ struct mlx5_flow_destination fwd_dest = {};
+
+ if (IS_ERR(active_vport))
+ return PTR_ERR(active_vport);
+ if (IS_ERR(passive_vport))
+ return PTR_ERR(passive_vport);
+
+ /* Cleanup and recreate rules WITHOUT fwd2vport of active vport */
+ esw_acl_egress_ofld_rules_destroy(active_vport);
+ esw_acl_egress_ofld_rules_create(esw, active_vport, NULL);
+
+ /* Cleanup and recreate all rules + fwd2vport rule of passive vport to forward */
+ esw_acl_egress_ofld_rules_destroy(passive_vport);
+ fwd_dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ fwd_dest.vport.num = active_vport_num;
+ fwd_dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ fwd_dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ return esw_acl_egress_ofld_rules_create(esw, passive_vport, &fwd_dest);
+}
+
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ esw_acl_egress_ofld_rules_destroy(vport);
+ return esw_acl_egress_ofld_rules_create(esw, vport, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
new file mode 100644
index 000000000000..45b839116212
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *acl;
+ int acl_supported;
+ u16 vport_num;
+ int err;
+
+ acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ?
+ MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) :
+ MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support);
+
+ if (!acl_supported)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ vport_num = vport->vport;
+ esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
+ ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
+
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
+ vport_num);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ ft_attr.max_fte = size;
+ ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT;
+ acl = mlx5_create_vport_flow_table(root_ns, &ft_attr, vport_num);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ esw_warn(dev, "vport[%d] create %s ACL table, err(%d)\n", vport_num,
+ ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress", err);
+ }
+ return acl;
+}
+
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest,
+ u16 vlan_id, u32 flow_action)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ if (vport->egress.allowed_vlan)
+ return -EEXIST;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = flow_action;
+ vport->egress.allowed_vlan =
+ mlx5_add_flow_rules(vport->egress.acl, spec,
+ &flow_act, fwd_dest, 0);
+ if (IS_ERR(vport->egress.allowed_vlan)) {
+ err = PTR_ERR(vport->egress.allowed_vlan);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress vlan rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.allowed_vlan = NULL;
+ }
+
+ kvfree(spec);
+ return err;
+}
+
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
+ mlx5_del_flow_rules(vport->egress.allowed_vlan);
+ vport->egress.allowed_vlan = NULL;
+ }
+}
+
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *vlan_grp;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int ret = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ vlan_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(vlan_grp)) {
+ ret = PTR_ERR(vlan_grp);
+ esw_warn(esw->dev,
+ "Failed to create E-Switch vport[%d] egress pop vlans flow group, err(%d)\n",
+ vport->vport, ret);
+ goto out;
+ }
+ vport->egress.vlan_grp = vlan_grp;
+
+out:
+ kvfree(flow_group_in);
+ return ret;
+}
+
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.vlan_grp)) {
+ mlx5_destroy_flow_group(vport->egress.vlan_grp);
+ vport->egress.vlan_grp = NULL;
+ }
+}
+
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ return;
+
+ mlx5_destroy_flow_table(vport->egress.acl);
+ vport->egress.acl = NULL;
+}
+
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->ingress.acl)
+ return;
+
+ mlx5_destroy_flow_table(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+}
+
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->ingress.allow_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.allow_rule);
+ vport->ingress.allow_rule = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
new file mode 100644
index 000000000000..a47063fab57e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_HELPER_H__
+#define __MLX5_ESWITCH_ACL_HELPER_H__
+
+#include "eswitch.h"
+
+/* General acl helper functions */
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size);
+
+/* Egress acl helper functions */
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport);
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest,
+ u16 vlan_id, u32 flow_action);
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport);
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport);
+
+/* Ingress acl helper functions */
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport);
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
new file mode 100644
index 000000000000..b1a5199260f6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_ingress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.legacy.drop_rule) {
+ mlx5_del_flow_rules(vport->ingress.legacy.drop_rule);
+ vport->ingress.legacy.drop_rule = NULL;
+ }
+ esw_acl_ingress_allow_rule_destroy(vport);
+}
+
+static int esw_acl_ingress_lgcy_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto spoof_err;
+ }
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+ vport->vport, err);
+ goto untagged_err;
+ }
+ vport->ingress.legacy.allow_untagged_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto allow_spoof_err;
+ }
+ vport->ingress.legacy.allow_spoofchk_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto drop_err;
+ }
+ vport->ingress.legacy.drop_grp = g;
+ kvfree(flow_group_in);
+ return 0;
+
+drop_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+ vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+ }
+allow_spoof_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+ vport->ingress.legacy.allow_untagged_only_grp = NULL;
+ }
+untagged_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+ }
+spoof_err:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.legacy.allow_spoofchk_only_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+ vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+ }
+ if (vport->ingress.legacy.allow_untagged_only_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+ vport->ingress.legacy.allow_untagged_only_grp = NULL;
+ }
+ if (vport->ingress.legacy.allow_untagged_spoofchk_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+ }
+ if (vport->ingress.legacy.drop_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp);
+ vport->ingress.legacy.drop_grp = NULL;
+ }
+}
+
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_fc *counter = NULL;
+ /* The ingress acl table contains 4 groups
+ * (2 active rules at the same time -
+ * 1 allow rule from one of the first 3 groups.
+ * 1 drop rule from the last group):
+ * 1)Allow untagged traffic with smac=original mac.
+ * 2)Allow untagged traffic.
+ * 3)Allow traffic with smac=original mac.
+ * 4)Drop all other traffic.
+ */
+ int table_size = 4;
+ int dest_num = 0;
+ int err = 0;
+ u8 *smac_v;
+
+ esw_acl_ingress_lgcy_rules_destroy(vport);
+
+ if (vport->ingress.legacy.drop_counter) {
+ counter = vport->ingress.legacy.drop_counter;
+ } else if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) {
+ counter = mlx5_fc_create(esw->dev, false);
+ if (IS_ERR(counter)) {
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule counter failed\n",
+ vport->vport);
+ counter = NULL;
+ }
+ vport->ingress.legacy.drop_counter = counter;
+ }
+
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ return 0;
+ }
+
+ if (!vport->ingress.acl) {
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ table_size);
+ if (IS_ERR(vport->ingress.acl)) {
+ err = PTR_ERR(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_ingress_lgcy_groups_create(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (vport->info.vlan || vport->info.qos)
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+
+ if (vport->info.spoofchk) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.smac_15_0);
+ smac_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, vport->info.mac);
+ }
+
+ /* Create ingress allow rule */
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ goto out;
+ }
+
+ memset(&flow_act, 0, sizeof(flow_act));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ /* Attach drop flow counter */
+ if (counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter_id = mlx5_fc_id(counter);
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->ingress.legacy.drop_rule =
+ mlx5_add_flow_rules(vport->ingress.acl, NULL,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->ingress.legacy.drop_rule)) {
+ err = PTR_ERR(vport->ingress.legacy.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.legacy.drop_rule = NULL;
+ goto out;
+ }
+ kvfree(spec);
+ return 0;
+
+out:
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ kvfree(spec);
+ return err;
+}
+
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->ingress.acl))
+ goto clean_drop_counter;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
+
+ esw_acl_ingress_lgcy_rules_destroy(vport);
+ esw_acl_ingress_lgcy_groups_destroy(vport);
+ esw_acl_ingress_table_destroy(vport);
+
+clean_drop_counter:
+ if (vport->ingress.legacy.drop_counter) {
+ mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
+ vport->ingress.legacy.drop_counter = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
new file mode 100644
index 000000000000..a994e71e05c1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static bool
+esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw,
+ const struct mlx5_vport *vport)
+{
+ return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ mlx5_eswitch_is_vf_vport(esw, vport->vport));
+}
+
+static int esw_acl_ingress_prio_tag_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ /* For prio tag mode, there is only 1 FTEs:
+ * 1) Untagged packets - push prio tag VLAN and modify metadata if
+ * required, allow
+ * Unmatched traffic is allowed by default
+ */
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Untagged packets - push prio tag VLAN, allow */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.vlan[0].ethtype = ETH_P_8021Q;
+ flow_act.vlan[0].vid = 0;
+ flow_act.vlan[0].prio = 0;
+
+ if (vport->ingress.offloads.modify_metadata_rule) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+ }
+
+ vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress untagged allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ }
+
+ kvfree(spec);
+ return err;
+}
+
+static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_flow_act flow_act = {};
+ int err = 0;
+ u32 key;
+
+ key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport);
+ key >>= ESW_SOURCE_PORT_METADATA_OFFSET;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+ MLX5_SET(set_action_in, action, data, key);
+ MLX5_SET(set_action_in, action, offset,
+ ESW_SOURCE_PORT_METADATA_OFFSET);
+ MLX5_SET(set_action_in, action, length,
+ ESW_SOURCE_PORT_METADATA_BITS);
+
+ vport->ingress.offloads.modify_metadata =
+ mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action);
+ if (IS_ERR(vport->ingress.offloads.modify_metadata)) {
+ err = PTR_ERR(vport->ingress.offloads.modify_metadata);
+ esw_warn(esw->dev,
+ "failed to alloc modify header for vport %d ingress acl (%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+ flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp;
+ vport->ingress.offloads.modify_metadata_rule =
+ mlx5_add_flow_rules(vport->ingress.acl,
+ NULL, &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) {
+ err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule);
+ esw_warn(esw->dev,
+ "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+ vport->vport, err);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+ vport->ingress.offloads.modify_metadata_rule = NULL;
+ }
+ return err;
+}
+
+static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->ingress.offloads.modify_metadata_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+ vport->ingress.offloads.modify_metadata_rule = NULL;
+}
+
+static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ int err = 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ flow_act.fg = vport->ingress.offloads.drop_grp;
+ flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ }
+
+ vport->ingress.offloads.drop_rule = flow_rule;
+out:
+ return err;
+}
+
+static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->ingress.offloads.drop_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.offloads.drop_rule);
+ vport->ingress.offloads.drop_rule = NULL;
+}
+
+static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int err;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = esw_acl_ingress_mod_metadata_create(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "vport(%d) create ingress modify metadata, err(%d)\n",
+ vport->vport, err);
+ return err;
+ }
+ }
+
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+ err = esw_acl_ingress_prio_tag_create(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "vport(%d) create ingress prio tag rule, err(%d)\n",
+ vport->vport, err);
+ goto prio_tag_err;
+ }
+ }
+
+ return 0;
+
+prio_tag_err:
+ esw_acl_ingress_mod_metadata_destroy(esw, vport);
+ return err;
+}
+
+static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ esw_acl_ingress_allow_rule_destroy(vport);
+ esw_acl_ingress_mod_metadata_destroy(esw, vport);
+ esw_acl_ingress_src_port_drop_destroy(esw, vport);
+}
+
+static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ u32 flow_index = 0;
+ int ret = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ if (vport->vport == MLX5_VPORT_UPLINK) {
+ /* This group can hold an FTE to drop all traffic.
+ * Need in case LAG is enabled.
+ */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, ret);
+ goto drop_err;
+ }
+ vport->ingress.offloads.drop_grp = g;
+ flow_index++;
+ }
+
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+ /* This group is to hold FTE to match untagged packets when prio_tag
+ * is enabled.
+ */
+ memset(flow_group_in, 0, inlen);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in, match_criteria);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+ vport->vport, ret);
+ goto prio_tag_err;
+ }
+ vport->ingress.offloads.metadata_prio_tag_grp = g;
+ flow_index++;
+ }
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ /* This group holds an FTE with no match to add metadata for
+ * tagged packets if prio-tag is enabled, or for all untagged
+ * traffic in case prio-tag is disabled.
+ */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, ret);
+ goto metadata_err;
+ }
+ vport->ingress.offloads.metadata_allmatch_grp = g;
+ }
+
+ kvfree(flow_group_in);
+ return 0;
+
+metadata_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+ vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+ }
+prio_tag_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp);
+ vport->ingress.offloads.drop_grp = NULL;
+ }
+drop_err:
+ kvfree(flow_group_in);
+ return ret;
+}
+
+static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.offloads.metadata_allmatch_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp);
+ vport->ingress.offloads.metadata_allmatch_grp = NULL;
+ }
+
+ if (vport->ingress.offloads.metadata_prio_tag_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+ vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+ }
+
+ if (vport->ingress.offloads.drop_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp);
+ vport->ingress.offloads.drop_grp = NULL;
+ }
+}
+
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int num_ftes = 0;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !esw_acl_ingress_prio_tag_enabled(esw, vport))
+ return 0;
+
+ esw_acl_ingress_allow_rule_destroy(vport);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ num_ftes++;
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ num_ftes++;
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport))
+ num_ftes++;
+
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ num_ftes);
+ if (IS_ERR(vport->ingress.acl)) {
+ err = PTR_ERR(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_ingress_ofld_groups_create(esw, vport);
+ if (err)
+ goto group_err;
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules\n", vport->vport);
+
+ err = esw_acl_ingress_ofld_rules_create(esw, vport);
+ if (err)
+ goto rules_err;
+
+ return 0;
+
+rules_err:
+ esw_acl_ingress_ofld_groups_destroy(vport);
+group_err:
+ esw_acl_ingress_table_destroy(vport);
+ return err;
+}
+
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ esw_acl_ingress_ofld_rules_destroy(esw, vport);
+ esw_acl_ingress_ofld_groups_destroy(vport);
+ esw_acl_ingress_table_destroy(vport);
+}
+
+/* Caller must hold rtnl_lock */
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 metadata)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+ int err;
+
+ if (WARN_ON_ONCE(IS_ERR(vport))) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ err = PTR_ERR(vport);
+ goto out;
+ }
+
+ esw_acl_ingress_ofld_rules_destroy(esw, vport);
+
+ vport->metadata = metadata ? metadata : vport->default_metadata;
+
+ /* Recreate ingress acl rules with vport->metadata */
+ err = esw_acl_ingress_ofld_rules_create(esw, vport);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ vport->metadata = vport->default_metadata;
+ return err;
+}
+
+int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (IS_ERR(vport)) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ return PTR_ERR(vport);
+ }
+
+ return esw_acl_ingress_src_port_drop_create(esw, vport);
+}
+
+void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport))) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ return;
+ }
+
+ esw_acl_ingress_src_port_drop_destroy(esw, vport);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
new file mode 100644
index 000000000000..44c152da3d83
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_LGCY_H__
+#define __MLX5_ESWITCH_ACL_LGCY_H__
+
+#include "eswitch.h"
+
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
new file mode 100644
index 000000000000..11d3d3978848
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_OFLD_H__
+#define __MLX5_ESWITCH_ACL_OFLD_H__
+
+#include "eswitch.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+ u16 passive_vport_num);
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num);
+
+static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *esw)
+{
+ return esw && esw->mode == MLX5_ESWITCH_OFFLOADS &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_ESW_FLOWTABLE(esw->dev, egress_acl_forward_to_vport);
+}
+
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 metadata);
+void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num);
+
+#else /* CONFIG_MLX5_ESWITCH */
+static void
+mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{}
+
+static int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return 0;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
new file mode 100644
index 000000000000..4fbff7bcc155
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -0,0 +1,1853 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/build_bug.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include <net/switchdev.h>
+#include "lib/devcom.h"
+#include "bridge.h"
+#include "eswitch.h"
+#include "bridge_priv.h"
+#define CREATE_TRACE_POINTS
+#include "diag/bridge_tracepoint.h"
+
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000);
+
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000);
+
+#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
+
+enum {
+ MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
+};
+
+static const struct rhashtable_params fdb_ht_params = {
+ .key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key),
+ .key_len = sizeof(struct mlx5_esw_bridge_fdb_key),
+ .head_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, ht_node),
+ .automatic_shrinking = true,
+};
+
+enum {
+ MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
+};
+
+struct mlx5_esw_bridge {
+ int ifindex;
+ int refcnt;
+ struct list_head list;
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
+ struct list_head fdb_list;
+ struct rhashtable fdb_ht;
+
+ struct mlx5_flow_table *egress_ft;
+ struct mlx5_flow_group *egress_vlan_fg;
+ struct mlx5_flow_group *egress_qinq_fg;
+ struct mlx5_flow_group *egress_mac_fg;
+ struct mlx5_flow_group *egress_miss_fg;
+ struct mlx5_pkt_reformat *egress_miss_pkt_reformat;
+ struct mlx5_flow_handle *egress_miss_handle;
+ unsigned long ageing_time;
+ u32 flags;
+ u16 vlan_proto;
+};
+
+static void
+mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid,
+ unsigned long val)
+{
+ struct switchdev_notifier_fdb_info send_info = {};
+
+ send_info.addr = addr;
+ send_info.vid = vid;
+ send_info.offloaded = true;
+ call_switchdev_notifiers(val, dev, &send_info.info, NULL);
+}
+
+static void
+mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry)
+{
+ if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER)))
+ mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+ entry->key.vid,
+ SWITCHDEV_FDB_DEL_TO_BRIDGE);
+}
+
+static bool mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(struct mlx5_eswitch *esw)
+{
+ return BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) &&
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) >= sizeof(struct vlan_hdr) &&
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) >=
+ offsetof(struct vlan_ethhdr, h_vlan_proto);
+}
+
+static struct mlx5_pkt_reformat *
+mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw)
+{
+ struct mlx5_pkt_reformat_params reformat_params = {};
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR;
+ reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START;
+ reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto);
+ reformat_params.size = sizeof(struct vlan_hdr);
+ return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB);
+}
+
+static struct mlx5_flow_table *
+mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *fdb;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!ns) {
+ esw_warn(dev, "Failed to get FDB namespace\n");
+ return ERR_PTR(-ENOENT);
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft_attr.max_fte = max_fte;
+ ft_attr.level = level;
+ ft_attr.prio = FDB_BR_OFFLOAD;
+ fdb = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(fdb))
+ esw_warn(dev, "Failed to create bridge FDB Table (err=%ld)\n", PTR_ERR(fdb));
+
+ return fdb;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%ld)\n",
+ vlan_proto, PTR_ERR(fg));
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(unsigned int from, unsigned int to,
+ u16 vlan_proto, struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021Q, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_qinq_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021AD, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create MAC flow group for bridge ingress table (err=%ld)\n",
+ PTR_ERR(fg));
+
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *egress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(egress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create VLAN flow group for bridge egress table (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, egress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *egress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, egress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(egress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create bridge egress table MAC flow group (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(egress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create bridge egress table miss flow group (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
+static int
+mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_flow_group *mac_fg, *qinq_filter_fg, *qinq_fg, *vlan_filter_fg, *vlan_fg;
+ struct mlx5_flow_table *ingress_ft, *skip_ft;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return -EOPNOTSUPP;
+
+ ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+ esw);
+ if (IS_ERR(ingress_ft))
+ return PTR_ERR(ingress_ft);
+
+ skip_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
+ esw);
+ if (IS_ERR(skip_ft)) {
+ err = PTR_ERR(skip_ft);
+ goto err_skip_tbl;
+ }
+
+ vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(esw, ingress_ft);
+ if (IS_ERR(vlan_fg)) {
+ err = PTR_ERR(vlan_fg);
+ goto err_vlan_fg;
+ }
+
+ vlan_filter_fg = mlx5_esw_bridge_ingress_vlan_filter_fg_create(esw, ingress_ft);
+ if (IS_ERR(vlan_filter_fg)) {
+ err = PTR_ERR(vlan_filter_fg);
+ goto err_vlan_filter_fg;
+ }
+
+ qinq_fg = mlx5_esw_bridge_ingress_qinq_fg_create(esw, ingress_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
+ qinq_filter_fg = mlx5_esw_bridge_ingress_qinq_filter_fg_create(esw, ingress_ft);
+ if (IS_ERR(qinq_filter_fg)) {
+ err = PTR_ERR(qinq_filter_fg);
+ goto err_qinq_filter_fg;
+ }
+
+ mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(esw, ingress_ft);
+ if (IS_ERR(mac_fg)) {
+ err = PTR_ERR(mac_fg);
+ goto err_mac_fg;
+ }
+
+ br_offloads->ingress_ft = ingress_ft;
+ br_offloads->skip_ft = skip_ft;
+ br_offloads->ingress_vlan_fg = vlan_fg;
+ br_offloads->ingress_vlan_filter_fg = vlan_filter_fg;
+ br_offloads->ingress_qinq_fg = qinq_fg;
+ br_offloads->ingress_qinq_filter_fg = qinq_filter_fg;
+ br_offloads->ingress_mac_fg = mac_fg;
+ return 0;
+
+err_mac_fg:
+ mlx5_destroy_flow_group(qinq_filter_fg);
+err_qinq_filter_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
+ mlx5_destroy_flow_group(vlan_filter_fg);
+err_vlan_filter_fg:
+ mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
+ mlx5_destroy_flow_table(skip_ft);
+err_skip_tbl:
+ mlx5_destroy_flow_table(ingress_ft);
+ return err;
+}
+
+static void
+mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ mlx5_destroy_flow_group(br_offloads->ingress_mac_fg);
+ br_offloads->ingress_mac_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_qinq_filter_fg);
+ br_offloads->ingress_qinq_filter_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_qinq_fg);
+ br_offloads->ingress_qinq_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_vlan_filter_fg);
+ br_offloads->ingress_vlan_filter_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg);
+ br_offloads->ingress_vlan_fg = NULL;
+ mlx5_destroy_flow_table(br_offloads->skip_ft);
+ br_offloads->skip_ft = NULL;
+ mlx5_destroy_flow_table(br_offloads->ingress_ft);
+ br_offloads->ingress_ft = NULL;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft,
+ struct mlx5_flow_table *skip_ft,
+ struct mlx5_pkt_reformat *pkt_reformat);
+
+static int
+mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg, *qinq_fg;
+ struct mlx5_pkt_reformat *miss_pkt_reformat = NULL;
+ struct mlx5_flow_handle *miss_handle = NULL;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_flow_table *egress_ft;
+ int err;
+
+ egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+ esw);
+ if (IS_ERR(egress_ft))
+ return PTR_ERR(egress_ft);
+
+ vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(esw, egress_ft);
+ if (IS_ERR(vlan_fg)) {
+ err = PTR_ERR(vlan_fg);
+ goto err_vlan_fg;
+ }
+
+ qinq_fg = mlx5_esw_bridge_egress_qinq_fg_create(esw, egress_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
+ mac_fg = mlx5_esw_bridge_egress_mac_fg_create(esw, egress_ft);
+ if (IS_ERR(mac_fg)) {
+ err = PTR_ERR(mac_fg);
+ goto err_mac_fg;
+ }
+
+ if (mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) {
+ miss_fg = mlx5_esw_bridge_egress_miss_fg_create(esw, egress_ft);
+ if (IS_ERR(miss_fg)) {
+ esw_warn(esw->dev, "Failed to create miss flow group (err=%ld)\n",
+ PTR_ERR(miss_fg));
+ miss_fg = NULL;
+ goto skip_miss_flow;
+ }
+
+ miss_pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw);
+ if (IS_ERR(miss_pkt_reformat)) {
+ esw_warn(esw->dev,
+ "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n",
+ PTR_ERR(miss_pkt_reformat));
+ miss_pkt_reformat = NULL;
+ mlx5_destroy_flow_group(miss_fg);
+ miss_fg = NULL;
+ goto skip_miss_flow;
+ }
+
+ miss_handle = mlx5_esw_bridge_egress_miss_flow_create(egress_ft,
+ br_offloads->skip_ft,
+ miss_pkt_reformat);
+ if (IS_ERR(miss_handle)) {
+ esw_warn(esw->dev, "Failed to create miss flow (err=%ld)\n",
+ PTR_ERR(miss_handle));
+ miss_handle = NULL;
+ mlx5_packet_reformat_dealloc(esw->dev, miss_pkt_reformat);
+ miss_pkt_reformat = NULL;
+ mlx5_destroy_flow_group(miss_fg);
+ miss_fg = NULL;
+ goto skip_miss_flow;
+ }
+ }
+skip_miss_flow:
+
+ bridge->egress_ft = egress_ft;
+ bridge->egress_vlan_fg = vlan_fg;
+ bridge->egress_qinq_fg = qinq_fg;
+ bridge->egress_mac_fg = mac_fg;
+ bridge->egress_miss_fg = miss_fg;
+ bridge->egress_miss_pkt_reformat = miss_pkt_reformat;
+ bridge->egress_miss_handle = miss_handle;
+ return 0;
+
+err_mac_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
+ mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
+ mlx5_destroy_flow_table(egress_ft);
+ return err;
+}
+
+static void
+mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
+{
+ if (bridge->egress_miss_handle)
+ mlx5_del_flow_rules(bridge->egress_miss_handle);
+ if (bridge->egress_miss_pkt_reformat)
+ mlx5_packet_reformat_dealloc(bridge->br_offloads->esw->dev,
+ bridge->egress_miss_pkt_reformat);
+ if (bridge->egress_miss_fg)
+ mlx5_destroy_flow_group(bridge->egress_miss_fg);
+ mlx5_destroy_flow_group(bridge->egress_mac_fg);
+ mlx5_destroy_flow_group(bridge->egress_qinq_fg);
+ mlx5_destroy_flow_group(bridge->egress_vlan_fg);
+ mlx5_destroy_flow_table(bridge->egress_ft);
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+ struct mlx5_esw_bridge *bridge,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_destination dests[2] = {};
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *smac_v, *smac_c;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2;
+
+ smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, addr);
+ smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.smac_47_16);
+ eth_broadcast_addr(smac_c);
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+
+ if (vlan && vlan->pkt_reformat_push) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.pkt_reformat = vlan->pkt_reformat_push;
+ flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark;
+ } else if (vlan) {
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+ vlan->vid);
+ }
+
+ dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dests[0].ft = bridge->egress_ft;
+ dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dests[1].counter_id = counter_id;
+
+ handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, dests,
+ ARRAY_SIZE(dests));
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+ struct mlx5_esw_bridge *bridge)
+{
+ return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+ bridge, bridge->br_offloads->esw);
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom;
+ static struct mlx5_flow_handle *handle;
+ struct mlx5_eswitch *peer_esw;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return ERR_PTR(-ENODEV);
+
+ handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+ bridge, peer_esw);
+
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = br_offloads->skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *smac_v, *smac_c;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2;
+
+ smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, addr);
+ smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.smac_47_16);
+ eth_broadcast_addr(smac_c);
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
+
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+
+ handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+ .vport.num = vport_num,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *dmac_v, *dmac_c;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
+ vport_num == MLX5_VPORT_UPLINK)
+ rule_spec->flow_context.flow_source =
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.dmac_47_16);
+ ether_addr_copy(dmac_v, addr);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.dmac_47_16);
+ eth_broadcast_addr(dmac_c);
+
+ if (vlan) {
+ if (vlan->pkt_reformat_pop) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act.pkt_reformat = vlan->pkt_reformat_pop;
+ }
+
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+ vlan->vid);
+ }
+
+ if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ dest.vport.vhca_id = esw_owner_vhca_id;
+ }
+ handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft,
+ struct mlx5_flow_table *skip_ft,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT,
+ .flags = FLOW_ACT_NO_APPEND,
+ .pkt_reformat = pkt_reformat,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_1,
+ ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK);
+
+ handle = mlx5_add_flow_rules(egress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge *bridge;
+ int err;
+
+ bridge = kvzalloc(sizeof(*bridge), GFP_KERNEL);
+ if (!bridge)
+ return ERR_PTR(-ENOMEM);
+
+ bridge->br_offloads = br_offloads;
+ err = mlx5_esw_bridge_egress_table_init(br_offloads, bridge);
+ if (err)
+ goto err_egress_tbl;
+
+ err = rhashtable_init(&bridge->fdb_ht, &fdb_ht_params);
+ if (err)
+ goto err_fdb_ht;
+
+ INIT_LIST_HEAD(&bridge->fdb_list);
+ bridge->ifindex = ifindex;
+ bridge->refcnt = 1;
+ bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME);
+ bridge->vlan_proto = ETH_P_8021Q;
+ list_add(&bridge->list, &br_offloads->bridges);
+
+ return bridge;
+
+err_fdb_ht:
+ mlx5_esw_bridge_egress_table_cleanup(bridge);
+err_egress_tbl:
+ kvfree(bridge);
+ return ERR_PTR(err);
+}
+
+static void mlx5_esw_bridge_get(struct mlx5_esw_bridge *bridge)
+{
+ bridge->refcnt++;
+}
+
+static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge *bridge)
+{
+ if (--bridge->refcnt)
+ return;
+
+ mlx5_esw_bridge_egress_table_cleanup(bridge);
+ list_del(&bridge->list);
+ rhashtable_destroy(&bridge->fdb_ht);
+ kvfree(bridge);
+
+ if (list_empty(&br_offloads->bridges))
+ mlx5_esw_bridge_ingress_table_cleanup(br_offloads);
+}
+
+static struct mlx5_esw_bridge *
+mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge *bridge;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(bridge, &br_offloads->bridges, list) {
+ if (bridge->ifindex == ifindex) {
+ mlx5_esw_bridge_get(bridge);
+ return bridge;
+ }
+ }
+
+ if (!br_offloads->ingress_ft) {
+ int err = mlx5_esw_bridge_ingress_table_init(br_offloads);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ bridge = mlx5_esw_bridge_create(ifindex, br_offloads);
+ if (IS_ERR(bridge) && list_empty(&br_offloads->bridges))
+ mlx5_esw_bridge_ingress_table_cleanup(br_offloads);
+ return bridge;
+}
+
+static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id)
+{
+ return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE;
+}
+
+static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
+{
+ return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id);
+}
+
+static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL);
+}
+
+static struct mlx5_esw_bridge_port *
+mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num,
+ esw_owner_vhca_id));
+}
+
+static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port));
+}
+
+static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry)
+{
+ trace_mlx5_esw_bridge_fdb_entry_refresh(entry);
+
+ mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+ entry->key.vid,
+ SWITCHDEV_FDB_ADD_TO_BRIDGE);
+}
+
+static void
+mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
+ struct mlx5_esw_bridge *bridge)
+{
+ trace_mlx5_esw_bridge_fdb_entry_cleanup(entry);
+
+ rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
+ mlx5_del_flow_rules(entry->egress_handle);
+ if (entry->filter_handle)
+ mlx5_del_flow_rules(entry->filter_handle);
+ mlx5_del_flow_rules(entry->ingress_handle);
+ mlx5_fc_destroy(bridge->br_offloads->esw->dev, entry->ingress_counter);
+ list_del(&entry->vlan_list);
+ list_del(&entry->list);
+ kvfree(entry);
+}
+
+static void
+mlx5_esw_bridge_fdb_entry_notify_and_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
+ struct mlx5_esw_bridge *bridge)
+{
+ mlx5_esw_bridge_fdb_del_notify(entry);
+ mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+}
+
+static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port)
+{
+ return xa_load(&port->vlans, vid);
+}
+
+static int
+mlx5_esw_bridge_vlan_push_create(u16 vlan_proto, struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_eswitch *esw)
+{
+ struct {
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ } vlan_hdr = { htons(vlan_proto), htons(vlan->vid) };
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5_pkt_reformat *pkt_reformat;
+
+ if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_insert)) ||
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_size) < sizeof(vlan_hdr) ||
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_offset) <
+ offsetof(struct vlan_ethhdr, h_vlan_proto)) {
+ esw_warn(esw->dev, "Packet reformat INSERT_HEADER is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_INSERT_HDR;
+ reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START;
+ reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto);
+ reformat_params.size = sizeof(vlan_hdr);
+ reformat_params.data = &vlan_hdr;
+ pkt_reformat = mlx5_packet_reformat_alloc(esw->dev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(pkt_reformat)) {
+ esw_warn(esw->dev, "Failed to alloc packet reformat INSERT_HEADER (err=%ld)\n",
+ PTR_ERR(pkt_reformat));
+ return PTR_ERR(pkt_reformat);
+ }
+
+ vlan->pkt_reformat_push = pkt_reformat;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_push_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_push);
+ vlan->pkt_reformat_push = NULL;
+}
+
+static int
+mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ struct mlx5_pkt_reformat *pkt_reformat;
+
+ if (!mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) {
+ esw_warn(esw->dev, "Packet reformat REMOVE_HEADER is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw);
+ if (IS_ERR(pkt_reformat)) {
+ esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n",
+ PTR_ERR(pkt_reformat));
+ return PTR_ERR(pkt_reformat);
+ }
+
+ vlan->pkt_reformat_pop = pkt_reformat;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_pop);
+ vlan->pkt_reformat_pop = NULL;
+}
+
+static int
+mlx5_esw_bridge_vlan_push_mark_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_modify_hdr *pkt_mod_hdr;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
+ MLX5_SET(set_action_in, action, offset, 8);
+ MLX5_SET(set_action_in, action, length, ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS);
+ MLX5_SET(set_action_in, action, data, ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN);
+
+ pkt_mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, 1, action);
+ if (IS_ERR(pkt_mod_hdr))
+ return PTR_ERR(pkt_mod_hdr);
+
+ vlan->pkt_mod_hdr_push_mark = pkt_mod_hdr;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ mlx5_modify_header_dealloc(esw->dev, vlan->pkt_mod_hdr_push_mark);
+ vlan->pkt_mod_hdr_push_mark = NULL;
+}
+
+static int
+mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_eswitch *esw)
+{
+ int err;
+
+ if (flags & BRIDGE_VLAN_INFO_PVID) {
+ err = mlx5_esw_bridge_vlan_push_create(vlan_proto, vlan, esw);
+ if (err)
+ return err;
+
+ err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw);
+ if (err)
+ goto err_vlan_push_mark;
+ }
+
+ if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
+ err = mlx5_esw_bridge_vlan_pop_create(vlan, esw);
+ if (err)
+ goto err_vlan_pop;
+ }
+
+ return 0;
+
+err_vlan_pop:
+ if (vlan->pkt_mod_hdr_push_mark)
+ mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+err_vlan_push_mark:
+ if (vlan->pkt_reformat_push)
+ mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+ return err;
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_bridge_port *port,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ int err;
+
+ vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return ERR_PTR(-ENOMEM);
+
+ vlan->vid = vid;
+ vlan->flags = flags;
+ INIT_LIST_HEAD(&vlan->fdb_list);
+
+ err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw);
+ if (err)
+ goto err_vlan_push_pop;
+
+ err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL);
+ if (err)
+ goto err_xa_insert;
+
+ trace_mlx5_esw_bridge_vlan_create(vlan);
+ return vlan;
+
+err_xa_insert:
+ if (vlan->pkt_reformat_pop)
+ mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
+ if (vlan->pkt_mod_hdr_push_mark)
+ mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+ if (vlan->pkt_reformat_push)
+ mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+err_vlan_push_pop:
+ kvfree(vlan);
+ return ERR_PTR(err);
+}
+
+static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ xa_erase(&port->vlans, vlan->vid);
+}
+
+static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_eswitch *esw = bridge->br_offloads->esw;
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list)
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+
+ if (vlan->pkt_reformat_pop)
+ mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
+ if (vlan->pkt_mod_hdr_push_mark)
+ mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+ if (vlan->pkt_reformat_push)
+ mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+}
+
+static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_esw_bridge *bridge)
+{
+ trace_mlx5_esw_bridge_vlan_cleanup(vlan);
+ mlx5_esw_bridge_vlan_flush(vlan, bridge);
+ mlx5_esw_bridge_vlan_erase(port, vlan);
+ kvfree(vlan);
+}
+
+static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ unsigned long index;
+
+ xa_for_each(&port->vlans, index, vlan)
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge);
+}
+
+static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_vlan *vlan;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&port->vlans, i, vlan) {
+ mlx5_esw_bridge_vlan_flush(vlan, bridge);
+ err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan,
+ br_offloads->esw);
+ if (err) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n",
+ vlan->vid, bridge->vlan_proto, port->vport_num,
+ err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_esw_bridge_vlans_recreate(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_port *port;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port->bridge != bridge)
+ continue;
+
+ err = mlx5_esw_bridge_port_vlans_recreate(port, bridge);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge_vlan *vlan;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads);
+ if (!port) {
+ /* FDB is added asynchronously on wq while port might have been deleted
+ * concurrently. Report on 'info' logging level and skip the FDB offload.
+ */
+ esw_info(esw->dev, "Failed to lookup bridge port (vport=%u)\n", vport_num);
+ return ERR_PTR(-EINVAL);
+ }
+
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (!vlan) {
+ /* FDB is added asynchronously on wq while vlan might have been deleted
+ * concurrently. Report on 'info' logging level and skip the FDB offload.
+ */
+ esw_info(esw->dev, "Failed to lookup bridge port vlan metadata (vport=%u)\n",
+ vport_num);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return vlan;
+}
+
+static struct mlx5_esw_bridge_fdb_entry *
+mlx5_esw_bridge_fdb_lookup(struct mlx5_esw_bridge *bridge,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge_fdb_key key = {};
+
+ ether_addr_copy(key.addr, addr);
+ key.vid = vid;
+ return rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params);
+}
+
+static struct mlx5_esw_bridge_fdb_entry *
+mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid, bool added_by_user, bool peer,
+ struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_vlan *vlan = NULL;
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_fc *counter;
+ int err;
+
+ if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) {
+ vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge,
+ esw);
+ if (IS_ERR(vlan))
+ return ERR_CAST(vlan);
+ }
+
+ entry = mlx5_esw_bridge_fdb_lookup(bridge, addr, vid);
+ if (entry)
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+
+ entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ ether_addr_copy(entry->key.addr, addr);
+ entry->key.vid = vid;
+ entry->dev = dev;
+ entry->vport_num = vport_num;
+ entry->esw_owner_vhca_id = esw_owner_vhca_id;
+ entry->lastuse = jiffies;
+ if (added_by_user)
+ entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER;
+ if (peer)
+ entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER;
+
+ counter = mlx5_fc_create(esw->dev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_ingress_fc_create;
+ }
+ entry->ingress_counter = counter;
+
+ handle = peer ?
+ mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan,
+ mlx5_fc_id(counter), bridge) :
+ mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan,
+ mlx5_fc_id(counter), bridge);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n",
+ vport_num, err);
+ goto err_ingress_flow_create;
+ }
+ entry->ingress_handle = handle;
+
+ if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG) {
+ handle = mlx5_esw_bridge_ingress_filter_flow_create(vport_num, addr, bridge);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ esw_warn(esw->dev, "Failed to create ingress filter(vport=%u,err=%d)\n",
+ vport_num, err);
+ goto err_ingress_filter_flow_create;
+ }
+ entry->filter_handle = handle;
+ }
+
+ handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan,
+ bridge);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n",
+ vport_num, err);
+ goto err_egress_flow_create;
+ }
+ entry->egress_handle = handle;
+
+ err = rhashtable_insert_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
+ if (err) {
+ esw_warn(esw->dev, "Failed to insert FDB flow(vport=%u,err=%d)\n", vport_num, err);
+ goto err_ht_init;
+ }
+
+ if (vlan)
+ list_add(&entry->vlan_list, &vlan->fdb_list);
+ else
+ INIT_LIST_HEAD(&entry->vlan_list);
+ list_add(&entry->list, &bridge->fdb_list);
+
+ trace_mlx5_esw_bridge_fdb_entry_init(entry);
+ return entry;
+
+err_ht_init:
+ mlx5_del_flow_rules(entry->egress_handle);
+err_egress_flow_create:
+ if (entry->filter_handle)
+ mlx5_del_flow_rules(entry->filter_handle);
+err_ingress_filter_flow_create:
+ mlx5_del_flow_rules(entry->ingress_handle);
+err_ingress_flow_create:
+ mlx5_fc_destroy(esw->dev, entry->ingress_counter);
+err_ingress_fc_create:
+ kvfree(entry);
+ return ERR_PTR(err);
+}
+
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ port->bridge->ageing_time = clock_t_to_jiffies(ageing_time);
+ return 0;
+}
+
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+ bool filtering;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ bridge = port->bridge;
+ filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+ if (filtering == enable)
+ return 0;
+
+ mlx5_esw_bridge_fdb_flush(bridge);
+ if (enable)
+ bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+ else
+ bridge->flags &= ~MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+
+ return 0;
+}
+
+int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id,
+ br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ bridge = port->bridge;
+ if (bridge->vlan_proto == proto)
+ return 0;
+ if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) {
+ esw_warn(br_offloads->esw->dev, "Can't set unsupported VLAN protocol %x", proto);
+ return -EOPNOTSUPP;
+ }
+
+ mlx5_esw_bridge_fdb_flush(bridge);
+ bridge->vlan_proto = proto;
+ mlx5_esw_bridge_vlans_recreate(bridge);
+
+ return 0;
+}
+
+static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_esw_bridge_port *port;
+ int err;
+
+ port = kvzalloc(sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ port->vport_num = vport_num;
+ port->esw_owner_vhca_id = esw_owner_vhca_id;
+ port->bridge = bridge;
+ port->flags |= flags;
+ xa_init(&port->vlans);
+ err = mlx5_esw_bridge_port_insert(port, br_offloads);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
+ port->vport_num, port->esw_owner_vhca_id, err);
+ goto err_port_insert;
+ }
+ trace_mlx5_esw_bridge_vport_init(port);
+
+ return 0;
+
+err_port_insert:
+ kvfree(port);
+ return err;
+}
+
+static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge_port *port)
+{
+ u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id;
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
+ if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id)
+ mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+
+ trace_mlx5_esw_bridge_vport_cleanup(port);
+ mlx5_esw_bridge_port_vlans_flush(port, bridge);
+ mlx5_esw_bridge_port_erase(port, br_offloads);
+ kvfree(port);
+ mlx5_esw_bridge_put(br_offloads, bridge);
+ return 0;
+}
+
+static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge *bridge;
+ int err;
+
+ bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads);
+ if (IS_ERR(bridge)) {
+ NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex");
+ return PTR_ERR(bridge);
+ }
+
+ err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error initializing port");
+ goto err_vport;
+ }
+ return 0;
+
+err_vport:
+ mlx5_esw_bridge_put(br_offloads, bridge);
+ return err;
+}
+
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0,
+ br_offloads, extack);
+}
+
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge_port *port;
+ int err;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge");
+ return -EINVAL;
+ }
+ if (port->bridge->ifindex != ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge");
+ return -EINVAL;
+ }
+
+ err = mlx5_esw_bridge_vport_cleanup(br_offloads, port);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed");
+ return err;
+}
+
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch))
+ return 0;
+
+ return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id,
+ MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+ br_offloads, extack);
+}
+
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads,
+ extack);
+}
+
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge_vlan *vlan;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (vlan) {
+ if (vlan->flags == flags)
+ return 0;
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
+ }
+
+ vlan = mlx5_esw_bridge_vlan_create(port->bridge->vlan_proto, vid, flags, port,
+ br_offloads->esw);
+ if (IS_ERR(vlan)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry");
+ return PTR_ERR(vlan);
+ }
+ return 0;
+}
+
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge_vlan *vlan;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (!vlan)
+ return;
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
+}
+
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER)
+ return;
+
+ bridge = port->bridge;
+ entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
+ if (!entry) {
+ esw_debug(br_offloads->esw->dev,
+ "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+ fdb_info->addr, fdb_info->vid, vport_num);
+ return;
+ }
+
+ entry->lastuse = jiffies;
+}
+
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ bridge = port->bridge;
+ entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr,
+ fdb_info->vid, fdb_info->added_by_user,
+ port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+ br_offloads->esw, bridge);
+ if (IS_ERR(entry))
+ return;
+
+ if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+ mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+ SWITCHDEV_FDB_OFFLOADED);
+ else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER))
+ /* Take over dynamic entries to prevent kernel bridge from aging them out. */
+ mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+ SWITCHDEV_FDB_ADD_TO_BRIDGE);
+}
+
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
+{
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ bridge = port->bridge;
+ entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
+ if (!entry) {
+ esw_warn(esw->dev,
+ "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+ fdb_info->addr, fdb_info->vid, vport_num);
+ return;
+ }
+
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+}
+
+void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+ struct mlx5_esw_bridge *bridge;
+
+ list_for_each_entry(bridge, &br_offloads->bridges, list) {
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
+ unsigned long lastuse =
+ (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter);
+
+ if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+ continue;
+
+ if (time_after(lastuse, entry->lastuse))
+ mlx5_esw_bridge_fdb_entry_refresh(entry);
+ else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) &&
+ time_is_before_jiffies(entry->lastuse + bridge->ageing_time))
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+ }
+ }
+}
+
+static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ unsigned long i;
+
+ xa_for_each(&br_offloads->ports, i, port)
+ mlx5_esw_bridge_vport_cleanup(br_offloads, port);
+
+ WARN_ONCE(!list_empty(&br_offloads->bridges),
+ "Cleaning up bridge offloads while still having bridges attached\n");
+}
+
+struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
+ ASSERT_RTNL();
+
+ br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
+ if (!br_offloads)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&br_offloads->bridges);
+ xa_init(&br_offloads->ports);
+ br_offloads->esw = esw;
+ esw->br_offloads = br_offloads;
+
+ return br_offloads;
+}
+
+void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+
+ ASSERT_RTNL();
+
+ if (!br_offloads)
+ return;
+
+ mlx5_esw_bridge_flush(br_offloads);
+ WARN_ON(!xa_empty(&br_offloads->ports));
+
+ esw->br_offloads = NULL;
+ kvfree(br_offloads);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
new file mode 100644
index 000000000000..10851a515bca
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_ESW_BRIDGE_H__
+#define __MLX5_ESW_BRIDGE_H__
+
+#include <linux/notifier.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+#include "eswitch.h"
+
+struct mlx5_flow_table;
+struct mlx5_flow_group;
+
+struct mlx5_esw_bridge_offloads {
+ struct mlx5_eswitch *esw;
+ struct list_head bridges;
+ struct xarray ports;
+
+ struct notifier_block netdev_nb;
+ struct notifier_block nb_blk;
+ struct notifier_block nb;
+ struct workqueue_struct *wq;
+ struct delayed_work update_work;
+
+ struct mlx5_flow_table *ingress_ft;
+ struct mlx5_flow_group *ingress_vlan_fg;
+ struct mlx5_flow_group *ingress_vlan_filter_fg;
+ struct mlx5_flow_group *ingress_qinq_fg;
+ struct mlx5_flow_group *ingress_qinq_filter_fg;
+ struct mlx5_flow_group *ingress_mac_fg;
+
+ struct mlx5_flow_table *skip_ft;
+};
+
+struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw);
+void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw);
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+
+#endif /* __MLX5_ESW_BRIDGE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
new file mode 100644
index 000000000000..878311fe950a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef _MLX5_ESW_BRIDGE_PRIVATE_
+#define _MLX5_ESW_BRIDGE_PRIVATE_
+
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/rhashtable.h>
+#include <linux/xarray.h>
+#include "fs_core.h"
+
+struct mlx5_esw_bridge_fdb_key {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
+enum {
+ MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
+ MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
+};
+
+enum {
+ MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0),
+};
+
+struct mlx5_esw_bridge_fdb_entry {
+ struct mlx5_esw_bridge_fdb_key key;
+ struct rhash_head ht_node;
+ struct net_device *dev;
+ struct list_head list;
+ struct list_head vlan_list;
+ u16 vport_num;
+ u16 esw_owner_vhca_id;
+ u16 flags;
+
+ struct mlx5_flow_handle *ingress_handle;
+ struct mlx5_fc *ingress_counter;
+ unsigned long lastuse;
+ struct mlx5_flow_handle *egress_handle;
+ struct mlx5_flow_handle *filter_handle;
+};
+
+struct mlx5_esw_bridge_vlan {
+ u16 vid;
+ u16 flags;
+ struct list_head fdb_list;
+ struct mlx5_pkt_reformat *pkt_reformat_push;
+ struct mlx5_pkt_reformat *pkt_reformat_pop;
+ struct mlx5_modify_hdr *pkt_mod_hdr_push_mark;
+};
+
+struct mlx5_esw_bridge_port {
+ u16 vport_num;
+ u16 esw_owner_vhca_id;
+ u16 flags;
+ struct mlx5_esw_bridge *bridge;
+ struct xarray vlans;
+};
+
+#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
new file mode 100644
index 000000000000..2db13c71e88c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/debugfs.h>
+#include "eswitch.h"
+
+enum vnic_diag_counter {
+ MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,
+ MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,
+ MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,
+ MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,
+ MLX5_VNIC_DIAG_CQ_OVERRUN,
+ MLX5_VNIC_DIAG_INVALID_COMMAND,
+ MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
+};
+
+static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
+ u32 *val)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+ struct mlx5_core_dev *dev = vport->dev;
+ u16 vport_num = vport->vport;
+ void *vnic_diag_out;
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
+ if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num))
+ MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env);
+ switch (counter) {
+ case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues);
+ break;
+ case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out,
+ send_queue_priority_update_flow);
+ break;
+ case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_CQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_INVALID_COMMAND:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command);
+ break;
+ case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
+ break;
+ }
+
+ return 0;
+}
+
+static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
+ enum vnic_diag_counter type)
+{
+ u32 val = 0;
+ int ret;
+
+ ret = mlx5_esw_query_vnic_diag(vport, type, &val);
+ if (ret)
+ return ret;
+
+ seq_printf(file, "%d\n", val);
+ return 0;
+}
+
+static int total_q_under_processor_handle_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
+}
+
+static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private,
+ MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
+}
+
+static int comp_eq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
+}
+
+static int async_eq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
+}
+
+static int cq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
+}
+
+static int invalid_command_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
+}
+
+static int quota_exceeded_command_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
+}
+
+DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
+DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
+DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(cq_overrun);
+DEFINE_SHOW_ATTRIBUTE(invalid_command);
+DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ debugfs_remove_recursive(vport->dbgfs);
+ vport->dbgfs = NULL;
+}
+
+/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */
+#define VNIC_DIAG_DIR_NAME_MAX_LEN 8
+
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+ struct dentry *vnic_diag;
+ char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
+ int err;
+
+ if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return;
+
+ if (vport_num == MLX5_VPORT_PF) {
+ strcpy(dir_name, "pf");
+ } else if (vport_num == MLX5_VPORT_ECPF) {
+ strcpy(dir_name, "ecpf");
+ } else {
+ err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf",
+ is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
+ if (WARN_ON(err < 0))
+ return;
+ }
+
+ vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
+ vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);
+
+ if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
+ debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
+ &total_q_under_processor_handle_fops);
+ debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
+ &send_queue_priority_update_flow_fops);
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
+ debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
+ &comp_eq_overrun_fops);
+ debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
+ &async_eq_overrun_fops);
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
+ debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);
+
+ if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
+ debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
+ &invalid_command_fops);
+
+ if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
+ debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
+ &quota_exceeded_command_fops);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
new file mode 100644
index 000000000000..9bc7be95db54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd. */
+
+#include <linux/mlx5/driver.h>
+#include "eswitch.h"
+
+static void
+mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+ u64 parent_id;
+
+ parent_id = mlx5_query_nic_system_image_guid(dev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
+
+static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num == MLX5_VPORT_UPLINK ||
+ (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) ||
+ mlx5_eswitch_is_vf_vport(esw, vport_num);
+}
+
+static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct devlink_port_attrs attrs = {};
+ struct netdev_phys_item_id ppid = {};
+ struct devlink_port *dl_port;
+ u32 controller_num = 0;
+ bool external;
+ u16 pfnum;
+
+ dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL);
+ if (!dl_port)
+ return NULL;
+
+ mlx5_esw_get_port_parent_id(dev, &ppid);
+ pfnum = mlx5_get_dev_index(dev);
+ external = mlx5_core_is_ecpf_esw_manager(dev);
+ if (external)
+ controller_num = dev->priv.eswitch->offloads.host_number + 1;
+
+ if (vport_num == MLX5_VPORT_UPLINK) {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = pfnum;
+ memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+ attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_set(dl_port, &attrs);
+ } else if (vport_num == MLX5_VPORT_PF) {
+ memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external);
+ } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) {
+ memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+ vport_num - 1, external);
+ }
+ return dl_port;
+}
+
+static void mlx5_esw_dl_port_free(struct devlink_port *dl_port)
+{
+ kfree(dl_port);
+}
+
+int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct devlink_port *dl_port;
+ unsigned int dl_port_index;
+ struct mlx5_vport *vport;
+ struct devlink *devlink;
+ int err;
+
+ if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+ return 0;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ dl_port = mlx5_esw_dl_port_alloc(esw, vport_num);
+ if (!dl_port)
+ return -ENOMEM;
+
+ devlink = priv_to_devlink(dev);
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
+ err = devl_port_register(devlink, dl_port, dl_port_index);
+ if (err)
+ goto reg_err;
+
+ err = devl_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
+ vport->dl_port = dl_port;
+ return 0;
+
+rate_err:
+ devl_port_unregister(dl_port);
+reg_err:
+ mlx5_esw_dl_port_free(dl_port);
+ return err;
+}
+
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+ return;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devl_rate_leaf_destroy(vport->dl_port);
+ }
+
+ devl_port_unregister(vport->dl_port);
+ mlx5_esw_dl_port_free(vport->dl_port);
+ vport->dl_port = NULL;
+}
+
+struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port;
+}
+
+int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct netdev_phys_item_id ppid = {};
+ unsigned int dl_port_index;
+ struct mlx5_vport *vport;
+ struct devlink *devlink;
+ u16 pfnum;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ pfnum = mlx5_get_dev_index(dev);
+ mlx5_esw_get_port_parent_id(dev, &ppid);
+ memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller);
+ devlink = priv_to_devlink(dev);
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
+ err = devl_port_register(devlink, dl_port, dl_port_index);
+ if (err)
+ return err;
+
+ err = devl_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
+ vport->dl_port = dl_port;
+ return 0;
+
+rate_err:
+ devl_port_unregister(dl_port);
+ return err;
+}
+
+void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devl_rate_leaf_destroy(vport->dl_port);
+ }
+
+ devl_port_unregister(vport->dl_port);
+ vport->dl_port = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
new file mode 100644
index 000000000000..51ac24e6ec3c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_BRIDGE_TRACEPOINT_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_BRIDGE_TRACEPOINT_
+
+#include <linux/tracepoint.h>
+#include "../bridge_priv.h"
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb),
+ TP_STRUCT__entry(
+ __array(char, dev_name, IFNAMSIZ)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __field(u16, flags)
+ __field(unsigned int, used)
+ ),
+ TP_fast_assign(
+ strscpy(__entry->dev_name,
+ netdev_name(fdb->dev),
+ IFNAMSIZ);
+ memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
+ __entry->vid = fdb->key.vid;
+ __entry->flags = fdb->flags;
+ __entry->used = jiffies_to_msecs(jiffies - fdb->lastuse)
+ ),
+ TP_printk("net_device=%s addr=%pM vid=%hu flags=%hx used=%u",
+ __entry->dev_name,
+ __entry->addr,
+ __entry->vid,
+ __entry->flags,
+ __entry->used / 1000)
+ );
+
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+ mlx5_esw_bridge_fdb_entry_init,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+ mlx5_esw_bridge_fdb_entry_refresh,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+ mlx5_esw_bridge_fdb_entry_cleanup,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb)
+ );
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_vlan_template,
+ TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+ TP_ARGS(vlan),
+ TP_STRUCT__entry(
+ __field(u16, vid)
+ __field(u16, flags)
+ ),
+ TP_fast_assign(
+ __entry->vid = vlan->vid;
+ __entry->flags = vlan->flags;
+ ),
+ TP_printk("vid=%hu flags=%hx",
+ __entry->vid,
+ __entry->flags)
+ );
+
+DEFINE_EVENT(mlx5_esw_bridge_vlan_template,
+ mlx5_esw_bridge_vlan_create,
+ TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+ TP_ARGS(vlan)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_vlan_template,
+ mlx5_esw_bridge_vlan_cleanup,
+ TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+ TP_ARGS(vlan)
+ );
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template,
+ TP_PROTO(const struct mlx5_esw_bridge_port *port),
+ TP_ARGS(port),
+ TP_STRUCT__entry(
+ __field(u16, vport_num)
+ __field(u16, esw_owner_vhca_id)
+ __field(u16, flags)
+ ),
+ TP_fast_assign(
+ __entry->vport_num = port->vport_num;
+ __entry->esw_owner_vhca_id = port->esw_owner_vhca_id;
+ __entry->flags = port->flags;
+ ),
+ TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx",
+ __entry->vport_num,
+ __entry->esw_owner_vhca_id,
+ __entry->flags)
+ );
+
+DEFINE_EVENT(mlx5_esw_bridge_port_template,
+ mlx5_esw_bridge_vport_init,
+ TP_PROTO(const struct mlx5_esw_bridge_port *port),
+ TP_ARGS(port)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_port_template,
+ mlx5_esw_bridge_vport_cleanup,
+ TP_PROTO(const struct mlx5_esw_bridge_port *port),
+ TP_ARGS(port)
+ );
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE bridge_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
new file mode 100644
index 000000000000..458baf0c6415
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_TP_
+
+#include <linux/tracepoint.h>
+#include "eswitch.h"
+
+TRACE_EVENT(mlx5_esw_vport_qos_destroy,
+ TP_PROTO(const struct mlx5_vport *vport),
+ TP_ARGS(vport),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix
+ )
+);
+
+DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ __field(void *, group)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ __entry->group = vport->qos.group;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate, __entry->group
+ )
+);
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix
+ )
+);
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+TRACE_EVENT(mlx5_esw_group_qos_config,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix, u32 bw_share, u32 max_rate),
+ TP_ARGS(dev, group, tsar_ix, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate
+ )
+);
+#endif /* _MLX5_ESW_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE qos_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
new file mode 100644
index 000000000000..c9a91158e99c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -0,0 +1,523 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "en.h"
+#include "en_tc.h"
+#include "fs_core.h"
+#include "esw/indir_table.h"
+#include "lib/fs_chains.h"
+#include "en/mod_hdr.h"
+
+#define MLX5_ESW_INDIR_TABLE_SIZE 128
+#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
+#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
+
+struct mlx5_esw_indir_table_rule {
+ struct list_head list;
+ struct mlx5_flow_handle *handle;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip;
+ u32 vni;
+ struct mlx5_modify_hdr *mh;
+ refcount_t refcnt;
+};
+
+struct mlx5_esw_indir_table_entry {
+ struct hlist_node hlist;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *recirc_grp;
+ struct mlx5_flow_group *fwd_grp;
+ struct mlx5_flow_handle *fwd_rule;
+ struct list_head recirc_rules;
+ int recirc_cnt;
+ int fwd_ref;
+
+ u16 vport;
+ u8 ip_version;
+};
+
+struct mlx5_esw_indir_table {
+ struct mutex lock; /* protects table */
+ DECLARE_HASHTABLE(table, 8);
+};
+
+struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void)
+{
+ struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);
+
+ if (!indir)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&indir->lock);
+ hash_init(indir->table);
+ return indir;
+}
+
+void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
+{
+ mutex_destroy(&indir->lock);
+ kvfree(indir);
+}
+
+bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ bool vf_sf_vport;
+
+ vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) ||
+ mlx5_esw_is_sf_vport(esw, vport_num);
+
+ /* Use indirect table for all IP traffic from UL to VF with vport
+ * destination when source rewrite flag is set.
+ */
+ return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
+ vf_sf_vport &&
+ esw->dev == dest_mdev &&
+ attr->ip_version &&
+ attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
+}
+
+u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
+}
+
+static struct mlx5_esw_indir_table_rule *
+mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_esw_indir_table_rule *rule;
+
+ list_for_each_entry(rule, &e->recirc_rules, list)
+ if (rule->vni == attr->rx_tun_attr->vni &&
+ !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
+ sizeof(attr->rx_tun_attr->dst_ip)))
+ goto found;
+ return NULL;
+
+found:
+ refcount_inc(&rule->refcnt);
+ return rule;
+}
+
+static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_esw_indir_table_rule *rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ int err = 0;
+ u32 data;
+
+ rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
+ if (rule)
+ return 0;
+
+ if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
+ return -EINVAL;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return -ENOMEM;
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS |
+ MLX5_MATCH_MISC_PARAMETERS_2;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ outer_headers.ip_version, 0xf);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
+ attr->ip_version);
+ } else if (attr->ip_version) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
+ (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
+ } else {
+ err = -EOPNOTSUPP;
+ goto err_ethertype;
+ }
+
+ if (attr->ip_version == 4) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ MLX5_SET(fte_match_param, rule_spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
+ } else if (attr->ip_version == 6) {
+ int len = sizeof(struct in6_addr);
+
+ memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, len);
+ memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &esw_attr->rx_tun_attr->dst_ip.v6, len);
+ }
+
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ misc_parameters.vxlan_vni);
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
+ MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
+ MLX5_VPORT_UPLINK));
+
+ /* Modify flow source to recirculate packet */
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err)
+ goto err_mod_hdr_regc0;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
+ if (err)
+ goto err_mod_hdr_regc1;
+
+ flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts.num_actions, mod_acts.actions);
+ if (IS_ERR(flow_act.modify_hdr)) {
+ err = PTR_ERR(flow_act.modify_hdr);
+ goto err_mod_hdr_alloc;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
+ if (IS_ERR(dest.ft)) {
+ err = PTR_ERR(dest.ft);
+ goto err_table;
+ }
+ handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto err_handle;
+ }
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ rule->handle = handle;
+ rule->vni = esw_attr->rx_tun_attr->vni;
+ rule->mh = flow_act.modify_hdr;
+ memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
+ sizeof(esw_attr->rx_tun_attr->dst_ip));
+ refcount_set(&rule->refcnt, 1);
+ list_add(&rule->list, &e->recirc_rules);
+ e->recirc_cnt++;
+ goto out;
+
+err_handle:
+ mlx5_chains_put_table(chains, 0, 1, 0);
+err_table:
+ mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
+err_mod_hdr_alloc:
+err_mod_hdr_regc1:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+err_mod_hdr_regc0:
+err_ethertype:
+ kfree(rule);
+out:
+ kvfree(rule_spec);
+ return err;
+}
+
+static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5_esw_indir_table_rule *rule;
+
+ list_for_each_entry(rule, &e->recirc_rules, list)
+ if (rule->vni == esw_attr->rx_tun_attr->vni &&
+ !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
+ sizeof(esw_attr->rx_tun_attr->dst_ip)))
+ goto found;
+
+ return;
+
+found:
+ if (!refcount_dec_and_test(&rule->refcnt))
+ return;
+
+ mlx5_del_flow_rules(rule->handle);
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ mlx5_modify_header_dealloc(esw->dev, rule->mh);
+ list_del(&rule->list);
+ kfree(rule);
+ e->recirc_cnt--;
+}
+
+static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
+ MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);
+
+ if (attr->ip_version == 4) {
+ MLX5_SET_TO_ONES(fte_match_param, match,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else if (attr->ip_version == 6) {
+ memset(MLX5_ADDR_OF(fte_match_param, match,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, sizeof(struct in6_addr));
+ } else {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
+ e->recirc_grp = mlx5_create_flow_group(e->ft, in);
+ if (IS_ERR(e->recirc_grp)) {
+ err = PTR_ERR(e->recirc_grp);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&e->recirc_rules);
+ e->recirc_cnt = 0;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ /* Hold one entry */
+ MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
+ e->fwd_grp = mlx5_create_flow_group(e->ft, in);
+ if (IS_ERR(e->fwd_grp)) {
+ err = PTR_ERR(e->fwd_grp);
+ goto err_out;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = e->vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(e->fwd_rule)) {
+ mlx5_destroy_flow_group(e->fwd_grp);
+ err = PTR_ERR(e->fwd_rule);
+ }
+
+err_out:
+ kvfree(spec);
+ kvfree(in);
+ return err;
+}
+
+static struct mlx5_esw_indir_table_entry *
+mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec, u16 vport, bool decap)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_esw_indir_table_entry *e;
+ struct mlx5_flow_table *ft;
+ int err = 0;
+
+ root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns)
+ return ERR_PTR(-ENOENT);
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+
+ ft_attr.prio = FDB_TC_OFFLOAD;
+ ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto tbl_err;
+ }
+ e->ft = ft;
+ e->vport = vport;
+ e->ip_version = attr->ip_version;
+ e->fwd_ref = !decap;
+
+ err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
+ if (err)
+ goto recirc_grp_err;
+
+ if (decap) {
+ err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
+ if (err)
+ goto recirc_rule_err;
+ }
+
+ err = mlx5_create_indir_fwd_group(esw, e);
+ if (err)
+ goto fwd_grp_err;
+
+ hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
+ vport << 16 | attr->ip_version);
+
+ return e;
+
+fwd_grp_err:
+ if (decap)
+ mlx5_esw_indir_table_rule_put(esw, attr, e);
+recirc_rule_err:
+ mlx5_destroy_flow_group(e->recirc_grp);
+recirc_grp_err:
+ mlx5_destroy_flow_table(e->ft);
+tbl_err:
+ kfree(e);
+ return ERR_PTR(err);
+}
+
+static struct mlx5_esw_indir_table_entry *
+mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
+{
+ struct mlx5_esw_indir_table_entry *e;
+ u32 key = vport << 16 | ip_version;
+
+ hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
+ if (e->vport == vport && e->ip_version == ip_version)
+ return e;
+
+ return NULL;
+}
+
+struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ u16 vport, bool decap)
+{
+ struct mlx5_esw_indir_table_entry *e;
+ int err;
+
+ mutex_lock(&esw->fdb_table.offloads.indir->lock);
+ e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
+ if (e) {
+ if (!decap) {
+ e->fwd_ref++;
+ } else {
+ err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
+ if (err)
+ goto out_err;
+ }
+ } else {
+ e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
+ if (IS_ERR(e)) {
+ err = PTR_ERR(e);
+ esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
+ goto out_err;
+ }
+ }
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+ return e->ft;
+
+out_err:
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+ return ERR_PTR(err);
+}
+
+void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap)
+{
+ struct mlx5_esw_indir_table_entry *e;
+
+ mutex_lock(&esw->fdb_table.offloads.indir->lock);
+ e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
+ if (!e)
+ goto out;
+
+ if (!decap)
+ e->fwd_ref--;
+ else
+ mlx5_esw_indir_table_rule_put(esw, attr, e);
+
+ if (e->fwd_ref || e->recirc_cnt)
+ goto out;
+
+ hash_del(&e->hlist);
+ mlx5_destroy_flow_group(e->recirc_grp);
+ mlx5_del_flow_rules(e->fwd_rule);
+ mlx5_destroy_flow_group(e->fwd_grp);
+ mlx5_destroy_flow_table(e->ft);
+ kfree(e);
+out:
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
new file mode 100644
index 000000000000..21d56b49d14b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_ESW_FT_H__
+#define __MLX5_ESW_FT_H__
+
+#ifdef CONFIG_MLX5_CLS_ACT
+
+struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void);
+void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir);
+
+struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ u16 vport, bool decap);
+void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap);
+
+bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev);
+
+u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr);
+
+#else
+/* indir API stubs */
+static inline struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void)
+{
+ return NULL;
+}
+
+static inline void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
+{
+}
+
+static inline struct mlx5_flow_table *
+mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ u16 vport, bool decap)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap)
+{
+}
+
+static inline bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev)
+{
+ return false;
+}
+
+static inline u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+#endif
+
+#endif /* __MLX5_ESW_FT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
new file mode 100644
index 000000000000..fabe49a35a5c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "esw/acl/lgcy.h"
+#include "esw/legacy.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "fs_core.h"
+#include "fs_ft_pool.h"
+#include "esw/qos.h"
+
+enum {
+ LEGACY_VEPA_PRIO = 0,
+ LEGACY_FDB_PRIO,
+};
+
+static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb;
+ int err;
+
+ root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* num FTE 2, num FG 2 */
+ ft_attr.prio = LEGACY_VEPA_PRIO;
+ ft_attr.max_fte = 2;
+ ft_attr.autogroup.max_num_groups = 2;
+ fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
+ return err;
+ }
+ esw->fdb_table.legacy.vepa_fdb = fdb;
+
+ return 0;
+}
+
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ esw_debug(esw->dev, "Destroy FDB Table\n");
+ if (!esw->fdb_table.legacy.fdb)
+ return;
+
+ if (esw->fdb_table.legacy.promisc_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+ if (esw->fdb_table.legacy.allmulti_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+ if (esw->fdb_table.legacy.addr_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+
+ esw->fdb_table.legacy.fdb = NULL;
+ esw->fdb_table.legacy.addr_grp = NULL;
+ esw->fdb_table.legacy.allmulti_grp = NULL;
+ esw->fdb_table.legacy.promisc_grp = NULL;
+ atomic64_set(&esw->user_count, 0);
+}
+
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int table_size;
+ u32 *flow_group_in;
+ u8 *dmac;
+ int err = 0;
+
+ esw_debug(dev, "Create FDB log_max_size(%d)\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ ft_attr.max_fte = POOL_NEXT_SIZE;
+ ft_attr.prio = LEGACY_FDB_PRIO;
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create FDB Table err %d\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.fdb = fdb;
+ table_size = fdb->max_fte;
+
+ /* Addresses group : Full match unicast/multicast addresses */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ /* Preserve 2 entries for allmulti and promisc rules*/
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
+ eth_broadcast_addr(dmac);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.addr_grp = g;
+
+ /* Allmulti group : One rule that forwards any mcast traffic */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2);
+ eth_zero_addr(dmac);
+ dmac[0] = 0x01;
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.allmulti_grp = g;
+
+ /* Promiscuous group :
+ * One rule that forward all unmatched traffic from previous groups
+ */
+ eth_zero_addr(dmac);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.promisc_grp = g;
+
+out:
+ if (err)
+ esw_destroy_legacy_fdb_table(esw);
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+ esw_debug(esw->dev, "Destroy VEPA Table\n");
+ if (!esw->fdb_table.legacy.vepa_fdb)
+ return;
+
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb);
+ esw->fdb_table.legacy.vepa_fdb = NULL;
+}
+
+static int esw_create_legacy_table(struct mlx5_eswitch *esw)
+{
+ int err;
+
+ memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+ atomic64_set(&esw->user_count, 0);
+
+ err = esw_create_legacy_vepa_table(esw);
+ if (err)
+ return err;
+
+ err = esw_create_legacy_fdb_table(esw);
+ if (err)
+ esw_destroy_legacy_vepa_table(esw);
+
+ return err;
+}
+
+static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw)
+{
+ if (esw->fdb_table.legacy.vepa_uplink_rule)
+ mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule);
+
+ if (esw->fdb_table.legacy.vepa_star_rule)
+ mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule);
+
+ esw->fdb_table.legacy.vepa_uplink_rule = NULL;
+ esw->fdb_table.legacy.vepa_star_rule = NULL;
+}
+
+static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
+{
+ esw_cleanup_vepa_rules(esw);
+ esw_destroy_legacy_fdb_table(esw);
+ esw_destroy_legacy_vepa_table(esw);
+}
+
+#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
+ MLX5_VPORT_MC_ADDR_CHANGE | \
+ MLX5_VPORT_PROMISC_CHANGE)
+
+int esw_legacy_enable(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int ret;
+
+ ret = esw_create_legacy_table(esw);
+ if (ret)
+ return ret;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+
+ ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
+ if (ret)
+ esw_destroy_legacy_table(esw);
+ return ret;
+}
+
+void esw_legacy_disable(struct mlx5_eswitch *esw)
+{
+ struct esw_mc_addr *mc_promisc;
+
+ mlx5_eswitch_disable_pf_vf_vports(esw);
+
+ mc_promisc = &esw->mc_promisc;
+ if (mc_promisc->uplink_rule)
+ mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+ esw_destroy_legacy_table(esw);
+}
+
+static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
+ u8 setting)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
+
+ if (!setting) {
+ esw_cleanup_vepa_rules(esw);
+ return 0;
+ }
+
+ if (esw->fdb_table.legacy.vepa_uplink_rule)
+ return 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Uplink rule forward uplink traffic to FDB */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = esw->fdb_table.legacy.fdb;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ } else {
+ esw->fdb_table.legacy.vepa_uplink_rule = flow_rule;
+ }
+
+ /* Star rule to forward all traffic to uplink vport */
+ memset(&dest, 0, sizeof(dest));
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = MLX5_VPORT_UPLINK;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ } else {
+ esw->fdb_table.legacy.vepa_star_rule = flow_rule;
+ }
+
+out:
+ kvfree(spec);
+ if (err)
+ esw_cleanup_vepa_rules(esw);
+ return err;
+}
+
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
+{
+ int err = 0;
+
+ if (!esw)
+ return -EOPNOTSUPP;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = _mlx5_eswitch_set_vepa_locked(esw, setting);
+
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
+{
+ if (!esw)
+ return -EOPNOTSUPP;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ if (esw->mode != MLX5_ESWITCH_LEGACY)
+ return -EOPNOTSUPP;
+
+ *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
+ return 0;
+}
+
+int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int ret;
+
+ /* Only non manager vports need ACL in legacy mode */
+ if (mlx5_esw_is_manager_vport(esw, vport->vport))
+ return 0;
+
+ ret = esw_acl_ingress_lgcy_setup(esw, vport);
+ if (ret)
+ goto ingress_err;
+
+ ret = esw_acl_egress_lgcy_setup(esw, vport);
+ if (ret)
+ goto egress_err;
+
+ return 0;
+
+egress_err:
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ingress_err:
+ return ret;
+}
+
+void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ if (mlx5_esw_is_manager_vport(esw, vport->vport))
+ return;
+
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+}
+
+int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev,
+ struct mlx5_vport *vport,
+ struct mlx5_vport_drop_stats *stats)
+{
+ u64 rx_discard_vport_down, tx_discard_vport_down;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ u64 bytes = 0;
+ int err = 0;
+
+ if (esw->mode != MLX5_ESWITCH_LEGACY)
+ return 0;
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->enabled)
+ goto unlock;
+
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter))
+ mlx5_fc_query(dev, vport->egress.legacy.drop_counter,
+ &stats->rx_dropped, &bytes);
+
+ if (vport->ingress.legacy.drop_counter)
+ mlx5_fc_query(dev, vport->ingress.legacy.drop_counter,
+ &stats->tx_dropped, &bytes);
+
+ if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
+ !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ goto unlock;
+
+ err = mlx5_query_vport_down_stats(dev, vport->vport, 1,
+ &rx_discard_vport_down,
+ &tx_discard_vport_down);
+ if (err)
+ goto unlock;
+
+ if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
+ stats->rx_dropped += rx_discard_vport_down;
+ if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ stats->tx_dropped += tx_discard_vport_down;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ u16 vport, u16 vlan, u8 qos)
+{
+ u8 set_flags = 0;
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+ return vlan ? -EPERM : 0;
+
+ if (vlan || qos)
+ set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ if (!vlan)
+ goto unlock; /* compatibility with libvirt */
+
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+ u16 vport, bool spoofchk)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ bool pschk;
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ pschk = evport->info.spoofchk;
+ evport->info.spoofchk = spoofchk;
+ if (pschk && !is_valid_ether_addr(evport->info.mac))
+ mlx5_core_warn(esw->dev,
+ "Spoofchk in set while MAC is invalid, vport(%d)\n",
+ evport->vport);
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
+ if (err)
+ evport->info.spoofchk = pschk;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+ u16 vport, bool setting)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ evport->info.trusted = setting;
+ if (evport->enabled)
+ esw_vport_change_handle_locked(evport);
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
+ u32 max_rate, u32 min_rate)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h
new file mode 100644
index 000000000000..e0820bb72b57
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_ESW_LEGACY_H__
+#define __MLX5_ESW_LEGACY_H__
+
+#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
+ MLX5_VPORT_MC_ADDR_CHANGE | \
+ MLX5_VPORT_PROMISC_CHANGE)
+
+struct mlx5_eswitch;
+
+int esw_legacy_enable(struct mlx5_eswitch *esw);
+void esw_legacy_disable(struct mlx5_eswitch *esw);
+
+int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev,
+ struct mlx5_vport *vport,
+ struct mlx5_vport_drop_stats *stats);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
new file mode 100644
index 000000000000..4f8a24d84a86
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -0,0 +1,955 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "eswitch.h"
+#include "esw/qos.h"
+#include "en/port.h"
+#define CREATE_TRACE_POINTS
+#include "diag/qos_tracepoint.h"
+
+/* Minimum supported BW share value by the HW is 1 Mbit/sec */
+#define MLX5_MIN_BW_SHARE 1
+
+#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
+ min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
+
+struct mlx5_esw_rate_group {
+ u32 tsar_ix;
+ u32 max_rate;
+ u32 min_rate;
+ u32 bw_share;
+ struct list_head list;
+};
+
+static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
+ u32 parent_ix, u32 tsar_ix,
+ u32 max_rate, u32 bw_share)
+{
+ u32 bitmask = 0;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+
+ return mlx5_modify_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ tsar_ix,
+ bitmask);
+}
+
+static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ int err;
+
+ err = esw_qos_tsar_config(dev, sched_ctx,
+ esw->qos.root_tsar_ix, group->tsar_ix,
+ max_rate, bw_share);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
+
+ trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
+
+ return err;
+}
+
+static int esw_qos_vport_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share,
+ struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ if (!vport->qos.enabled)
+ return -EIO;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+ element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+
+ err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
+ max_rate, bw_share);
+ if (err) {
+ esw_warn(esw->dev,
+ "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
+ return err;
+ }
+
+ trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
+
+ return 0;
+}
+
+static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ bool group_level)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_vport *evport;
+ u32 max_guarantee = 0;
+ unsigned long i;
+
+ if (group_level) {
+ struct mlx5_esw_rate_group *group;
+
+ list_for_each_entry(group, &esw->qos.groups, list) {
+ if (group->min_rate < max_guarantee)
+ continue;
+ max_guarantee = group->min_rate;
+ }
+ } else {
+ mlx5_esw_for_each_vport(esw, i, evport) {
+ if (!evport->enabled || !evport->qos.enabled ||
+ evport->qos.group != group || evport->qos.min_rate < max_guarantee)
+ continue;
+ max_guarantee = evport->qos.min_rate;
+ }
+ }
+
+ if (max_guarantee)
+ return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+ /* If vports min rate divider is 0 but their group has bw_share configured, then
+ * need to set bw_share for vports to minimal value.
+ */
+ if (!group_level && !max_guarantee && group && group->bw_share)
+ return 1;
+ return 0;
+}
+
+static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+{
+ if (divider)
+ return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
+
+ return 0;
+}
+
+static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
+ struct mlx5_vport *evport;
+ unsigned long i;
+ u32 bw_share;
+ int err;
+
+ mlx5_esw_for_each_vport(esw, i, evport) {
+ if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
+ continue;
+ bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == evport->qos.bw_share)
+ continue;
+
+ err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ evport->qos.bw_share = bw_share;
+ }
+
+ return 0;
+}
+
+static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_esw_rate_group *group;
+ u32 bw_share;
+ int err;
+
+ list_for_each_entry(group, &esw->qos.groups, list) {
+ bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == group->bw_share)
+ continue;
+
+ err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ group->bw_share = bw_share;
+
+ /* All the group's vports need to be set with default bw_share
+ * to enable them with QOS
+ */
+ err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 min_rate, struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share, previous_min_rate;
+ bool min_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+ fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+ if (min_rate && !min_rate_supported)
+ return -EOPNOTSUPP;
+ if (min_rate == evport->qos.min_rate)
+ return 0;
+
+ previous_min_rate = evport->qos.min_rate;
+ evport->qos.min_rate = min_rate;
+ err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
+ if (err)
+ evport->qos.min_rate = previous_min_rate;
+
+ return err;
+}
+
+static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 max_rate, struct netlink_ext_ack *extack)
+{
+ u32 act_max_rate = max_rate;
+ bool max_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
+ if (max_rate && !max_rate_supported)
+ return -EOPNOTSUPP;
+ if (max_rate == evport->qos.max_rate)
+ return 0;
+
+ /* If parent group has rate limit need to set to group
+ * value when new max rate is 0.
+ */
+ if (evport->qos.group && !max_rate)
+ act_max_rate = evport->qos.group->max_rate;
+
+ err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
+
+ if (!err)
+ evport->qos.max_rate = max_rate;
+
+ return err;
+}
+
+static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 min_rate, struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 previous_min_rate, divider;
+ int err;
+
+ if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
+ return -EOPNOTSUPP;
+
+ if (min_rate == group->min_rate)
+ return 0;
+
+ previous_min_rate = group->min_rate;
+ group->min_rate = min_rate;
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err) {
+ group->min_rate = previous_min_rate;
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
+
+ /* Attempt restoring previous configuration */
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ u32 max_rate, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ if (group->max_rate == max_rate)
+ return 0;
+
+ err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
+ if (err)
+ return err;
+
+ group->max_rate = max_rate;
+
+ /* Any unlimited vports in the group should be set
+ * with the value of the group.
+ */
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport->enabled || !vport->qos.enabled ||
+ vport->qos.group != group || vport->qos.max_rate)
+ continue;
+
+ err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch vport implicit rate limit setting failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *curr_group,
+ struct mlx5_esw_rate_group *new_group,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_rate;
+ int err;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+ return err;
+ }
+
+ vport->qos.group = new_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
+
+ /* If vport is unlimited, we set the group's value.
+ * Therefore, if the group is limited it will apply to
+ * the vport as well and if not, vport will remain unlimited.
+ */
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ vport->qos.group = curr_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
+ if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
+ esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+ vport->vport);
+
+ return err;
+}
+
+static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *new_group, *curr_group;
+ int err;
+
+ if (!vport->enabled)
+ return -EINVAL;
+
+ curr_group = vport->qos.group;
+ new_group = group ?: esw->qos.group0;
+ if (curr_group == new_group)
+ return 0;
+
+ err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+ if (err)
+ return err;
+
+ /* Recalculate bw share weights of old and new groups */
+ if (vport->qos.bw_share || new_group->bw_share) {
+ esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+ esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+ }
+
+ return 0;
+}
+
+static struct mlx5_esw_rate_group *
+__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group;
+ u32 divider;
+ int err;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+ esw->qos.root_tsar_ix);
+ err = mlx5_create_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &group->tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
+ goto err_sched_elem;
+ }
+
+ list_add_tail(&group->list, &esw->qos.groups);
+
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ if (divider) {
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
+ goto err_min_rate;
+ }
+ }
+ trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
+
+ return group;
+
+err_min_rate:
+ list_del(&group->list);
+ if (mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ group->tsar_ix))
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
+err_sched_elem:
+ kfree(group);
+ return ERR_PTR(err);
+}
+
+static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
+static void esw_qos_put(struct mlx5_eswitch *esw);
+
+static struct mlx5_esw_rate_group *
+esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ int err;
+
+ if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = esw_qos_get(esw, extack);
+ if (err)
+ return ERR_PTR(err);
+
+ group = __esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(group))
+ esw_qos_put(esw);
+
+ return group;
+}
+
+static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 divider;
+ int err;
+
+ list_del(&group->list);
+
+ divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ group->tsar_ix);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
+
+ trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
+
+ kfree(group);
+
+ return err;
+}
+
+static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = __esw_qos_destroy_rate_group(esw, group, extack);
+ esw_qos_put(esw);
+
+ return err;
+}
+
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+ switch (type) {
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_TASR;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+ }
+ return false;
+}
+
+static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ __be32 *attr;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, tsar_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+ attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+ *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &esw->qos.root_tsar_ix);
+ if (err) {
+ esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
+ return err;
+ }
+
+ INIT_LIST_HEAD(&esw->qos.groups);
+ if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
+ esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(esw->qos.group0)) {
+ esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
+ PTR_ERR(esw->qos.group0));
+ err = PTR_ERR(esw->qos.group0);
+ goto err_group0;
+ }
+ }
+ refcount_set(&esw->qos.refcnt, 1);
+
+ return 0;
+
+err_group0:
+ if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix))
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
+
+ return err;
+}
+
+static void esw_qos_destroy(struct mlx5_eswitch *esw)
+{
+ int err;
+
+ if (esw->qos.group0)
+ __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+}
+
+static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ lockdep_assert_held(&esw->state_lock);
+
+ if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
+ /* esw_qos_create() set refcount to 1 only on success.
+ * No need to decrement on failure.
+ */
+ err = esw_qos_create(esw, extack);
+ }
+
+ return err;
+}
+
+static void esw_qos_put(struct mlx5_eswitch *esw)
+{
+ lockdep_assert_held(&esw->state_lock);
+ if (refcount_dec_and_test(&esw->qos.refcnt))
+ esw_qos_destroy(esw);
+}
+
+static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (vport->qos.enabled)
+ return 0;
+
+ err = esw_qos_get(esw, extack);
+ if (err)
+ return err;
+
+ vport->qos.group = esw->qos.group0;
+
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+ if (err)
+ goto err_out;
+
+ vport->qos.enabled = true;
+ trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
+
+ return 0;
+
+err_out:
+ esw_qos_put(esw);
+
+ return err;
+}
+
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (!vport->qos.enabled)
+ return;
+ WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+ "Disabling QoS on port before detaching it from group");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+
+ memset(&vport->qos, 0, sizeof(vport->qos));
+ trace_mlx5_esw_vport_qos_destroy(vport);
+
+ esw_qos_put(esw);
+}
+
+int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 min_rate)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
+ if (err)
+ return err;
+
+ err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
+ if (!err)
+ err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
+
+ return err;
+}
+
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
+{
+ u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_vport *vport;
+ u32 bitmask;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->qos.enabled) {
+ /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
+ err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
+ } else {
+ MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+
+ bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ err = mlx5_modify_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ ctx,
+ vport->qos.esw_tsar_ix,
+ bitmask);
+ }
+ mutex_unlock(&esw->state_lock);
+
+ return err;
+}
+
+#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
+
+/* Converts bytes per second value passed in a pointer into megabits per
+ * second, rewriting last. If converted rate exceed link speed or is not a
+ * fraction of Mbps - returns error.
+ */
+static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
+ u64 *rate, struct netlink_ext_ack *extack)
+{
+ u32 link_speed_max, reminder;
+ u64 value;
+ int err;
+
+ err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
+ return err;
+ }
+
+ value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
+ if (reminder) {
+ pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
+ name, *rate);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
+ return -EINVAL;
+ }
+
+ if (value > link_speed_max) {
+ pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
+ name, value, link_speed_max);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
+ return -EINVAL;
+ }
+
+ *rate = value;
+ return 0;
+}
+
+/* Eswitch devlink rate API */
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (err)
+ goto unlock;
+
+ err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (err)
+ goto unlock;
+
+ err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_eswitch *esw;
+ int err = 0;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Rate node creation supported only in switchdev mode");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ group = esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ goto unlock;
+ }
+
+ *priv = group;
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_destroy_rate_group(esw, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->qos.enabled && !group)
+ goto unlock;
+
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (!err)
+ err = esw_qos_vport_update_group(esw, vport, group, extack);
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_vport *vport = priv;
+
+ if (!parent)
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+ vport, NULL, extack);
+
+ group = parent_priv;
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
new file mode 100644
index 000000000000..0141e9d52037
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_ESW_QOS_H__
+#define __MLX5_ESW_QOS_H__
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 max_rate, u32 min_rate);
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack);
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
new file mode 100644
index 000000000000..9e72118f2e4c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 Mellanox Technologies.
+
+#include "eswitch.h"
+
+/* This struct is used as a key to the hash table and we need it to be packed
+ * so hash result is consistent
+ */
+struct mlx5_vport_key {
+ u32 chain;
+ u16 prio;
+ u16 vport;
+ u16 vhca_id;
+ const struct esw_vport_tbl_namespace *vport_ns;
+} __packed;
+
+struct mlx5_vport_table {
+ struct hlist_node hlist;
+ struct mlx5_flow_table *fdb;
+ u32 num_rules;
+ struct mlx5_vport_key key;
+};
+
+static struct mlx5_flow_table *
+esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns,
+ const struct esw_vport_tbl_namespace *vport_ns)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *fdb;
+
+ if (vport_ns->max_num_groups)
+ ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups;
+ else
+ ft_attr.autogroup.max_num_groups = esw->params.large_group_num;
+ ft_attr.max_fte = vport_ns->max_fte;
+ ft_attr.prio = FDB_PER_VPORT;
+ ft_attr.flags = vport_ns->flags;
+ fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n",
+ PTR_ERR(fdb));
+ }
+
+ return fdb;
+}
+
+static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw,
+ struct mlx5_vport_tbl_attr *attr,
+ struct mlx5_vport_key *key)
+{
+ key->vport = attr->vport;
+ key->chain = attr->chain;
+ key->prio = attr->prio;
+ key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ key->vport_ns = attr->vport_ns;
+ return jhash(key, sizeof(*key), 0);
+}
+
+/* caller must hold vports.lock */
+static struct mlx5_vport_table *
+esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key)
+{
+ struct mlx5_vport_table *e;
+
+ hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key)
+ if (!memcmp(&e->key, skey, sizeof(*skey)))
+ return e;
+
+ return NULL;
+}
+
+struct mlx5_flow_table *
+mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_vport_table *e;
+ struct mlx5_vport_key skey;
+ u32 hkey;
+
+ mutex_lock(&esw->fdb_table.offloads.vports.lock);
+ hkey = flow_attr_to_vport_key(esw, attr, &skey);
+ e = esw_vport_tbl_lookup(esw, &skey, hkey);
+ if (e) {
+ e->num_rules++;
+ goto out;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ fdb = ERR_PTR(-ENOMEM);
+ goto err_alloc;
+ }
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!ns) {
+ esw_warn(dev, "Failed to get FDB namespace\n");
+ fdb = ERR_PTR(-ENOENT);
+ goto err_ns;
+ }
+
+ fdb = esw_vport_tbl_create(esw, ns, attr->vport_ns);
+ if (IS_ERR(fdb))
+ goto err_ns;
+
+ e->fdb = fdb;
+ e->num_rules = 1;
+ e->key = skey;
+ hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey);
+out:
+ mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+ return e->fdb;
+
+err_ns:
+ kfree(e);
+err_alloc:
+ mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+ return fdb;
+}
+
+void
+mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+{
+ struct mlx5_vport_table *e;
+ struct mlx5_vport_key key;
+ u32 hkey;
+
+ mutex_lock(&esw->fdb_table.offloads.vports.lock);
+ hkey = flow_attr_to_vport_key(esw, attr, &key);
+ e = esw_vport_tbl_lookup(esw, &key, hkey);
+ if (!e || --e->num_rules)
+ goto out;
+
+ hash_del(&e->hlist);
+ mlx5_destroy_flow_table(e->fdb);
+ kfree(e);
+out:
+ mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index e49acd0c5da5..2169486c4bfb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -35,11 +35,18 @@
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/mpfs.h>
+#include <linux/debugfs.h>
+#include "esw/acl/lgcy.h"
+#include "esw/legacy.h"
+#include "esw/qos.h"
#include "mlx5_core.h"
#include "lib/eq.h"
#include "eswitch.h"
#include "fs_core.h"
+#include "devlink.h"
#include "ecpf.h"
+#include "en/mod_hdr.h"
enum {
MLX5_ACTION_NONE = 0,
@@ -58,33 +65,49 @@ struct vport_addr {
bool mc_promisc;
};
-static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw);
-static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw);
+static int mlx5_eswitch_check(const struct mlx5_core_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EOPNOTSUPP;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
+
+ err = mlx5_eswitch_check(dev);
+ if (err)
+ return ERR_PTR(err);
+
+ return dev->priv.eswitch;
+}
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
- u16 idx;
+ struct mlx5_vport *vport;
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
return ERR_PTR(-EPERM);
- idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-
- if (idx > esw->total_vports - 1) {
- esw_debug(esw->dev, "vport out of range: num(0x%x), idx(0x%x)\n",
- vport_num, idx);
+ vport = xa_load(&esw->vports, vport_num);
+ if (!vport) {
+ esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num);
return ERR_PTR(-EINVAL);
}
-
- return &esw->vports[idx];
+ return vport;
}
static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
u32 events_mask)
{
- int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
- int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {};
void *nic_vport_ctx;
MLX5_SET(modify_nic_vport_context_in, in,
@@ -107,40 +130,24 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
MLX5_SET(nic_vport_context, nic_vport_ctx,
event_on_promisc_change, 1);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_nic_vport_context, in);
}
/* E-Switch vport context HW commands */
int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
- bool other_vport,
- void *in, int inlen)
+ bool other_vport, void *in)
{
- u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
-
MLX5_SET(modify_esw_vport_context_in, in, opcode,
MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
- bool other_vport,
- void *out, int outlen)
-{
- u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
-
- MLX5_SET(query_esw_vport_context_in, in, opcode,
- MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
- MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
- MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ return mlx5_cmd_exec_in(dev, modify_esw_vport_context, in);
}
static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
u16 vlan, u8 qos, u8 set_flags)
{
- u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
!MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
@@ -169,8 +176,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
MLX5_SET(modify_esw_vport_context_in, in,
field_select.vport_cvlan_insert, 1);
- return mlx5_eswitch_modify_esw_vport_context(dev, vport, true,
- in, sizeof(in));
+ return mlx5_eswitch_modify_esw_vport_context(dev, vport, true, in);
}
/* E-Switch FDB */
@@ -270,224 +276,6 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport)
return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
}
-enum {
- LEGACY_VEPA_PRIO = 0,
- LEGACY_FDB_PRIO,
-};
-
-static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_table *fdb;
- int err;
-
- root_ns = mlx5_get_fdb_sub_ns(dev, 0);
- if (!root_ns) {
- esw_warn(dev, "Failed to get FDB flow namespace\n");
- return -EOPNOTSUPP;
- }
-
- /* num FTE 2, num FG 2 */
- ft_attr.prio = LEGACY_VEPA_PRIO;
- ft_attr.max_fte = 2;
- ft_attr.autogroup.max_num_groups = 2;
- fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
- return err;
- }
- esw->fdb_table.legacy.vepa_fdb = fdb;
-
- return 0;
-}
-
-static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_table *fdb;
- struct mlx5_flow_group *g;
- void *match_criteria;
- int table_size;
- u32 *flow_group_in;
- u8 *dmac;
- int err = 0;
-
- esw_debug(dev, "Create FDB log_max_size(%d)\n",
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-
- root_ns = mlx5_get_fdb_sub_ns(dev, 0);
- if (!root_ns) {
- esw_warn(dev, "Failed to get FDB flow namespace\n");
- return -EOPNOTSUPP;
- }
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
-
- table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
- ft_attr.max_fte = table_size;
- ft_attr.prio = LEGACY_FDB_PRIO;
- fdb = mlx5_create_flow_table(root_ns, &ft_attr);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- esw_warn(dev, "Failed to create FDB Table err %d\n", err);
- goto out;
- }
- esw->fdb_table.legacy.fdb = fdb;
-
- /* Addresses group : Full match unicast/multicast addresses */
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_OUTER_HEADERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- /* Preserve 2 entries for allmulti and promisc rules*/
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
- eth_broadcast_addr(dmac);
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create flow group err(%d)\n", err);
- goto out;
- }
- esw->fdb_table.legacy.addr_grp = g;
-
- /* Allmulti group : One rule that forwards any mcast traffic */
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2);
- eth_zero_addr(dmac);
- dmac[0] = 0x01;
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
- goto out;
- }
- esw->fdb_table.legacy.allmulti_grp = g;
-
- /* Promiscuous group :
- * One rule that forward all unmatched traffic from previous groups
- */
- eth_zero_addr(dmac);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
- goto out;
- }
- esw->fdb_table.legacy.promisc_grp = g;
-
-out:
- if (err)
- esw_destroy_legacy_fdb_table(esw);
-
- kvfree(flow_group_in);
- return err;
-}
-
-static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw)
-{
- esw_debug(esw->dev, "Destroy VEPA Table\n");
- if (!esw->fdb_table.legacy.vepa_fdb)
- return;
-
- mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb);
- esw->fdb_table.legacy.vepa_fdb = NULL;
-}
-
-static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
-{
- esw_debug(esw->dev, "Destroy FDB Table\n");
- if (!esw->fdb_table.legacy.fdb)
- return;
-
- if (esw->fdb_table.legacy.promisc_grp)
- mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
- if (esw->fdb_table.legacy.allmulti_grp)
- mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
- if (esw->fdb_table.legacy.addr_grp)
- mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
- mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
-
- esw->fdb_table.legacy.fdb = NULL;
- esw->fdb_table.legacy.addr_grp = NULL;
- esw->fdb_table.legacy.allmulti_grp = NULL;
- esw->fdb_table.legacy.promisc_grp = NULL;
-}
-
-static int esw_create_legacy_table(struct mlx5_eswitch *esw)
-{
- int err;
-
- memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
-
- err = esw_create_legacy_vepa_table(esw);
- if (err)
- return err;
-
- err = esw_create_legacy_fdb_table(esw);
- if (err)
- esw_destroy_legacy_vepa_table(esw);
-
- return err;
-}
-
-static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
-{
- esw_cleanup_vepa_rules(esw);
- esw_destroy_legacy_fdb_table(esw);
- esw_destroy_legacy_vepa_table(esw);
-}
-
-#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
- MLX5_VPORT_MC_ADDR_CHANGE | \
- MLX5_VPORT_PROMISC_CHANGE)
-
-static int esw_legacy_enable(struct mlx5_eswitch *esw)
-{
- struct mlx5_vport *vport;
- int ret, i;
-
- ret = esw_create_legacy_table(esw);
- if (ret)
- return ret;
-
- mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
- vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
-
- ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
- if (ret)
- esw_destroy_legacy_table(esw);
- return ret;
-}
-
-static void esw_legacy_disable(struct mlx5_eswitch *esw)
-{
- struct esw_mc_addr *mc_promisc;
-
- mlx5_eswitch_disable_pf_vf_vports(esw);
-
- mc_promisc = &esw->mc_promisc;
- if (mc_promisc->uplink_rule)
- mlx5_del_flow_rules(mc_promisc->uplink_rule);
-
- esw_destroy_legacy_table(esw);
-}
-
/* E-Switch vport UC/MC lists management */
typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
struct vport_addr *vaddr);
@@ -557,9 +345,10 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
{
u8 *mac = vaddr->node.addr;
struct mlx5_vport *vport;
- u16 i, vport_num;
+ unsigned long i;
+ u16 vport_num;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
struct hlist_head *vport_hash = vport->mc_list;
struct vport_addr *iter_vaddr =
l2addr_hash_find(vport_hash,
@@ -909,7 +698,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw,
(promisc_all || promisc_mc));
}
-static void esw_vport_change_handle_locked(struct mlx5_vport *vport)
+void esw_vport_change_handle_locked(struct mlx5_vport *vport)
{
struct mlx5_core_dev *dev = vport->dev;
struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -953,730 +742,48 @@ static void esw_vport_change_handler(struct work_struct *work)
mutex_unlock(&esw->state_lock);
}
-int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac)
{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_group *vlan_grp = NULL;
- struct mlx5_flow_group *drop_grp = NULL;
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_table *acl;
- void *match_criteria;
- u32 *flow_group_in;
- /* The egress acl table contains 2 rules:
- * 1)Allow traffic with vlan_tag=vst_vlan_id
- * 2)Drop all other traffic.
- */
- int table_size = 2;
- int err = 0;
-
- if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- if (!IS_ERR_OR_NULL(vport->egress.acl))
- return 0;
-
- esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
- vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
-
- root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
- mlx5_eswitch_vport_num_to_index(esw, vport->vport));
- if (!root_ns) {
- esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
- return -EOPNOTSUPP;
- }
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
-
- acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
- if (IS_ERR(acl)) {
- err = PTR_ERR(acl);
- esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
- vport->vport, err);
- goto out;
- }
-
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
-
- vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
- if (IS_ERR(vlan_grp)) {
- err = PTR_ERR(vlan_grp);
- esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
- vport->vport, err);
- goto out;
- }
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
- drop_grp = mlx5_create_flow_group(acl, flow_group_in);
- if (IS_ERR(drop_grp)) {
- err = PTR_ERR(drop_grp);
- esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
- vport->vport, err);
- goto out;
- }
-
- vport->egress.acl = acl;
- vport->egress.drop_grp = drop_grp;
- vport->egress.allowed_vlans_grp = vlan_grp;
-out:
- kvfree(flow_group_in);
- if (err && !IS_ERR_OR_NULL(vlan_grp))
- mlx5_destroy_flow_group(vlan_grp);
- if (err && !IS_ERR_OR_NULL(acl))
- mlx5_destroy_flow_table(acl);
- return err;
+ ((u8 *)node_guid)[7] = mac[0];
+ ((u8 *)node_guid)[6] = mac[1];
+ ((u8 *)node_guid)[5] = mac[2];
+ ((u8 *)node_guid)[4] = 0xff;
+ ((u8 *)node_guid)[3] = 0xfe;
+ ((u8 *)node_guid)[2] = mac[3];
+ ((u8 *)node_guid)[1] = mac[4];
+ ((u8 *)node_guid)[0] = mac[5];
}
-void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+static int esw_vport_setup_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
- mlx5_del_flow_rules(vport->egress.allowed_vlan);
- vport->egress.allowed_vlan = NULL;
- }
-
- if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) {
- mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
- vport->egress.legacy.drop_rule = NULL;
- }
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
+ return esw_legacy_vport_acl_setup(esw, vport);
+ else
+ return esw_vport_create_offloads_acl_tables(esw, vport);
}
-void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
+static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- if (IS_ERR_OR_NULL(vport->egress.acl))
- return;
-
- esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
-
- esw_vport_cleanup_egress_rules(esw, vport);
- mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp);
- mlx5_destroy_flow_group(vport->egress.drop_grp);
- mlx5_destroy_flow_table(vport->egress.acl);
- vport->egress.allowed_vlans_grp = NULL;
- vport->egress.drop_grp = NULL;
- vport->egress.acl = NULL;
-}
-
-static int
-esw_vport_create_legacy_ingress_acl_groups(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_group *g;
- void *match_criteria;
- u32 *flow_group_in;
- int err;
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
-
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n",
- vport->vport, err);
- goto spoof_err;
- }
- vport->ingress.legacy.allow_untagged_spoofchk_grp = g;
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
- vport->vport, err);
- goto untagged_err;
- }
- vport->ingress.legacy.allow_untagged_only_grp = g;
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n",
- vport->vport, err);
- goto allow_spoof_err;
- }
- vport->ingress.legacy.allow_spoofchk_only_grp = g;
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n",
- vport->vport, err);
- goto drop_err;
- }
- vport->ingress.legacy.drop_grp = g;
- kvfree(flow_group_in);
- return 0;
-
-drop_err:
- if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
- vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
- }
-allow_spoof_err:
- if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
- vport->ingress.legacy.allow_untagged_only_grp = NULL;
- }
-untagged_err:
- if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
- vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
- }
-spoof_err:
- kvfree(flow_group_in);
- return err;
-}
-
-int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport, int table_size)
-{
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_table *acl;
- int vport_index;
- int err;
-
- if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
- vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
-
- vport_index = mlx5_eswitch_vport_num_to_index(esw, vport->vport);
- root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport_index);
- if (!root_ns) {
- esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n",
- vport->vport);
- return -EOPNOTSUPP;
- }
-
- acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
- if (IS_ERR(acl)) {
- err = PTR_ERR(acl);
- esw_warn(dev, "vport[%d] ingress create flow Table, err(%d)\n",
- vport->vport, err);
- return err;
- }
- vport->ingress.acl = acl;
- return 0;
-}
-
-void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport)
-{
- if (!vport->ingress.acl)
- return;
-
- mlx5_destroy_flow_table(vport->ingress.acl);
- vport->ingress.acl = NULL;
-}
-
-void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (vport->ingress.legacy.drop_rule) {
- mlx5_del_flow_rules(vport->ingress.legacy.drop_rule);
- vport->ingress.legacy.drop_rule = NULL;
- }
-
- if (vport->ingress.allow_rule) {
- mlx5_del_flow_rules(vport->ingress.allow_rule);
- vport->ingress.allow_rule = NULL;
- }
-}
-
-static void esw_vport_disable_legacy_ingress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (!vport->ingress.acl)
- return;
-
- esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
-
- esw_vport_cleanup_ingress_rules(esw, vport);
- if (vport->ingress.legacy.allow_spoofchk_only_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
- vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
- }
- if (vport->ingress.legacy.allow_untagged_only_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
- vport->ingress.legacy.allow_untagged_only_grp = NULL;
- }
- if (vport->ingress.legacy.allow_untagged_spoofchk_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
- vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
- }
- if (vport->ingress.legacy.drop_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp);
- vport->ingress.legacy.drop_grp = NULL;
- }
- esw_vport_destroy_ingress_acl_table(vport);
-}
-
-static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- struct mlx5_fc *counter = vport->ingress.legacy.drop_counter;
- struct mlx5_flow_destination drop_ctr_dst = {0};
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_spec *spec = NULL;
- int dest_num = 0;
- int err = 0;
- u8 *smac_v;
-
- /* The ingress acl table contains 4 groups
- * (2 active rules at the same time -
- * 1 allow rule from one of the first 3 groups.
- * 1 drop rule from the last group):
- * 1)Allow untagged traffic with smac=original mac.
- * 2)Allow untagged traffic.
- * 3)Allow traffic with smac=original mac.
- * 4)Drop all other traffic.
- */
- int table_size = 4;
-
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- return 0;
- }
-
- if (!vport->ingress.acl) {
- err = esw_vport_create_ingress_acl_table(esw, vport, table_size);
- if (err) {
- esw_warn(esw->dev,
- "vport[%d] enable ingress acl err (%d)\n",
- err, vport->vport);
- return err;
- }
-
- err = esw_vport_create_legacy_ingress_acl_groups(esw, vport);
- if (err)
- goto out;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
- vport->vport, vport->info.vlan, vport->info.qos);
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec) {
- err = -ENOMEM;
- goto out;
- }
-
- if (vport->info.vlan || vport->info.qos)
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
-
- if (vport->info.spoofchk) {
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
- smac_v = MLX5_ADDR_OF(fte_match_param,
- spec->match_value,
- outer_headers.smac_47_16);
- ether_addr_copy(smac_v, vport->info.mac);
- }
-
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- vport->ingress.allow_rule =
- mlx5_add_flow_rules(vport->ingress.acl, spec,
- &flow_act, NULL, 0);
- if (IS_ERR(vport->ingress.allow_rule)) {
- err = PTR_ERR(vport->ingress.allow_rule);
- esw_warn(esw->dev,
- "vport[%d] configure ingress allow rule, err(%d)\n",
- vport->vport, err);
- vport->ingress.allow_rule = NULL;
- goto out;
- }
-
- memset(spec, 0, sizeof(*spec));
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /* Attach drop flow counter */
- if (counter) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter_id = mlx5_fc_id(counter);
- dst = &drop_ctr_dst;
- dest_num++;
- }
- vport->ingress.legacy.drop_rule =
- mlx5_add_flow_rules(vport->ingress.acl, spec,
- &flow_act, dst, dest_num);
- if (IS_ERR(vport->ingress.legacy.drop_rule)) {
- err = PTR_ERR(vport->ingress.legacy.drop_rule);
- esw_warn(esw->dev,
- "vport[%d] configure ingress drop rule, err(%d)\n",
- vport->vport, err);
- vport->ingress.legacy.drop_rule = NULL;
- goto out;
- }
- kvfree(spec);
- return 0;
-
-out:
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- kvfree(spec);
- return err;
-}
-
-int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u16 vlan_id, u32 flow_action)
-{
- struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_spec *spec;
- int err = 0;
-
- if (vport->egress.allowed_vlan)
- return -EEXIST;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return -ENOMEM;
-
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id);
-
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- flow_act.action = flow_action;
- vport->egress.allowed_vlan =
- mlx5_add_flow_rules(vport->egress.acl, spec,
- &flow_act, NULL, 0);
- if (IS_ERR(vport->egress.allowed_vlan)) {
- err = PTR_ERR(vport->egress.allowed_vlan);
- esw_warn(esw->dev,
- "vport[%d] configure egress vlan rule failed, err(%d)\n",
- vport->vport, err);
- vport->egress.allowed_vlan = NULL;
- }
-
- kvfree(spec);
- return err;
-}
-
-static int esw_vport_egress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- struct mlx5_fc *counter = vport->egress.legacy.drop_counter;
- struct mlx5_flow_destination drop_ctr_dst = {0};
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_spec *spec;
- int dest_num = 0;
- int err = 0;
-
- esw_vport_cleanup_egress_rules(esw, vport);
-
- if (!vport->info.vlan && !vport->info.qos) {
- esw_vport_disable_egress_acl(esw, vport);
- return 0;
- }
-
- err = esw_vport_enable_egress_acl(esw, vport);
- if (err) {
- mlx5_core_warn(esw->dev,
- "failed to enable egress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
- vport->vport, vport->info.vlan, vport->info.qos);
-
- /* Allowed vlan rule */
- err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, vport->info.vlan,
- MLX5_FLOW_CONTEXT_ACTION_ALLOW);
- if (err)
- return err;
-
- /* Drop others rule (star rule) */
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- goto out;
-
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /* Attach egress drop flow counter */
- if (counter) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter_id = mlx5_fc_id(counter);
- dst = &drop_ctr_dst;
- dest_num++;
- }
- vport->egress.legacy.drop_rule =
- mlx5_add_flow_rules(vport->egress.acl, spec,
- &flow_act, dst, dest_num);
- if (IS_ERR(vport->egress.legacy.drop_rule)) {
- err = PTR_ERR(vport->egress.legacy.drop_rule);
- esw_warn(esw->dev,
- "vport[%d] configure egress drop rule failed, err(%d)\n",
- vport->vport, err);
- vport->egress.legacy.drop_rule = NULL;
- }
-out:
- kvfree(spec);
- return err;
-}
-
-static bool element_type_supported(struct mlx5_eswitch *esw, int type)
-{
- const struct mlx5_core_dev *dev = esw->dev;
-
- switch (type) {
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_TASR;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT_TC;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
- }
- return false;
-}
-
-/* Vport QoS management */
-static void esw_create_tsar(struct mlx5_eswitch *esw)
-{
- u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- __be32 *attr;
- int err;
-
- if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
- return;
-
- if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
- return;
-
- if (esw->qos.enabled)
- return;
-
- MLX5_SET(scheduling_context, tsar_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
-
- attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
- *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
-
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- tsar_ctx,
- &esw->qos.root_tsar_id);
- if (err) {
- esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
- return;
- }
-
- esw->qos.enabled = true;
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
+ esw_legacy_vport_acl_cleanup(esw, vport);
+ else
+ esw_vport_destroy_offloads_acl_tables(esw, vport);
}
-static void esw_destroy_tsar(struct mlx5_eswitch *esw)
+static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
+ u16 vport_num = vport->vport;
+ int flags;
int err;
- if (!esw->qos.enabled)
- return;
-
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- esw->qos.root_tsar_id);
+ err = esw_vport_setup_acl(esw, vport);
if (err)
- esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);
-
- esw->qos.enabled = false;
-}
-
-static int esw_vport_enable_qos(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u32 initial_max_rate, u32 initial_bw_share)
-{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
- int err = 0;
-
- if (!esw->qos.enabled || !MLX5_CAP_GEN(dev, qos) ||
- !MLX5_CAP_QOS(dev, esw_scheduling))
- return 0;
-
- if (vport->qos.enabled)
- return -EEXIST;
-
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
- element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
- esw->qos.root_tsar_id);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
- initial_max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share);
-
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- &vport->qos.esw_tsar_ix);
- if (err) {
- esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
return err;
- }
-
- vport->qos.enabled = true;
- return 0;
-}
-
-static void esw_vport_disable_qos(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int err;
-
- if (!vport->qos.enabled)
- return;
-
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- vport->qos.esw_tsar_ix);
- if (err)
- esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
-
- vport->qos.enabled = false;
-}
-
-static int esw_vport_qos_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u32 max_rate, u32 bw_share)
-{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
- u32 bitmask = 0;
- int err = 0;
-
- if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
- return -EOPNOTSUPP;
-
- if (!vport->qos.enabled)
- return -EIO;
-
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
- element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
- esw->qos.root_tsar_id);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
- max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
- bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
- bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
-
- err = mlx5_modify_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- vport->qos.esw_tsar_ix,
- bitmask);
- if (err) {
- esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
- return err;
- }
-
- return 0;
-}
-
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
- u32 rate_mbps)
-{
- u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
- struct mlx5_vport *vport;
-
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
-
- return mlx5_modify_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- ctx,
- vport->qos.esw_tsar_ix,
- MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
-}
-
-static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
-{
- ((u8 *)node_guid)[7] = mac[0];
- ((u8 *)node_guid)[6] = mac[1];
- ((u8 *)node_guid)[5] = mac[2];
- ((u8 *)node_guid)[4] = 0xff;
- ((u8 *)node_guid)[3] = 0xfe;
- ((u8 *)node_guid)[2] = mac[3];
- ((u8 *)node_guid)[1] = mac[4];
- ((u8 *)node_guid)[0] = mac[5];
-}
-
-static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- u16 vport_num = vport->vport;
- int flags;
if (mlx5_esw_is_manager_vport(esw, vport_num))
- return;
+ return 0;
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
@@ -1693,119 +800,46 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
flags = (vport->info.vlan || vport->info.qos) ?
SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
- modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
- flags);
-}
-
-static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int ret;
-
- /* Only non manager vports need ACL in legacy mode */
- if (mlx5_esw_is_manager_vport(esw, vport->vport))
- return 0;
-
- if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
- MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) {
- vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false);
- if (IS_ERR(vport->ingress.legacy.drop_counter)) {
- esw_warn(esw->dev,
- "vport[%d] configure ingress drop rule counter failed\n",
- vport->vport);
- vport->ingress.legacy.drop_counter = NULL;
- }
- }
-
- ret = esw_vport_ingress_config(esw, vport);
- if (ret)
- goto ingress_err;
-
- if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
- MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) {
- vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false);
- if (IS_ERR(vport->egress.legacy.drop_counter)) {
- esw_warn(esw->dev,
- "vport[%d] configure egress drop rule counter failed\n",
- vport->vport);
- vport->egress.legacy.drop_counter = NULL;
- }
- }
-
- ret = esw_vport_egress_config(esw, vport);
- if (ret)
- goto egress_err;
+ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
+ vport->info.qos, flags);
return 0;
-
-egress_err:
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
- vport->egress.legacy.drop_counter = NULL;
-
-ingress_err:
- mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
- vport->ingress.legacy.drop_counter = NULL;
- return ret;
-}
-
-static int esw_vport_setup_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (esw->mode == MLX5_ESWITCH_LEGACY)
- return esw_vport_create_legacy_acl_tables(esw, vport);
- else
- return esw_vport_create_offloads_acl_tables(esw, vport);
}
-static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-
+/* Don't cleanup vport->info, it's needed to restore vport configuration */
+static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
- if (mlx5_esw_is_manager_vport(esw, vport->vport))
- return;
-
- esw_vport_disable_egress_acl(esw, vport);
- mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
- vport->egress.legacy.drop_counter = NULL;
+ u16 vport_num = vport->vport;
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
- vport->ingress.legacy.drop_counter = NULL;
-}
+ if (!mlx5_esw_is_manager_vport(esw, vport_num))
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport_num, 1,
+ MLX5_VPORT_ADMIN_STATE_DOWN);
-static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (esw->mode == MLX5_ESWITCH_LEGACY)
- esw_vport_destroy_legacy_acl_tables(esw, vport);
- else
- esw_vport_destroy_offloads_acl_tables(esw, vport);
+ mlx5_esw_qos_vport_disable(esw, vport);
+ esw_vport_cleanup_acl(esw, vport);
}
-static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
- enum mlx5_eswitch_vport_event enabled_events)
+int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events)
{
- u16 vport_num = vport->vport;
+ struct mlx5_vport *vport;
int ret;
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
mutex_lock(&esw->state_lock);
WARN_ON(vport->enabled);
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
- /* Restore old vport configuration */
- esw_apply_vport_conf(esw, vport);
-
- ret = esw_vport_setup_acl(esw, vport);
+ ret = esw_vport_setup(esw, vport);
if (ret)
goto done;
- /* Attach vport to the eswitch rate limiter */
- if (esw_vport_enable_qos(esw, vport, vport->info.max_rate,
- vport->qos.bw_share))
- esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num);
-
/* Sync with current vport context */
vport->enabled_events = enabled_events;
vport->enabled = true;
@@ -1817,6 +851,19 @@ static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
(!vport_num && mlx5_core_is_ecpf(esw->dev)))
vport->info.trusted = true;
+ if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
+ ret = mlx5_esw_vport_vhca_id_set(esw, vport_num);
+ if (ret)
+ goto err_vhca_mapping;
+ }
+
+ /* External controller host PF has factory programmed MAC.
+ * Read it from the device.
+ */
+ if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF)
+ mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac);
+
esw_vport_change_handle_locked(vport);
esw->enabled_vports++;
@@ -1824,12 +871,20 @@ static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
done:
mutex_unlock(&esw->state_lock);
return ret;
+
+err_vhca_mapping:
+ esw_vport_cleanup(esw, vport);
+ mutex_unlock(&esw->state_lock);
+ return ret;
}
-static void esw_disable_vport(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
{
- u16 vport_num = vport->vport;
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
mutex_lock(&esw->state_lock);
if (!vport->enabled)
@@ -1841,22 +896,18 @@ static void esw_disable_vport(struct mlx5_eswitch *esw,
/* Disable events from this vport */
arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+
+ if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ mlx5_esw_vport_vhca_id_clear(esw, vport_num);
+
/* We don't assume VFs will cleanup after themselves.
* Calling vport change handler while vport is disabled will cleanup
* the vport resources.
*/
esw_vport_change_handle_locked(vport);
vport->enabled_events = 0;
- esw_vport_disable_qos(esw, vport);
-
- if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
- esw->mode == MLX5_ESWITCH_LEGACY)
- mlx5_modify_vport_admin_state(esw->dev,
- MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
- vport_num, 1,
- MLX5_VPORT_ADMIN_STATE_DOWN);
-
- esw_vport_cleanup_acl(esw, vport);
+ esw_vport_cleanup(esw, vport);
esw->enabled_vports--;
done:
@@ -1933,16 +984,96 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ memset(&vport->qos, 0, sizeof(vport->qos));
memset(&vport->info, 0, sizeof(vport->info));
vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
}
}
/* Public E-Switch API */
-#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
+int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ int err;
+
+ err = mlx5_esw_vport_enable(esw, vport_num, enabled_events);
+ if (err)
+ return err;
+
+ mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
+ err = esw_offloads_load_rep(esw, vport_num);
+ if (err)
+ goto err_rep;
+
+ return err;
+
+err_rep:
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_vport_disable(esw, vport_num);
+ return err;
+}
+
+void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ esw_offloads_unload_rep(esw, vport_num);
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_vport_disable(esw, vport_num);
+}
+
+void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ if (!vport->enabled)
+ continue;
+ mlx5_eswitch_unload_vport(esw, vport->vport);
+ }
+}
+
+int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
+ if (err)
+ goto vf_err;
+ }
+
+ return 0;
+
+vf_err:
+ mlx5_eswitch_unload_vf_vports(esw, num_vfs);
+ return err;
+}
+
+static int host_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_ecpf(dev))
+ return 0;
+
+ /* Once vport and representor are ready, take out the external host PF
+ * out of initializing state. Enabling HCA clears the iser->initializing
+ * bit and host PF driver loading can progress.
+ */
+ return mlx5_cmd_host_pf_enable_hca(dev);
+}
+
+static void host_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_ecpf(dev))
+ return;
+
+ mlx5_cmd_host_pf_disable_hca(dev);
+}
/* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs
* whichever are present on the eswitch.
@@ -1951,46 +1082,39 @@ int
mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
enum mlx5_eswitch_vport_event enabled_events)
{
- struct mlx5_vport *vport;
- int num_vfs;
int ret;
- int i;
/* Enable PF vport */
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
- ret = esw_enable_vport(esw, vport, enabled_events);
+ ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events);
if (ret)
return ret;
+ /* Enable external host PF HCA */
+ ret = host_pf_enable_hca(esw->dev);
+ if (ret)
+ goto pf_hca_err;
+
/* Enable ECPF vport */
if (mlx5_ecpf_vport_exists(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
- ret = esw_enable_vport(esw, vport, enabled_events);
+ ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events);
if (ret)
goto ecpf_err;
}
/* Enable VF vports */
- mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
- ret = esw_enable_vport(esw, vport, enabled_events);
- if (ret)
- goto vf_err;
- }
+ ret = mlx5_eswitch_load_vf_vports(esw, esw->esw_funcs.num_vfs,
+ enabled_events);
+ if (ret)
+ goto vf_err;
return 0;
vf_err:
- num_vfs = i - 1;
- mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, num_vfs)
- esw_disable_vport(esw, vport);
-
- if (mlx5_ecpf_vport_exists(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
- esw_disable_vport(esw, vport);
- }
-
+ if (mlx5_ecpf_vport_exists(esw->dev))
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
ecpf_err:
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
- esw_disable_vport(esw, vport);
+ host_pf_disable_hca(esw->dev);
+pf_hca_err:
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
return ret;
}
@@ -1999,116 +1123,451 @@ ecpf_err:
*/
void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
{
- struct mlx5_vport *vport;
- int i;
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+
+ if (mlx5_ecpf_vport_exists(esw->dev))
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
- mlx5_esw_for_all_vports_reverse(esw, i, vport)
- esw_disable_vport(esw, vport);
+ host_pf_disable_hca(esw->dev);
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
}
-int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode)
+static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+ union devlink_param_value val;
int err;
- if (!ESW_ALLOWED(esw) ||
- !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
- esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
- return -EOPNOTSUPP;
+ err = devlink_param_driverinit_value_get(devlink,
+ MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ &val);
+ if (!err) {
+ esw->params.large_group_num = val.vu32;
+ } else {
+ esw_warn(esw->dev,
+ "Devlink can't get param fdb_large_groups, uses default (%d).\n",
+ ESW_OFFLOADS_DEFAULT_NUM_GROUPS);
+ esw->params.large_group_num = ESW_OFFLOADS_DEFAULT_NUM_GROUPS;
+ }
+}
+
+static void
+mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs)
+{
+ const u32 *out;
+
+ if (num_vfs < 0)
+ return;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ esw->esw_funcs.num_vfs = num_vfs;
+ return;
}
- if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "ingress ACL is not supported by FW\n");
+ out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(out))
+ return;
- if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "engress ACL is not supported by FW\n");
+ esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
+ kvfree(out);
+}
+
+static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode)
+{
+ struct mlx5_esw_event_info info = {};
- esw_create_tsar(esw);
+ info.new_mode = mode;
- esw->mode = mode;
+ blocking_notifier_call_chain(&esw->n_head, 0, &info);
+}
- mlx5_lag_update(esw->dev);
+static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ int total_vports;
+ int err;
- if (mode == MLX5_ESWITCH_LEGACY) {
+ if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED)
+ return 0;
+
+ total_vports = mlx5_eswitch_get_total_vports(dev);
+
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
+ err = mlx5_fs_egress_acls_init(dev, total_vports);
+ if (err)
+ return err;
+ } else {
+ esw_warn(dev, "engress ACL is not supported by FW\n");
+ }
+
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
+ err = mlx5_fs_ingress_acls_init(dev, total_vports);
+ if (err)
+ goto err;
+ } else {
+ esw_warn(dev, "ingress ACL is not supported by FW\n");
+ }
+ esw->flags |= MLX5_ESWITCH_VPORT_ACL_NS_CREATED;
+ return 0;
+
+err:
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+ mlx5_fs_egress_acls_cleanup(dev);
+ return err;
+}
+
+static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+
+ esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED;
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
+ mlx5_fs_ingress_acls_cleanup(dev);
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+ mlx5_fs_egress_acls_cleanup(dev);
+}
+
+/**
+ * mlx5_eswitch_enable_locked - Enable eswitch
+ * @esw: Pointer to eswitch
+ * @num_vfs: Enable eswitch for given number of VFs. This is optional.
+ * Valid value are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS.
+ * Caller should pass num_vfs > 0 when enabling eswitch for
+ * vf vports. Caller should pass num_vfs = 0, when eswitch
+ * is enabled without sriov VFs or when caller
+ * is unaware of the sriov state of the host PF on ECPF based
+ * eswitch. Caller should pass < 0 when num_vfs should be
+ * completely ignored. This is typically the case when eswitch
+ * is enabled without sriov regardless of PF/ECPF system.
+ * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads
+ * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports.
+ * It returns 0 on success or error code on failure.
+ */
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
+{
+ int err;
+
+ lockdep_assert_held(&esw->mode_lock);
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+ esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
+ return -EOPNOTSUPP;
+ }
+
+ mlx5_eswitch_get_devlink_param(esw);
+
+ err = mlx5_esw_acls_ns_init(esw);
+ if (err)
+ return err;
+
+ mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
+
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
} else {
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_rescan_drivers(esw->dev);
err = esw_offloads_enable(esw);
}
if (err)
goto abort;
+ esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
+
mlx5_eswitch_event_handlers_register(esw);
esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
- mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
esw->esw_funcs.num_vfs, esw->enabled_vports);
+ mlx5_esw_mode_change_notify(esw, esw->mode);
+
return 0;
abort:
- esw->mode = MLX5_ESWITCH_NONE;
+ mlx5_esw_acls_ns_cleanup(esw);
+ return err;
+}
+
+/**
+ * mlx5_eswitch_enable - Enable eswitch
+ * @esw: Pointer to eswitch
+ * @num_vfs: Enable eswitch switch for given number of VFs.
+ * Caller must pass num_vfs > 0 when enabling eswitch for
+ * vf vports.
+ * mlx5_eswitch_enable() returns 0 on success or error code on failure.
+ */
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
+{
+ bool toggle_lag;
+ int ret;
+
+ if (!mlx5_esw_allowed(esw))
+ return 0;
- if (mode == MLX5_ESWITCH_OFFLOADS) {
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ devl_assert_locked(priv_to_devlink(esw->dev));
+
+ toggle_lag = !mlx5_esw_is_fdb_created(esw);
+
+ if (toggle_lag)
+ mlx5_lag_disable_change(esw->dev);
+
+ down_write(&esw->mode_lock);
+ if (!mlx5_esw_is_fdb_created(esw)) {
+ ret = mlx5_eswitch_enable_locked(esw, num_vfs);
+ } else {
+ enum mlx5_eswitch_vport_event vport_events;
+
+ vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ?
+ MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE;
+ ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events);
+ if (!ret)
+ esw->esw_funcs.num_vfs = num_vfs;
}
+ up_write(&esw->mode_lock);
- return err;
+ if (toggle_lag)
+ mlx5_lag_enable_change(esw->dev);
+
+ return ret;
}
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
+/* When disabling sriov, free driver level resources. */
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
{
- int old_mode;
-
- if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE)
+ if (!mlx5_esw_allowed(esw))
return;
- esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ devl_assert_locked(priv_to_devlink(esw->dev));
+ down_write(&esw->mode_lock);
+ /* If driver is unloaded, this function is called twice by remove_one()
+ * and mlx5_unload(). Prevent the second call.
+ */
+ if (!esw->esw_funcs.num_vfs && !clear_vf)
+ goto unlock;
+
+ esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n",
esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
esw->esw_funcs.num_vfs, esw->enabled_vports);
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+ if (clear_vf)
+ mlx5_eswitch_clear_vf_vports_info(esw);
+ /* If disabling sriov in switchdev mode, free meta rules here
+ * because it depends on num_vfs.
+ */
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS) {
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
+ devl_rate_nodes_destroy(devlink);
+ }
+
+ esw->esw_funcs.num_vfs = 0;
+
+unlock:
+ up_write(&esw->mode_lock);
+}
+
+/* Free resources for corresponding eswitch mode. It is called by devlink
+ * when changing eswitch mode or modprobe when unloading driver.
+ */
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
+{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
+ /* Notify eswitch users that it is exiting from current mode.
+ * So that it can do necessary cleanup before the eswitch is disabled.
+ */
+ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY);
+
mlx5_eswitch_event_handlers_unregister(esw);
- if (esw->mode == MLX5_ESWITCH_LEGACY)
- esw_legacy_disable(esw);
- else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
- esw_offloads_disable(esw);
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
- esw_destroy_tsar(esw);
+ if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) {
+ esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED;
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ esw_offloads_disable(esw);
+ else if (esw->mode == MLX5_ESWITCH_LEGACY)
+ esw_legacy_disable(esw);
+ mlx5_esw_acls_ns_cleanup(esw);
+ }
- old_mode = esw->mode;
- esw->mode = MLX5_ESWITCH_NONE;
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ devl_rate_nodes_destroy(devlink);
+}
- mlx5_lag_update(esw->dev);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
+{
+ if (!mlx5_esw_allowed(esw))
+ return;
- if (old_mode == MLX5_ESWITCH_OFFLOADS) {
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ devl_assert_locked(priv_to_devlink(esw->dev));
+ mlx5_lag_disable_change(esw->dev);
+ down_write(&esw->mode_lock);
+ mlx5_eswitch_disable_locked(esw);
+ up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
+}
+
+static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
+{
+ u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
+
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id)
+
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ if (!mlx5_core_is_ecpf(dev)) {
+ *max_sfs = 0;
+ return 0;
}
- if (clear_vf)
- mlx5_eswitch_clear_vf_vports_info(esw);
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_cap_host_pf(dev, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf);
+ *sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
+static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev,
+ int index, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+ int err;
+
+ vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+ if (!vport)
+ return -ENOMEM;
+
+ vport->dev = esw->dev;
+ vport->vport = vport_num;
+ vport->index = index;
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler);
+ err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL);
+ if (err)
+ goto insert_err;
+
+ esw->total_vports++;
+ return 0;
+
+insert_err:
+ kfree(vport);
+ return err;
+}
+
+static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ xa_erase(&esw->vports, vport->vport);
+ kfree(vport);
+}
+
+static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vport(esw, i, vport)
+ mlx5_esw_vport_free(esw, vport);
+ xa_destroy(&esw->vports);
+}
+
+static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ u16 max_host_pf_sfs;
+ u16 base_sf_num;
+ int idx = 0;
+ int err;
+ int i;
+
+ xa_init(&esw->vports);
+
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF);
+ if (err)
+ goto err;
+ if (esw->first_host_vport == MLX5_VPORT_PF)
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+
+ for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, idx);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+ }
+ base_sf_num = mlx5_sf_start_function_id(dev);
+ for (i = 0; i < mlx5_sf_max_functions(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num);
+ if (err)
+ goto err;
+ for (i = 0; i < max_host_pf_sfs; i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ if (mlx5_ecpf_vport_exists(dev)) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF);
+ if (err)
+ goto err;
+ idx++;
+ }
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK);
+ if (err)
+ goto err;
+ return 0;
+
+err:
+ mlx5_esw_vports_cleanup(esw);
+ return err;
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- int total_vports;
- int err, i;
+ int err;
if (!MLX5_VPORT_MANAGER(dev))
return 0;
- total_vports = mlx5_eswitch_get_total_vports(dev);
-
- esw_info(dev,
- "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
- total_vports,
- MLX5_MAX_UC_PER_VPORT(dev),
- MLX5_MAX_MC_PER_VPORT(dev));
-
esw = kzalloc(sizeof(*esw), GFP_KERNEL);
if (!esw)
return -ENOMEM;
@@ -2123,45 +1582,54 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
goto abort;
}
- esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
- GFP_KERNEL);
- if (!esw->vports) {
- err = -ENOMEM;
+ err = mlx5_esw_vports_init(esw);
+ if (err)
goto abort;
- }
-
- esw->total_vports = total_vports;
err = esw_offloads_init_reps(esw);
if (err)
- goto abort;
+ goto reps_err;
mutex_init(&esw->offloads.encap_tbl_lock);
hash_init(esw->offloads.encap_tbl);
- mutex_init(&esw->offloads.mod_hdr.lock);
- hash_init(esw->offloads.mod_hdr.hlist);
+ mutex_init(&esw->offloads.decap_tbl_lock);
+ hash_init(esw->offloads.decap_tbl);
+ mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr);
atomic64_set(&esw->offloads.num_flows, 0);
+ ida_init(&esw->offloads.vport_metadata_ida);
+ xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
-
- mlx5_esw_for_all_vports(esw, i, vport) {
- vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
- vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
- vport->dev = dev;
- INIT_WORK(&vport->vport_change_handler,
- esw_vport_change_handler);
- }
+ init_rwsem(&esw->mode_lock);
+ refcount_set(&esw->qos.refcnt, 0);
esw->enabled_vports = 0;
- esw->mode = MLX5_ESWITCH_NONE;
+ esw->mode = MLX5_ESWITCH_LEGACY;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+ else
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (MLX5_ESWITCH_MANAGER(dev) &&
+ mlx5_esw_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
dev->priv.eswitch = esw;
+ BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+
+ esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
+ esw_info(dev,
+ "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+ esw->total_vports,
+ MLX5_MAX_UC_PER_VPORT(dev),
+ MLX5_MAX_MC_PER_VPORT(dev));
return 0;
+
+reps_err:
+ mlx5_esw_vports_cleanup(esw);
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
- esw_offloads_cleanup_reps(esw);
- kfree(esw->vports);
kfree(esw);
return err;
}
@@ -2173,80 +1641,126 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n");
+ debugfs_remove_recursive(esw->dbgfs);
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
- esw_offloads_cleanup_reps(esw);
- mutex_destroy(&esw->offloads.mod_hdr.lock);
+ WARN_ON(refcount_read(&esw->qos.refcnt));
+ mutex_destroy(&esw->state_lock);
+ WARN_ON(!xa_empty(&esw->offloads.vhca_map));
+ xa_destroy(&esw->offloads.vhca_map);
+ ida_destroy(&esw->offloads.vport_metadata_ida);
+ mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr);
mutex_destroy(&esw->offloads.encap_tbl_lock);
- kfree(esw->vports);
+ mutex_destroy(&esw->offloads.decap_tbl_lock);
+ esw_offloads_cleanup_reps(esw);
+ mlx5_esw_vports_cleanup(esw);
kfree(esw);
}
/* Vport Administration */
-int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
- u16 vport, u8 mac[ETH_ALEN])
+static int
+mlx5_esw_set_vport_mac_locked(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport, const u8 *mac)
{
- struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ u16 vport_num = evport->vport;
u64 node_guid;
int err = 0;
- if (IS_ERR(evport))
- return PTR_ERR(evport);
if (is_multicast_ether_addr(mac))
return -EINVAL;
- mutex_lock(&esw->state_lock);
-
if (evport->info.spoofchk && !is_valid_ether_addr(mac))
mlx5_core_warn(esw->dev,
"Set invalid MAC while spoofchk is on, vport(%d)\n",
- vport);
+ vport_num);
- err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
+ err = mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, mac);
if (err) {
mlx5_core_warn(esw->dev,
"Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
- vport, err);
- goto unlock;
+ vport_num, err);
+ return err;
}
node_guid_gen_from_mac(&node_guid, mac);
- err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid);
+ err = mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, node_guid);
if (err)
mlx5_core_warn(esw->dev,
"Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
- vport, err);
+ vport_num, err);
ether_addr_copy(evport->info.mac, mac);
evport->info.node_guid = node_guid;
if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
- err = esw_vport_ingress_config(esw, evport);
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
-unlock:
+ return err;
+}
+
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ u16 vport, const u8 *mac)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err = 0;
+
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_set_vport_mac_locked(esw, evport, mac);
mutex_unlock(&esw->state_lock);
return err;
}
+static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return false;
+
+ return xa_get_mark(&esw->vports, vport_num, mark);
+}
+
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF);
+}
+
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF);
+}
+
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
u16 vport, int link_state)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int opmod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
+ int other_vport = 1;
int err = 0;
- if (!ESW_ALLOWED(esw))
+ if (!mlx5_esw_allowed(esw))
return -EPERM;
if (IS_ERR(evport))
return PTR_ERR(evport);
+ if (vport == MLX5_VPORT_UPLINK) {
+ opmod = MLX5_VPORT_STATE_OP_MOD_UPLINK;
+ other_vport = 0;
+ vport = 0;
+ }
mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
- err = mlx5_modify_vport_admin_state(esw->dev,
- MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
- vport, 1, link_state);
+ err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state);
if (err) {
- mlx5_core_warn(esw->dev,
- "Failed to set vport %d link state, err = %d",
- vport, err);
+ mlx5_core_warn(esw->dev, "Failed to set vport %d link state, opmod = %d, err = %d",
+ vport, opmod, err);
goto unlock;
}
@@ -2275,8 +1789,10 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
ivi->qos = evport->info.qos;
ivi->spoofchk = evport->info.spoofchk;
ivi->trusted = evport->info.trusted;
- ivi->min_tx_rate = evport->info.min_rate;
- ivi->max_tx_rate = evport->info.max_rate;
+ if (evport->qos.enabled) {
+ ivi->min_tx_rate = evport->qos.min_rate;
+ ivi->max_tx_rate = evport->qos.max_rate;
+ }
mutex_unlock(&esw->state_lock);
return 0;
@@ -2288,8 +1804,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
int err = 0;
- if (!ESW_ALLOWED(esw))
- return -EPERM;
if (IS_ERR(evport))
return PTR_ERR(evport);
if (vlan > 4095 || qos > 7)
@@ -2302,341 +1816,23 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
evport->info.vlan = vlan;
evport->info.qos = qos;
if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
- err = esw_vport_ingress_config(esw, evport);
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
if (err)
return err;
- err = esw_vport_egress_config(esw, evport);
- }
-
- return err;
-}
-
-int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
- u16 vport, u16 vlan, u8 qos)
-{
- u8 set_flags = 0;
- int err;
-
- if (vlan || qos)
- set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
-
- mutex_lock(&esw->state_lock);
- err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
- mutex_unlock(&esw->state_lock);
-
- return err;
-}
-
-int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
- u16 vport, bool spoofchk)
-{
- struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
- bool pschk;
- int err = 0;
-
- if (!ESW_ALLOWED(esw))
- return -EPERM;
- if (IS_ERR(evport))
- return PTR_ERR(evport);
-
- mutex_lock(&esw->state_lock);
- pschk = evport->info.spoofchk;
- evport->info.spoofchk = spoofchk;
- if (pschk && !is_valid_ether_addr(evport->info.mac))
- mlx5_core_warn(esw->dev,
- "Spoofchk in set while MAC is invalid, vport(%d)\n",
- evport->vport);
- if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
- err = esw_vport_ingress_config(esw, evport);
- if (err)
- evport->info.spoofchk = pschk;
- mutex_unlock(&esw->state_lock);
-
- return err;
-}
-
-static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw)
-{
- if (esw->fdb_table.legacy.vepa_uplink_rule)
- mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule);
-
- if (esw->fdb_table.legacy.vepa_star_rule)
- mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule);
-
- esw->fdb_table.legacy.vepa_uplink_rule = NULL;
- esw->fdb_table.legacy.vepa_star_rule = NULL;
-}
-
-static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
- u8 setting)
-{
- struct mlx5_flow_destination dest = {};
- struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_handle *flow_rule;
- struct mlx5_flow_spec *spec;
- int err = 0;
- void *misc;
-
- if (!setting) {
- esw_cleanup_vepa_rules(esw);
- return 0;
- }
-
- if (esw->fdb_table.legacy.vepa_uplink_rule)
- return 0;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return -ENOMEM;
-
- /* Uplink rule forward uplink traffic to FDB */
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = esw->fdb_table.legacy.fdb;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
- &flow_act, &dest, 1);
- if (IS_ERR(flow_rule)) {
- err = PTR_ERR(flow_rule);
- goto out;
- } else {
- esw->fdb_table.legacy.vepa_uplink_rule = flow_rule;
- }
-
- /* Star rule to forward all traffic to uplink vport */
- memset(spec, 0, sizeof(*spec));
- memset(&dest, 0, sizeof(dest));
- dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest.vport.num = MLX5_VPORT_UPLINK;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
- &flow_act, &dest, 1);
- if (IS_ERR(flow_rule)) {
- err = PTR_ERR(flow_rule);
- goto out;
- } else {
- esw->fdb_table.legacy.vepa_star_rule = flow_rule;
- }
-
-out:
- kvfree(spec);
- if (err)
- esw_cleanup_vepa_rules(esw);
- return err;
-}
-
-int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
-{
- int err = 0;
-
- if (!esw)
- return -EOPNOTSUPP;
-
- if (!ESW_ALLOWED(esw))
- return -EPERM;
-
- mutex_lock(&esw->state_lock);
- if (esw->mode != MLX5_ESWITCH_LEGACY) {
- err = -EOPNOTSUPP;
- goto out;
- }
-
- err = _mlx5_eswitch_set_vepa_locked(esw, setting);
-
-out:
- mutex_unlock(&esw->state_lock);
- return err;
-}
-
-int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
-{
- if (!esw)
- return -EOPNOTSUPP;
-
- if (!ESW_ALLOWED(esw))
- return -EPERM;
-
- if (esw->mode != MLX5_ESWITCH_LEGACY)
- return -EOPNOTSUPP;
-
- *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
- return 0;
-}
-
-int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
- u16 vport, bool setting)
-{
- struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
-
- if (!ESW_ALLOWED(esw))
- return -EPERM;
- if (IS_ERR(evport))
- return PTR_ERR(evport);
-
- mutex_lock(&esw->state_lock);
- evport->info.trusted = setting;
- if (evport->enabled)
- esw_vport_change_handle_locked(evport);
- mutex_unlock(&esw->state_lock);
-
- return 0;
-}
-
-static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
-{
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- struct mlx5_vport *evport;
- u32 max_guarantee = 0;
- int i;
-
- mlx5_esw_for_all_vports(esw, i, evport) {
- if (!evport->enabled || evport->info.min_rate < max_guarantee)
- continue;
- max_guarantee = evport->info.min_rate;
- }
-
- return max_t(u32, max_guarantee / fw_max_bw_share, 1);
-}
-
-static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
-{
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- struct mlx5_vport *evport;
- u32 vport_max_rate;
- u32 vport_min_rate;
- u32 bw_share;
- int err;
- int i;
-
- mlx5_esw_for_all_vports(esw, i, evport) {
- if (!evport->enabled)
- continue;
- vport_min_rate = evport->info.min_rate;
- vport_max_rate = evport->info.max_rate;
- bw_share = MLX5_MIN_BW_SHARE;
-
- if (vport_min_rate)
- bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
- divider,
- fw_max_bw_share);
-
- if (bw_share == evport->qos.bw_share)
- continue;
-
- err = esw_vport_qos_config(esw, evport, vport_max_rate,
- bw_share);
- if (!err)
- evport->qos.bw_share = bw_share;
- else
- return err;
- }
-
- return 0;
-}
-
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
- u32 max_rate, u32 min_rate)
-{
- struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
- u32 fw_max_bw_share;
- u32 previous_min_rate;
- u32 divider;
- bool min_rate_supported;
- bool max_rate_supported;
- int err = 0;
-
- if (!ESW_ALLOWED(esw))
- return -EPERM;
- if (IS_ERR(evport))
- return PTR_ERR(evport);
-
- fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
- fw_max_bw_share >= MLX5_MIN_BW_SHARE;
- max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
-
- if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
- return -EOPNOTSUPP;
-
- mutex_lock(&esw->state_lock);
-
- if (min_rate == evport->info.min_rate)
- goto set_max_rate;
-
- previous_min_rate = evport->info.min_rate;
- evport->info.min_rate = min_rate;
- divider = calculate_vports_min_rate_divider(esw);
- err = normalize_vports_min_rate(esw, divider);
- if (err) {
- evport->info.min_rate = previous_min_rate;
- goto unlock;
+ err = esw_acl_egress_lgcy_setup(esw, evport);
}
-set_max_rate:
- if (max_rate == evport->info.max_rate)
- goto unlock;
-
- err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share);
- if (!err)
- evport->info.max_rate = max_rate;
-
-unlock:
- mutex_unlock(&esw->state_lock);
return err;
}
-static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
- struct mlx5_vport *vport,
- struct mlx5_vport_drop_stats *stats)
-{
- struct mlx5_eswitch *esw = dev->priv.eswitch;
- u64 rx_discard_vport_down, tx_discard_vport_down;
- u64 bytes = 0;
- int err = 0;
-
- if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY)
- return 0;
-
- if (vport->egress.legacy.drop_counter)
- mlx5_fc_query(dev, vport->egress.legacy.drop_counter,
- &stats->rx_dropped, &bytes);
-
- if (vport->ingress.legacy.drop_counter)
- mlx5_fc_query(dev, vport->ingress.legacy.drop_counter,
- &stats->tx_dropped, &bytes);
-
- if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
- !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
- return 0;
-
- err = mlx5_query_vport_down_stats(dev, vport->vport, 1,
- &rx_discard_vport_down,
- &tx_discard_vport_down);
- if (err)
- return err;
-
- if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
- stats->rx_dropped += rx_discard_vport_down;
- if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
- stats->tx_dropped += tx_discard_vport_down;
-
- return 0;
-}
-
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
u16 vport_num,
struct ifla_vf_stats *vf_stats)
{
struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
- u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
- struct mlx5_vport_drop_stats stats = {0};
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {};
+ struct mlx5_vport_drop_stats stats = {};
int err = 0;
u32 *out;
@@ -2653,7 +1849,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport);
MLX5_SET(query_vport_counter_in, in, other_vport, 1);
- err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
+ err = mlx5_cmd_exec_inout(esw->dev, query_vport_counter, in, out);
if (err)
goto free_out;
@@ -2696,7 +1892,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
vf_stats->broadcast =
MLX5_GET_CTR(out, received_eth_broadcast.packets);
- err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+ err = mlx5_esw_query_vport_drop_stats(esw->dev, vport, &stats);
if (err)
goto free_out;
vf_stats->rx_dropped = stats.rx_dropped;
@@ -2707,9 +1903,11 @@ free_out:
return err;
}
-u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
{
- return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+ return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
@@ -2719,45 +1917,143 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
struct mlx5_eswitch *esw;
esw = dev->priv.eswitch;
- return ESW_ALLOWED(esw) ? esw->offloads.encap :
+ return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap :
DEVLINK_ESWITCH_ENCAP_MODE_NONE;
}
EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
+bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS);
+}
+
+int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *nb)
{
- if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
- (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
+ return blocking_notifier_chain_register(&esw->n_head, nb);
+}
+
+void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&esw->n_head, nb);
+}
+
+/**
+ * mlx5_esw_hold() - Try to take a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ *
+ * Should be called by esw resources callers.
+ *
+ * Return: true on success or false.
+ */
+bool mlx5_esw_hold(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ /* e.g. VF doesn't have eswitch so nothing to do */
+ if (!mlx5_esw_allowed(esw))
+ return true;
+
+ if (down_read_trylock(&esw->mode_lock) != 0)
return true;
return false;
}
-bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1)
+/**
+ * mlx5_esw_release() - Release a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_release(struct mlx5_core_dev *mdev)
{
- return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS);
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ if (mlx5_esw_allowed(esw))
+ up_read(&esw->mode_lock);
}
-void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs)
+/**
+ * mlx5_esw_get() - Increase esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_get(struct mlx5_core_dev *mdev)
{
- const u32 *out;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
- WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE);
+ if (mlx5_esw_allowed(esw))
+ atomic64_inc(&esw->user_count);
+}
- if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- esw->esw_funcs.num_vfs = num_vfs;
- return;
+/**
+ * mlx5_esw_put() - Decrease esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_put(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ if (mlx5_esw_allowed(esw))
+ atomic64_dec_if_positive(&esw->user_count);
+}
+
+/**
+ * mlx5_esw_try_lock() - Take a write lock on esw mode lock.
+ * @esw: eswitch device.
+ *
+ * Should be called by esw mode change routine.
+ *
+ * Return:
+ * * 0 - esw mode if successfully locked and refcount is 0.
+ * * -EBUSY - refcount is not 0.
+ * * -EINVAL - In the middle of switching mode or lock is already held.
+ */
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
+{
+ if (down_write_trylock(&esw->mode_lock) == 0)
+ return -EINVAL;
+
+ if (atomic64_read(&esw->user_count) > 0) {
+ up_write(&esw->mode_lock);
+ return -EBUSY;
}
- out = mlx5_esw_query_functions(esw->dev);
- if (IS_ERR(out))
- return;
+ return esw->mode;
+}
- esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out,
- host_params_context.host_num_of_vfs);
- kvfree(out);
+/**
+ * mlx5_esw_unlock() - Release write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_unlock(struct mlx5_eswitch *esw)
+{
+ up_write(&esw->mode_lock);
+}
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev: Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports returns total number of eswitch vports.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw : eswitch device.
+ *
+ * Return the mellanox core device which manages the eswitch.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+ return mlx5_esw_allowed(esw) ? esw->dev : NULL;
}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 4472710ccc9c..f68dc2d0dbe6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -36,39 +36,63 @@
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
+#include <linux/xarray.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "lib/mpfs.h"
+#include "lib/fs_chains.h"
+#include "sf/sf.h"
+#include "en/tc_ct.h"
+#include "en/tc/sample.h"
+
+enum mlx5_mapped_obj_type {
+ MLX5_MAPPED_OBJ_CHAIN,
+ MLX5_MAPPED_OBJ_SAMPLE,
+ MLX5_MAPPED_OBJ_INT_PORT_METADATA,
+};
-#define FDB_TC_MAX_CHAIN 3
-#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1)
-#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1)
-
-/* The index of the last real chain (FT) + 1 as chain zero is valid as well */
-#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1)
-
-#define FDB_TC_MAX_PRIO 16
-#define FDB_TC_LEVELS_PER_PRIO 2
+struct mlx5_mapped_obj {
+ enum mlx5_mapped_obj_type type;
+ union {
+ u32 chain;
+ struct {
+ u32 group_id;
+ u32 rate;
+ u32 trunc_size;
+ u32 tunnel_id;
+ } sample;
+ u32 int_port_metadata;
+ };
+};
#ifdef CONFIG_MLX5_ESWITCH
+#define ESW_OFFLOADS_DEFAULT_NUM_GROUPS 15
+
#define MLX5_MAX_UC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
#define MLX5_MAX_MC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
-#define MLX5_MIN_BW_SHARE 1
-
-#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
- min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)
-
#define mlx5_esw_has_fwd_fdb(dev) \
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
+#define esw_chains(esw) \
+ ((esw)->fdb_table.offloads.esw_chains_priv)
+
+enum {
+ MAPPING_TYPE_CHAIN,
+ MAPPING_TYPE_TUNNEL,
+ MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ MAPPING_TYPE_LABELS,
+ MAPPING_TYPE_ZONE,
+ MAPPING_TYPE_INT_PORT,
+};
+
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_handle *allow_rule;
@@ -89,20 +113,31 @@ struct vport_ingress {
* packet with metadata.
*/
struct mlx5_flow_group *metadata_allmatch_grp;
+ /* Optional group to add a drop all rule */
+ struct mlx5_flow_group *drop_grp;
struct mlx5_modify_hdr *modify_metadata;
struct mlx5_flow_handle *modify_metadata_rule;
+ struct mlx5_flow_handle *drop_rule;
} offloads;
};
struct vport_egress {
struct mlx5_flow_table *acl;
- struct mlx5_flow_group *allowed_vlans_grp;
- struct mlx5_flow_group *drop_grp;
struct mlx5_flow_handle *allowed_vlan;
- struct {
- struct mlx5_flow_handle *drop_rule;
- struct mlx5_fc *drop_counter;
- } legacy;
+ struct mlx5_flow_group *vlan_grp;
+ union {
+ struct {
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+ } legacy;
+ struct {
+ struct mlx5_flow_group *fwd_grp;
+ struct mlx5_flow_handle *fwd_rule;
+ struct mlx5_flow_handle *bounce_rule;
+ struct mlx5_flow_group *bounce_grp;
+ } offloads;
+ };
};
struct mlx5_vport_drop_stats {
@@ -113,13 +148,11 @@ struct mlx5_vport_drop_stats {
struct mlx5_vport_info {
u8 mac[ETH_ALEN];
u16 vlan;
- u8 qos;
u64 node_guid;
int link_state;
- u32 min_rate;
- u32 max_rate;
- bool spoofchk;
- bool trusted;
+ u8 qos;
+ u8 spoofchk: 1;
+ u8 trusted: 1;
};
/* Vport context events */
@@ -131,7 +164,6 @@ enum mlx5_eswitch_vport_event {
struct mlx5_vport {
struct mlx5_core_dev *dev;
- int vport;
struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE];
struct mlx5_flow_handle *promisc_rule;
@@ -140,6 +172,8 @@ struct mlx5_vport {
struct vport_ingress ingress;
struct vport_egress egress;
+ u32 default_metadata;
+ u32 metadata;
struct mlx5_vport_info info;
@@ -147,17 +181,20 @@ struct mlx5_vport {
bool enabled;
u32 esw_tsar_ix;
u32 bw_share;
+ u32 min_rate;
+ u32 max_rate;
+ struct mlx5_esw_rate_group *group;
} qos;
+ u16 vport;
bool enabled;
enum mlx5_eswitch_vport_event enabled_events;
+ int index;
+ struct devlink_port *dl_port;
+ struct dentry *dbgfs;
};
-enum offloads_fdb_flags {
- ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
-};
-
-struct mlx5_esw_chains_priv;
+struct mlx5_esw_indir_table;
struct mlx5_eswitch_fdb {
union {
@@ -173,36 +210,59 @@ struct mlx5_eswitch_fdb {
struct offloads_fdb {
struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *tc_miss_table;
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *send_to_vport_meta_grp;
struct mlx5_flow_group *peer_miss_grp;
struct mlx5_flow_handle **peer_miss_rules;
struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle **send_to_vport_meta_rules;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
- struct mlx5_esw_chains_priv *esw_chains_priv;
+ struct mlx5_fs_chains *esw_chains_priv;
+ struct {
+ DECLARE_HASHTABLE(table, 8);
+ /* Protects vports.table */
+ struct mutex lock;
+ } vports;
+
+ struct mlx5_esw_indir_table *indir;
+
} offloads;
};
u32 flags;
};
struct mlx5_esw_offload {
+ struct mlx5_flow_table *ft_offloads_restore;
+ struct mlx5_flow_group *restore_group;
+ struct mlx5_modify_hdr *restore_copy_hdr_id;
+ struct mapping_ctx *reg_c0_obj_pool;
+
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
- struct mlx5_eswitch_rep *vport_reps;
+ struct mlx5_flow_group *vport_rx_drop_group;
+ struct mlx5_flow_handle *vport_rx_drop_rule;
+ struct xarray vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
+ struct mutex decap_tbl_lock; /* protects decap_tbl */
+ DECLARE_HASHTABLE(decap_tbl, 8);
struct mod_hdr_tbl mod_hdr;
DECLARE_HASHTABLE(termtbl_tbl, 8);
struct mutex termtbl_mutex; /* protects termtbl hash */
+ struct xarray vhca_map;
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
atomic64_t num_flows;
enum devlink_eswitch_encap_mode encap;
+ struct ida vport_metadata_ida;
+ unsigned int host_number; /* ECPF supports one external host */
};
/* E-Switch MC FDB table hash node */
@@ -224,6 +284,14 @@ struct mlx5_esw_functions {
enum {
MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+ MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1),
+ MLX5_ESWITCH_VPORT_ACL_NS_CREATED = BIT(2),
+};
+
+struct mlx5_esw_bridge_offloads;
+
+enum {
+ MLX5_ESW_FDB_CREATED = BIT(0),
};
struct mlx5_eswitch {
@@ -235,7 +303,7 @@ struct mlx5_eswitch {
struct esw_mc_addr mc_promisc;
/* end of legacy */
struct workqueue_struct *work_queue;
- struct mlx5_vport *vports;
+ struct xarray vports;
u32 flags;
int total_vports;
int enabled_vports;
@@ -244,45 +312,64 @@ struct mlx5_eswitch {
*/
struct mutex state_lock;
+ /* Protects eswitch mode change that occurs via one or more
+ * user commands, i.e. sriov state change, devlink commands.
+ */
+ struct rw_semaphore mode_lock;
+ atomic64_t user_count;
+
struct {
- bool enabled;
- u32 root_tsar_id;
+ u32 root_tsar_ix;
+ struct mlx5_esw_rate_group *group0;
+ struct list_head groups; /* Protected by esw->state_lock */
+
+ /* Protected by esw->state_lock.
+ * Initially 0, meaning no QoS users and QoS is disabled.
+ */
+ refcount_t refcnt;
} qos;
+ struct mlx5_esw_bridge_offloads *br_offloads;
struct mlx5_esw_offload offloads;
int mode;
- int nvports;
u16 manager_vport;
u16 first_host_vport;
struct mlx5_esw_functions esw_funcs;
+ struct {
+ u32 large_group_num;
+ } params;
+ struct blocking_notifier_head n_head;
+ struct dentry *dbgfs;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
int esw_offloads_enable(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
-void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- int table_size);
-void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport);
-void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
- u32 rate_mbps);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule);
+
+bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
+int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
+
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf);
+
+#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1)
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs);
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
- u16 vport, u8 mac[ETH_ALEN]);
+ u16 vport, const u8 *mac);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
u16 vport, int link_state);
int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
@@ -293,6 +380,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
u16 vport_num, bool setting);
int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
u32 max_rate, u32 min_rate);
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack);
int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
@@ -303,11 +394,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
- bool other_vport,
- void *in, int inlen);
-int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
- bool other_vport,
- void *out, int outlen);
+ bool other_vport, void *in);
struct mlx5_flow_spec;
struct mlx5_esw_flow_attr;
@@ -315,6 +402,7 @@ struct mlx5_termtbl_handle;
bool
mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_spec *spec);
@@ -331,22 +419,25 @@ void
mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
struct mlx5_termtbl_handle *tt);
+void
+mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec);
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
struct mlx5_flow_handle *
mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
void
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
void
mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
struct mlx5_flow_handle *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
@@ -365,27 +456,24 @@ enum mlx5_flow_match_level {
};
/* current maximum for flow based vport multicasting */
-#define MLX5_MAX_FLOW_FWD_VPORTS 2
+#define MLX5_MAX_FLOW_FWD_VPORTS 32
enum {
MLX5_ESW_DEST_ENCAP = BIT(0),
MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
-};
-
-enum {
- MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0),
- MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1),
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2),
};
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
struct mlx5_core_dev *in_mdev;
struct mlx5_core_dev *counter_dev;
+ struct mlx5e_tc_int_port *dest_int_port;
+ struct mlx5e_tc_int_port *int_port;
int split_count;
int out_count;
- int action;
__be16 vlan_proto[MLX5_FS_VLAN_DEPTH];
u16 vlan_vid[MLX5_FS_VLAN_DEPTH];
u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
@@ -396,16 +484,11 @@ struct mlx5_esw_flow_attr {
struct mlx5_pkt_reformat *pkt_reformat;
struct mlx5_core_dev *mdev;
struct mlx5_termtbl_handle *termtbl;
+ int src_port_rewrite_act_id;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5_modify_hdr *modify_hdr;
- u8 inner_match_level;
- u8 outer_match_level;
- struct mlx5_fc *counter;
- u32 chain;
- u16 prio;
- u32 dest_chain;
- u32 flags;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_rx_tun_attr *rx_tun_attr;
+ struct ethhdr eth;
+ struct mlx5_pkt_reformat *decap_pkt_reformat;
};
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
@@ -414,25 +497,27 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
enum devlink_eswitch_encap_mode *encap);
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
+ u8 *hw_addr, int *hw_addr_len,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const u8 *hw_addr, int hw_addr_len,
+ struct netlink_ext_ack *extack);
+
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
u16 vport, u16 vlan, u8 qos, u8 set_flags);
-int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u16 vlan_id, u32 flow_action);
-
static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
u8 vlan_depth)
{
@@ -446,8 +531,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
}
-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1);
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1);
@@ -464,6 +547,11 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
#define esw_debug(dev, format, ...) \
mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw)
+{
+ return esw && MLX5_ESWITCH_MANAGER(esw->dev);
+}
+
/* The returned number is valid only when the dev is eswitch manager. */
static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
{
@@ -483,132 +571,69 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
}
-static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev)
+static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev)
{
- /* Ideally device should have the functions changed supported
- * capability regardless of it being ECPF or PF wherever such
- * event should be processed such as on eswitch manager device.
- * However, some ECPF based device might not have this capability
- * set. Hence OR for ECPF check to cover such device.
- */
- return MLX5_CAP_ESW(dev, esw_functions_changed) ||
- mlx5_core_is_ecpf_esw_manager(dev);
-}
-
-static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
-{
- /* Uplink always locate at the last element of the array.*/
- return esw->total_vports - 1;
+ return mlx5_core_is_ecpf_esw_manager(dev);
}
-static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
+static inline unsigned int
+mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
+ u16 vport_num)
{
- return esw->total_vports - 2;
+ return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num;
}
-static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
- u16 vport_num)
+static inline u16
+mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index)
{
- if (vport_num == MLX5_VPORT_ECPF) {
- if (!mlx5_ecpf_vport_exists(esw->dev))
- esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
- return mlx5_eswitch_ecpf_idx(esw);
- }
-
- if (vport_num == MLX5_VPORT_UPLINK)
- return mlx5_eswitch_uplink_idx(esw);
-
- return vport_num;
+ return dl_port_index & 0xffff;
}
-static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
- int index)
+static inline bool mlx5_esw_is_fdb_created(struct mlx5_eswitch *esw)
{
- if (index == mlx5_eswitch_ecpf_idx(esw) &&
- mlx5_ecpf_vport_exists(esw->dev))
- return MLX5_VPORT_ECPF;
-
- if (index == mlx5_eswitch_uplink_idx(esw))
- return MLX5_VPORT_UPLINK;
-
- return index;
+ return esw->fdb_table.flags & MLX5_ESW_FDB_CREATED;
}
/* TODO: This mlx5e_tc function shouldn't be called by eswitch */
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
-/* The vport getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+/* Each mark identifies eswitch vport type.
+ * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using
+ * a single mark.
+ * MLX5_ESW_VPT_VF identifies a SRIOV VF vport.
+ * MLX5_ESW_VPT_SF identifies SF vport.
*/
-#define mlx5_esw_for_all_vports(esw, i, vport) \
- for ((i) = MLX5_VPORT_PF; \
- (vport) = &(esw)->vports[i], \
- (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \
- for ((i) = (esw)->total_vports - 1; \
- (vport) = &(esw)->vports[i], \
- (i) >= MLX5_VPORT_PF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \
- for ((i) = MLX5_VPORT_FIRST_VF; \
- (vport) = &(esw)->vports[(i)], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs) \
- for ((i) = (nvfs); \
- (vport) = &(esw)->vports[(i)], \
- (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-/* The rep getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+#define MLX5_ESW_VPT_HOST_FN XA_MARK_0
+#define MLX5_ESW_VPT_VF XA_MARK_1
+#define MLX5_ESW_VPT_SF XA_MARK_2
+
+/* The vport iterator is valid only after vport are initialized in mlx5_eswitch_init.
+ * Borrowed the idea from xa_for_each_marked() but with support for desired last element.
*/
-#define mlx5_esw_for_all_reps(esw, i, rep) \
- for ((i) = MLX5_VPORT_PF; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \
- for ((i) = MLX5_VPORT_FIRST_VF; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \
- for ((i) = (nvfs); \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs) \
- for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \
- for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
-
-/* Includes host PF (vport 0) if it's not esw manager. */
-#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \
- for ((i) = (esw)->first_host_vport; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \
- for ((i) = (nvfs); \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) >= (esw)->first_host_vport; (i)--)
-
-#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \
- for ((vport) = (esw)->first_host_vport; \
- (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \
- for ((vport) = (nvfs); \
- (vport) >= (esw)->first_host_vport; (vport)--)
+#define mlx5_esw_for_each_vport(esw, index, vport) \
+ xa_for_each(&((esw)->vports), index, vport)
+
+#define mlx5_esw_for_each_entry_marked(xa, index, entry, last, filter) \
+ for (index = 0, entry = xa_find(xa, &index, last, filter); \
+ entry; entry = xa_find_after(xa, &index, last, filter))
+
+#define mlx5_esw_for_each_vport_marked(esw, index, vport, last, filter) \
+ mlx5_esw_for_each_entry_marked(&((esw)->vports), index, vport, last, filter)
+
+#define mlx5_esw_for_each_vf_vport(esw, index, vport, last) \
+ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_VF)
+
+#define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \
+ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN)
+
+struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink);
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num);
-void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
int
@@ -616,6 +641,10 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
enum mlx5_eswitch_vport_event enabled_events);
void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
+int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
@@ -623,21 +652,134 @@ void
esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
+struct esw_vport_tbl_namespace {
+ int max_fte;
+ int max_num_groups;
+ u32 flags;
+};
+
+struct mlx5_vport_tbl_attr {
+ u32 chain;
+ u16 prio;
+ u16 vport;
+ const struct esw_vport_tbl_namespace *vport_ns;
+};
+
+struct mlx5_flow_table *
+mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
+void
+mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
+
+struct mlx5_flow_handle *
+esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
+
+int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num);
+void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs);
+
+int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
+
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum);
+void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum);
+void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
+
+int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num);
+
+/**
+ * mlx5_esw_event_info - Indicates eswitch mode changed/changing.
+ *
+ * @new_mode: New mode of eswitch.
+ */
+struct mlx5_esw_event_info {
+ u16 new_mode;
+};
+
+int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n);
+void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n);
+
+bool mlx5_esw_hold(struct mlx5_core_dev *dev);
+void mlx5_esw_release(struct mlx5_core_dev *dev);
+void mlx5_esw_get(struct mlx5_core_dev *dev);
+void mlx5_esw_put(struct mlx5_core_dev *dev);
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
+void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+
+void esw_vport_change_handle_locked(struct mlx5_vport *vport);
+
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
-static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
-static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
+static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
+static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
{
return ERR_PTR(-EOPNOTSUPP);
}
-static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
+static inline struct mlx5_flow_handle *
+esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline unsigned int
+mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
+ u16 vport_num)
+{
+ return vport_num;
+}
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ return 0;
+}
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ return 0;
+}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 1a57b2bd74b8..728ca9f2bb9d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -31,18 +31,35 @@
*/
#include <linux/etherdevice.h>
+#include <linux/idr.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
-#include "eswitch_offloads_chains.h"
+#include "esw/indir_table.h"
+#include "esw/acl/ofld.h"
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
#include "lib/devcom.h"
#include "lib/eq.h"
+#include "lib/fs_chains.h"
+#include "en_tc.h"
+#include "en/mapping.h"
+#include "devlink.h"
+#include "lag/lag.h"
+
+#define mlx5_esw_for_each_rep(esw, i, rep) \
+ xa_for_each(&((esw)->offloads.vport_reps), i, rep)
+
+#define mlx5_esw_for_each_sf_rep(esw, i, rep) \
+ xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF)
+
+#define mlx5_esw_for_each_vf_rep(esw, index, rep) \
+ mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \
+ rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF)
/* There are two match-all miss flows, one for unicast dst mac and
* one for multicast.
@@ -50,28 +67,71 @@
#define MLX5_ESW_MISS_FLOWS (2)
#define UPLINK_REP_INDEX 0
+#define MLX5_ESW_VPORT_TBL_SIZE 128
+#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4
+
+#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+
+static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
+ .max_fte = MLX5_ESW_VPORT_TBL_SIZE,
+ .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
+ .flags = 0,
+};
+
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
u16 vport_num)
{
- int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+ return xa_load(&esw->offloads.vport_reps, vport_num);
+}
- WARN_ON(idx > esw->total_vports - 1);
- return &esw->offloads.vport_reps[idx];
+static void
+mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) || !attr || !attr->in_rep)
+ return;
+
+ if (attr->int_port) {
+ spec->flow_context.flow_source = mlx5e_tc_int_port_get_flow_source(attr->int_port);
+
+ return;
+ }
+
+ spec->flow_context.flow_source = (attr->in_rep->vport == MLX5_VPORT_UPLINK) ?
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK :
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
}
-static bool
-esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
- const struct mlx5_vport *vport)
+/* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits
+ * are not needed as well in the following process. So clear them all for simplicity.
+ */
+void
+mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec)
{
- return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
- mlx5_eswitch_is_vf_vport(esw, vport->vport));
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ void *misc2;
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
+
+ if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2)))
+ spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2;
+ }
}
static void
mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr,
+ struct mlx5_eswitch *src_esw,
+ u16 vport)
{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ u32 metadata;
void *misc2;
void *misc;
@@ -79,26 +139,32 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
* VHCA in dual-port RoCE mode, and matching on source vport may fail.
*/
if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ vport = mlx5_esw_indir_table_decap_vport(attr);
+
+ if (attr && !attr->chain && esw_attr->int_port)
+ metadata =
+ mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
+ else
+ metadata =
+ mlx5_eswitch_get_vport_metadata_for_match(src_esw, vport);
+
misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
- MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
- attr->in_rep->vport));
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, metadata);
misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
- MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
} else {
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
MLX5_SET(fte_match_set_misc, misc,
source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+ MLX5_CAP_GEN(src_esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
@@ -108,91 +174,433 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
}
+}
+
+static int
+esw_setup_decap_indir(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_flow_table *ft;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ ft = mlx5_esw_indir_table_get(esw, attr, spec,
+ mlx5_esw_indir_table_decap_vport(attr), true);
+ return PTR_ERR_OR_ZERO(ft);
+}
+
+static void
+esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ mlx5_esw_indir_table_put(esw, attr,
+ mlx5_esw_indir_table_decap_vport(attr),
+ true);
+}
+
+static int
+esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ u32 sampler_id,
+ int i)
+{
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+ dest[i].sampler_id = sampler_id;
+
+ return 0;
+}
+
+static int
+esw_setup_ft_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ int i)
+{
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = attr->dest_ft;
+
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ return esw_setup_decap_indir(esw, attr, spec);
+ return 0;
+}
+
+static void
+esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_fs_chains *chains, int i)
+{
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
+}
+
+static void
+esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, int i)
+{
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level))
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = esw->fdb_table.offloads.slow_fdb;
+}
+
+static int
+esw_setup_chain_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level,
+ int i)
+{
+ struct mlx5_flow_table *ft;
+
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ ft = mlx5_chains_get_table(chains, chain, prio, level);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = ft;
+ return 0;
+}
+
+static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
+ int from, int to)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ int i;
+
+ for (i = from; i < to; i++)
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
+ esw_attr->dests[i].mdev))
+ mlx5_esw_indir_table_put(esw, attr, esw_attr->dests[i].rep->vport,
+ false);
+}
+
+static bool
+esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr)
+{
+ int i;
+
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ return true;
+ return false;
+}
+
+static int
+esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_fs_chains *chains,
+ struct mlx5_flow_attr *attr,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int err;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ /* flow steering cannot handle more than one dest with the same ft
+ * in a single flow
+ */
+ if (esw_attr->out_count - esw_attr->split_count > 1)
+ return -EOPNOTSUPP;
+
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
+ if (err)
+ return err;
+
+ if (esw_attr->dests[esw_attr->split_count].pkt_reformat) {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat;
+ }
+ (*i)++;
+
+ return 0;
+}
+
+static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
+}
+
+static bool
+esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ bool result = false;
+ int i;
+
+ /* Indirect table is supported only for flows with in_port uplink
+ * and the destination is vport on the same eswitch as the uplink,
+ * return false in case at least one of destinations doesn't meet
+ * this criteria.
+ */
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) {
+ if (esw_attr->dests[i].rep &&
+ mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
+ esw_attr->dests[i].mdev)) {
+ result = true;
+ } else {
+ result = false;
+ break;
+ }
+ }
+ return result;
+}
+
+static int
+esw_setup_indir_table(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ bool ignore_flow_lvl,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int j, err;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
+ if (ignore_flow_lvl)
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, spec,
+ esw_attr->dests[j].rep->vport, false);
+ if (IS_ERR(dest[*i].ft)) {
+ err = PTR_ERR(dest[*i].ft);
+ goto err_indir_tbl_get;
+ }
+ }
+
+ if (mlx5_esw_indir_table_decap_vport(attr)) {
+ err = esw_setup_decap_indir(esw, attr, spec);
+ if (err)
+ goto err_indir_tbl_get;
+ }
+
+ return 0;
+
+err_indir_tbl_get:
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
+ return err;
+}
+
+static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
+ esw_cleanup_decap_indir(esw, attr);
+}
+
+static void
+esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level)
+{
+ mlx5_chains_put_table(chains, chain, prio, level);
+}
+
+static void
+esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
+ int attr_idx, int dest_idx, bool pkt_reformat)
+{
+ dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ dest[dest_idx].vport.vhca_id =
+ MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
+ dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK &&
+ mlx5_lag_mpesw_is_activated(esw->dev))
+ dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
+ }
+ if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
+ if (pkt_reformat) {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
+ }
+ dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
+ }
+}
+
+static int
+esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
+ int i)
+{
+ int j;
+
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++)
+ esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true);
+ return i;
+}
+
+static bool
+esw_src_port_rewrite_supported(struct mlx5_eswitch *esw)
+{
+ return MLX5_CAP_GEN(esw->dev, reg_c_preserve) &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level);
+}
+
+static int
+esw_setup_dests(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ int err = 0;
+
+ if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
+ esw_src_port_rewrite_supported(esw))
+ attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE;
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE &&
+ !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) {
+ esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i);
+ (*i)++;
+ } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) {
+ esw_setup_slow_path_dest(dest, flow_act, esw, *i);
+ (*i)++;
+ } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) {
+ esw_setup_accept_dest(dest, flow_act, chains, *i);
+ (*i)++;
+ } else if (esw_is_indir_table(esw, attr)) {
+ err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i);
+ } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
+ err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
+ } else {
+ *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i);
+
+ if (attr->dest_ft) {
+ err = esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
+ (*i)++;
+ } else if (attr->dest_chain) {
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
+ 1, 0, *i);
+ (*i)++;
+ }
+ }
+
+ return err;
+}
+
+static void
+esw_cleanup_dests(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+
+ if (attr->dest_ft) {
+ esw_cleanup_decap_indir(esw, attr);
+ } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
+ if (attr->dest_chain)
+ esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
+ else if (esw_is_indir_table(esw, attr))
+ esw_cleanup_indir_table(esw, attr);
+ else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
+ esw_cleanup_chain_src_port_rewrite(esw, attr);
+ }
+}
+
+static void
+esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act)
+{
+ struct mlx5e_flow_meter_handle *meter;
- if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
- attr->in_rep->vport == MLX5_VPORT_UPLINK)
- spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ meter = attr->meter_attr.meter;
+ flow_act->exe_aso.type = attr->exe_aso_type;
+ flow_act->exe_aso.object_id = meter->obj_id;
+ flow_act->exe_aso.flow_meter.meter_idx = meter->idx;
+ flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN;
+ /* use metadata reg 5 for packet color */
+ flow_act->exe_aso.return_reg_id = 5;
}
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
- bool split = !!(attr->split_count);
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ bool split = !!(esw_attr->split_count);
+ struct mlx5_vport_tbl_attr fwd_attr;
+ struct mlx5_flow_destination *dest;
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
- int j, i = 0;
+ int i = 0;
if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
+ dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
+ if (!dest)
+ return ERR_PTR(-ENOMEM);
+
flow_act.action = attr->action;
/* if per flow vlan pop/push is emulated, don't set that into the firmware */
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
- flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]);
- flow_act.vlan[0].vid = attr->vlan_vid[0];
- flow_act.vlan[0].prio = attr->vlan_prio[0];
+ flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]);
+ flow_act.vlan[0].vid = esw_attr->vlan_vid[0];
+ flow_act.vlan[0].prio = esw_attr->vlan_prio[0];
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
- flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]);
- flow_act.vlan[1].vid = attr->vlan_vid[1];
- flow_act.vlan[1].prio = attr->vlan_prio[1];
+ flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]);
+ flow_act.vlan[1].vid = esw_attr->vlan_vid[1];
+ flow_act.vlan[1].prio = esw_attr->vlan_prio[1];
}
}
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
- struct mlx5_flow_table *ft;
+ mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
- if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
- flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw);
- i++;
- } else if (attr->dest_chain) {
- flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
- ft = mlx5_esw_chains_get_table(esw, attr->dest_chain,
- 1, 0);
- if (IS_ERR(ft)) {
- rule = ERR_CAST(ft);
- goto err_create_goto_table;
- }
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ int err;
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = ft;
- i++;
- } else {
- for (j = attr->split_count; j < attr->out_count; j++) {
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->dests[j].rep->vport;
- dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- dest[i].vport.flags |=
- MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- flow_act.pkt_reformat = attr->dests[j].pkt_reformat;
- dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
- dest[i].vport.pkt_reformat =
- attr->dests[j].pkt_reformat;
- }
- i++;
- }
+ err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_create_goto_table;
}
}
+
+ if (esw_attr->decap_pkt_reformat)
+ flow_act.pkt_reformat = esw_attr->decap_pkt_reformat;
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest[i].counter_id = mlx5_fc_id(attr->counter);
i++;
}
- mlx5_eswitch_set_rule_source_port(esw, spec, attr);
-
if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
if (attr->inner_match_level != MLX5_MATCH_NONE)
@@ -201,15 +609,36 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_hdr = attr->modify_hdr;
- fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio,
- !!split);
+ if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
+ attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)
+ esw_setup_meter(attr, &flow_act);
+
+ if (split) {
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+
+ fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
+ } else {
+ if (attr->chain || attr->prio)
+ fdb = mlx5_chains_get_table(chains, attr->chain,
+ attr->prio, 0);
+ else
+ fdb = attr->ft;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT))
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr,
+ esw_attr->in_mdev->priv.eswitch,
+ esw_attr->in_rep->vport);
+ }
if (IS_ERR(fdb)) {
rule = ERR_CAST(fdb);
goto err_esw_get;
}
- if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec))
- rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
+ if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
+ rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
&flow_act, dest, i);
else
rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
@@ -218,59 +647,78 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
else
atomic64_inc(&esw->offloads.num_flows);
+ kfree(dest);
return rule;
err_add_rule:
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, !!split);
+ if (split)
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ else if (attr->chain || attr->prio)
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
err_esw_get:
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain)
- mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0);
+ esw_cleanup_dests(esw, attr);
err_create_goto_table:
+ kfree(dest);
return rule;
}
struct mlx5_flow_handle *
mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5_vport_tbl_attr fwd_attr;
+ struct mlx5_flow_destination *dest;
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
- int i;
+ int i, err = 0;
- fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0);
+ dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
+ if (!dest)
+ return ERR_PTR(-ENOMEM);
+
+ fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
if (IS_ERR(fast_fdb)) {
rule = ERR_CAST(fast_fdb);
goto err_get_fast;
}
- fwd_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 1);
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
if (IS_ERR(fwd_fdb)) {
rule = ERR_CAST(fwd_fdb);
goto err_get_fwd;
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- for (i = 0; i < attr->split_count; i++) {
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->dests[i].rep->vport;
- dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
- dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
- dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat;
+ for (i = 0; i < esw_attr->split_count; i++) {
+ if (esw_is_indir_table(esw, attr))
+ err = esw_setup_indir_table(dest, &flow_act, esw, attr, spec, false, &i);
+ else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
+ err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr,
+ &i);
+ else
+ esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
+
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_chain_src_rewrite;
}
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = fwd_fdb,
+ dest[i].ft = fwd_fdb;
i++;
- mlx5_eswitch_set_rule_source_port(esw, spec, attr);
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr,
+ esw_attr->in_mdev->priv.eswitch,
+ esw_attr->in_rep->vport);
if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -278,54 +726,73 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
- if (IS_ERR(rule))
- goto add_err;
+ if (IS_ERR(rule)) {
+ i = esw_attr->split_count;
+ goto err_chain_src_rewrite;
+ }
atomic64_inc(&esw->offloads.num_flows);
+ kfree(dest);
return rule;
-add_err:
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1);
+err_chain_src_rewrite:
+ esw_put_dest_tables_loop(esw, attr, 0, i);
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
err_get_fwd:
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
err_get_fast:
+ kfree(dest);
return rule;
}
static void
__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
bool fwd_rule)
{
- bool split = (attr->split_count > 0);
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ bool split = (esw_attr->split_count > 0);
+ struct mlx5_vport_tbl_attr fwd_attr;
int i;
mlx5_del_flow_rules(rule);
- /* unref the term table */
- for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
- if (attr->dests[i].termtbl)
- mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
+ if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
+ /* unref the term table */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (esw_attr->dests[i].termtbl)
+ mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl);
+ }
}
atomic64_dec(&esw->offloads.num_flows);
+ if (fwd_rule || split) {
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ }
+
if (fwd_rule) {
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1);
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+ esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
} else {
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio,
- !!split);
- if (attr->dest_chain)
- mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0);
+ if (split)
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ else if (attr->chain || attr->prio)
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+ esw_cleanup_dests(esw, attr);
}
}
void
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
__mlx5_eswitch_del_rule(esw, rule, attr, false);
}
@@ -333,7 +800,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
void
mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
__mlx5_eswitch_del_rule(esw, rule, attr, true);
}
@@ -341,10 +808,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
{
struct mlx5_eswitch_rep *rep;
- int i, err = 0;
+ unsigned long i;
+ int err = 0;
esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
- mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
+ mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) {
if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
continue;
@@ -410,9 +878,10 @@ out_notsupp:
}
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_eswitch_rep *vport = NULL;
bool push, pop, fwd;
int err = 0;
@@ -428,19 +897,19 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
mutex_lock(&esw->state_lock);
- err = esw_add_vlan_action_check(attr, push, pop, fwd);
+ err = esw_add_vlan_action_check(esw_attr, push, pop, fwd);
if (err)
goto unlock;
- attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED;
- vport = esw_vlan_action_get_vport(attr, push, pop);
+ vport = esw_vlan_action_get_vport(esw_attr, push, pop);
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
+ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
vport->vlan_refcount++;
- attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
}
goto unlock;
@@ -461,26 +930,27 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (vport->vlan_refcount)
goto skip_set_push;
- err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0,
- SET_VLAN_INSERT | SET_VLAN_STRIP);
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0],
+ 0, SET_VLAN_INSERT | SET_VLAN_STRIP);
if (err)
goto out;
- vport->vlan = attr->vlan_vid[0];
+ vport->vlan = esw_attr->vlan_vid[0];
skip_set_push:
vport->vlan_refcount++;
}
out:
if (!err)
- attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
unlock:
mutex_unlock(&esw->state_lock);
return err;
}
int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_eswitch_rep *vport = NULL;
bool push, pop, fwd;
int err = 0;
@@ -489,7 +959,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
return 0;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED))
+ if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED))
return 0;
push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
@@ -498,11 +968,11 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
mutex_lock(&esw->state_lock);
- vport = esw_vlan_action_get_vport(attr, push, pop);
+ vport = esw_vlan_action_get_vport(esw_attr, push, pop);
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
+ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
vport->vlan_refcount--;
goto out;
@@ -534,7 +1004,9 @@ out:
}
struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch *from_esw,
+ struct mlx5_eswitch_rep *rep,
u32 sqn)
{
struct mlx5_flow_act flow_act = {0};
@@ -552,21 +1024,33 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
/* source vport is the esw manager */
- MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
+ MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
+ if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(from_esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest.vport.num = vport;
+ dest.vport.num = rep->vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
+ flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule))
- esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
+ esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n",
+ PTR_ERR(flow_rule));
out:
kvfree(spec);
return flow_rule;
@@ -578,37 +1062,98 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule)
+{
+ if (rule)
+ mlx5_del_flow_rules(rule);
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(fte_match_param, spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1,
+ ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+ dest.vport.num = vport_num;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n",
+ vport_num, PTR_ERR(flow_rule));
+
+ kvfree(spec);
+ return flow_rule;
+}
+
+static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
+{
+ return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_1;
+}
+
static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
{
u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
- u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
- u8 fdb_to_vport_reg_c_id;
+ u32 min[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+ u8 curr, wanted;
int err;
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw) &&
+ !mlx5_eswitch_vport_match_metadata_enabled(esw))
return 0;
- err = mlx5_eswitch_query_esw_vport_context(esw->dev, 0, false,
- out, sizeof(out));
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ err = mlx5_cmd_exec_inout(esw->dev, query_esw_vport_context, in, out);
if (err)
return err;
- fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
- esw_vport_context.fdb_to_vport_reg_c_id);
+ curr = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+ wanted = MLX5_FDB_TO_VPORT_REG_C_0;
+ if (mlx5_eswitch_reg_c1_loopback_supported(esw))
+ wanted |= MLX5_FDB_TO_VPORT_REG_C_1;
if (enable)
- fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+ curr |= wanted;
else
- fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
-
- MLX5_SET(modify_esw_vport_context_in, in,
- esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+ curr &= ~wanted;
- MLX5_SET(modify_esw_vport_context_in, in,
+ MLX5_SET(modify_esw_vport_context_in, min,
+ esw_vport_context.fdb_to_vport_reg_c_id, curr);
+ MLX5_SET(modify_esw_vport_context_in, min,
field_select.fdb_to_vport_reg_c_id, 1);
- return mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false,
- in, sizeof(in));
+ err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, min);
+ if (!err) {
+ if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1))
+ esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
+ else
+ esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
+ }
+
+ return err;
}
static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
@@ -621,7 +1166,8 @@ static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
misc_parameters_2);
- MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
} else {
@@ -672,12 +1218,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_handle **flows;
- struct mlx5_flow_handle *flow;
- struct mlx5_flow_spec *spec;
/* total vports is the same for both e-switches */
int nvports = esw->total_vports;
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_vport *vport;
+ unsigned long i;
void *misc;
- int err, i;
+ int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
@@ -685,7 +1233,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
peer_miss_rules_setup(esw, peer_dev, spec, &dest);
- flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
+ flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL);
if (!flows) {
err = -ENOMEM;
goto alloc_flows_err;
@@ -696,6 +1244,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc_parameters);
if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
spec, MLX5_VPORT_PF);
@@ -705,10 +1254,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_pf_flow_err;
}
- flows[MLX5_VPORT_PF] = flow;
+ flows[vport->index] = flow;
}
if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
@@ -716,13 +1266,13 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_ecpf_flow_err;
}
- flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+ flows[vport->index] = flow;
}
- mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
esw_set_peer_miss_rule_source_port(esw,
peer_dev->priv.eswitch,
- spec, i);
+ spec, vport->vport);
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
@@ -730,7 +1280,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_vf_flow_err;
}
- flows[i] = flow;
+ flows[vport->index] = flow;
}
esw->fdb_table.offloads.peer_miss_rules = flows;
@@ -739,15 +1289,20 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
return 0;
add_vf_flow_err:
- nvports = --i;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports)
- mlx5_del_flow_rules(flows[i]);
-
- if (mlx5_ecpf_vport_exists(esw->dev))
- mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
+ if (!flows[vport->index])
+ continue;
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
add_ecpf_flow_err:
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
add_pf_flow_err:
esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
kvfree(flows);
@@ -759,20 +1314,23 @@ alloc_flows_err:
static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
{
struct mlx5_flow_handle **flows;
- int i;
+ struct mlx5_vport *vport;
+ unsigned long i;
flows = esw->fdb_table.offloads.peer_miss_rules;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i,
- mlx5_core_max_vfs(esw->dev))
- mlx5_del_flow_rules(flows[i]);
-
- if (mlx5_ecpf_vport_exists(esw->dev))
- mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
+ mlx5_del_flow_rules(flows[vport->index]);
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
kvfree(flows);
}
@@ -836,6 +1394,53 @@ out:
return err;
}
+struct mlx5_flow_handle *
+esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
+{
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore;
+ struct mlx5_flow_context *flow_context;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_destination dest;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ ESW_REG_C0_USER_DATA_METADATA_MASK);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id;
+
+ flow_context = &spec->flow_context;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = tag;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = esw->offloads.ft_offloads;
+
+ flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev,
+ "Failed to create restore rule for tag: %d, err(%d)\n",
+ tag, (int)PTR_ERR(flow_rule));
+
+ return flow_rule;
+}
+
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
@@ -851,8 +1456,9 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
match_criteria_enable,
MLX5_MATCH_MISC_PARAMETERS_2);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters_2.metadata_reg_c_0);
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
} else {
MLX5_SET(create_flow_group_in, flow_group_in,
match_criteria_enable,
@@ -863,68 +1469,177 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
}
}
-static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_table *fdb = NULL;
- u32 flags = 0, *flow_group_in;
- int table_size, ix, err = 0;
- struct mlx5_flow_group *g;
- void *match_criteria;
- u8 *dmac;
-
- esw_debug(esw->dev, "Create offloads FDB Tables\n");
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
+ struct mlx5_vport_tbl_attr attr;
+ struct mlx5_vport *vport;
+ unsigned long i;
- root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
- if (!root_ns) {
- esw_warn(dev, "Failed to get FDB flow namespace\n");
- err = -EOPNOTSUPP;
- goto ns_err;
+ attr.chain = 0;
+ attr.prio = 1;
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ attr.vport = vport->vport;
+ attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ mlx5_esw_vporttbl_put(esw, &attr);
}
- esw->fdb_table.offloads.ns = root_ns;
- err = mlx5_flow_namespace_set_mode(root_ns,
- esw->dev->priv.steering->mode);
- if (err) {
- esw_warn(dev, "Failed to set FDB namespace steering mode\n");
- goto ns_err;
+}
+
+static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport_tbl_attr attr;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ attr.chain = 0;
+ attr.prio = 1;
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ attr.vport = vport->vport;
+ attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ fdb = mlx5_esw_vporttbl_get(esw, &attr);
+ if (IS_ERR(fdb))
+ goto out;
}
+ return 0;
- table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
- MLX5_ESW_MISS_FLOWS + esw->total_vports;
+out:
+ esw_vport_tbl_put(esw);
+ return PTR_ERR(fdb);
+}
+
+#define fdb_modify_header_fwd_to_table_supported(esw) \
+ (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
+static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level))
+ *flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
+ } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Tc chains and priorities offload aren't supported\n");
+ } else if (!fdb_modify_header_fwd_to_table_supported(esw)) {
+ /* Disabled when ttl workaround is needed, e.g
+ * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig
+ */
+ esw_warn(dev,
+ "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n");
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ } else {
+ *flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_info(dev, "Supported tc chains and prios offload\n");
+ }
- /* create the slow path fdb with encap set, so further table instances
- * can be created at run time while VFs are probed if the FW allows that.
- */
if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
- flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
- MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+ *flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED;
+}
- ft_attr.flags = flags;
- ft_attr.max_fte = table_size;
- ft_attr.prio = FDB_SLOW_PATH;
+static int
+esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_table *nf_ft, *ft;
+ struct mlx5_chains_attr attr = {};
+ struct mlx5_fs_chains *chains;
+ u32 fdb_max;
+ int err;
- fdb = mlx5_create_flow_table(root_ns, &ft_attr);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
- goto slow_fdb_err;
- }
- esw->fdb_table.offloads.slow_fdb = fdb;
+ fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
- err = mlx5_esw_chains_create(esw);
- if (err) {
+ esw_init_chains_offload_flags(esw, &attr.flags);
+ attr.ns = MLX5_FLOW_NAMESPACE_FDB;
+ attr.max_ft_sz = fdb_max;
+ attr.max_grp_num = esw->params.large_group_num;
+ attr.default_ft = miss_fdb;
+ attr.mapping = esw->offloads.reg_c0_obj_pool;
+
+ chains = mlx5_chains_create(dev, &attr);
+ if (IS_ERR(chains)) {
+ err = PTR_ERR(chains);
esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
- goto fdb_chains_err;
+ return err;
+ }
+
+ esw->fdb_table.offloads.esw_chains_priv = chains;
+
+ /* Create tc_end_ft which is the always created ft chain */
+ nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains),
+ 1, 0);
+ if (IS_ERR(nf_ft)) {
+ err = PTR_ERR(nf_ft);
+ goto nf_ft_err;
+ }
+
+ /* Always open the root for fast path */
+ ft = mlx5_chains_get_table(chains, 0, 1, 0);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto level_0_err;
}
- /* create send-to-vport group */
+ /* Open level 1 for split fdb rules now if prios isn't supported */
+ if (!mlx5_chains_prios_supported(chains)) {
+ err = esw_vport_tbl_get(esw);
+ if (err)
+ goto level_1_err;
+ }
+
+ mlx5_chains_set_end_ft(chains, nf_ft);
+
+ return 0;
+
+level_1_err:
+ mlx5_chains_put_table(chains, 0, 1, 0);
+level_0_err:
+ mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
+nf_ft_err:
+ mlx5_chains_destroy(chains);
+ esw->fdb_table.offloads.esw_chains_priv = NULL;
+
+ return err;
+}
+
+static void
+esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
+{
+ if (!mlx5_chains_prios_supported(chains))
+ esw_vport_tbl_put(esw);
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
+ mlx5_chains_destroy(chains);
+}
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static int
+esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
+{ return 0; }
+
+static void
+esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
+{}
+
+#endif
+
+static int
+esw_create_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int count, err = 0;
+
+ memset(flow_group_in, 0, inlen);
+
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
MLX5_MATCH_MISC_PARAMETERS);
@@ -932,20 +1647,92 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
- ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+ /* See comment at table_size calculation */
+ count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1);
+ *ix += count;
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
err = PTR_ERR(g);
- esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
- goto send_vport_err;
+ esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ goto out;
}
esw->fdb_table.offloads.send_to_vport_grp = g;
- /* create peer esw miss group */
+out:
+ return err;
+}
+
+static int
+esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!esw_src_port_rewrite_supported(esw))
+ return 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ end_flow_index, *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev,
+ "Failed to create send-to-vport meta flow group err(%d)\n", err);
+ goto send_vport_meta_err;
+ }
+ esw->fdb_table.offloads.send_to_vport_meta_grp = g;
+
+ return 0;
+
+send_vport_meta_err:
+ return err;
+}
+
+static int
+esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return 0;
+
memset(flow_group_in, 0, inlen);
esw_set_flow_group_source_port(esw, flow_group_in);
@@ -962,21 +1749,37 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
source_eswitch_owner_vhca_id_valid, 1);
}
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- ix + esw->total_vports - 1);
- ix += esw->total_vports;
+ *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
err = PTR_ERR(g);
- esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
- goto peer_miss_err;
+ esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto out;
}
esw->fdb_table.offloads.peer_miss_grp = g;
- /* create miss group */
+out:
+ return err;
+}
+
+static int
+esw_create_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+ u8 *dmac;
+
memset(flow_group_in, 0, inlen);
+
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
MLX5_MATCH_OUTER_HEADERS);
match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
@@ -985,14 +1788,14 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
outer_headers.dmac_47_16);
dmac[0] = 0x01;
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- ix + MLX5_ESW_MISS_FLOWS);
+ *ix + MLX5_ESW_MISS_FLOWS);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
err = PTR_ERR(g);
- esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
+ esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err);
goto miss_err;
}
esw->fdb_table.offloads.miss_grp = g;
@@ -1001,19 +1804,129 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto miss_rule_err;
- esw->nvports = nvports;
- kvfree(flow_group_in);
return 0;
miss_rule_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
miss_err:
- mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+ return err;
+}
+
+static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb = NULL;
+ int table_size, ix = 0, err = 0;
+ u32 flags = 0, *flow_group_in;
+
+ esw_debug(esw->dev, "Create offloads FDB Tables\n");
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ err = -EOPNOTSUPP;
+ goto ns_err;
+ }
+ esw->fdb_table.offloads.ns = root_ns;
+ err = mlx5_flow_namespace_set_mode(root_ns,
+ esw->dev->priv.steering->mode);
+ if (err) {
+ esw_warn(dev, "Failed to set FDB namespace steering mode\n");
+ goto ns_err;
+ }
+
+ /* To be strictly correct:
+ * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+ * should be:
+ * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+ * but as the peer device might not be in switchdev mode it's not
+ * possible. We use the fact that by default FW sets max vfs and max sfs
+ * to the same value on both devices. If it needs to be changed in the future note
+ * the peer miss group should also be created based on the number of
+ * total vports of the peer (currently is also uses esw->total_vports).
+ */
+ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
+ esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS;
+
+ /* create the slow path fdb with encap set, so further table instances
+ * can be created at run time while VFs are probed if the FW allows that.
+ */
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ ft_attr.flags = flags;
+ ft_attr.max_fte = table_size;
+ ft_attr.prio = FDB_SLOW_PATH;
+
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
+ goto slow_fdb_err;
+ }
+ esw->fdb_table.offloads.slow_fdb = fdb;
+
+ /* Create empty TC-miss managed table. This allows plugging in following
+ * priorities without directly exposing their level 0 table to
+ * eswitch_offloads and passing it as miss_fdb to following call to
+ * esw_chains_create().
+ */
+ memset(&ft_attr, 0, sizeof(ft_attr));
+ ft_attr.prio = FDB_TC_MISS;
+ esw->fdb_table.offloads.tc_miss_table = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(esw->fdb_table.offloads.tc_miss_table)) {
+ err = PTR_ERR(esw->fdb_table.offloads.tc_miss_table);
+ esw_warn(dev, "Failed to create TC miss FDB Table err %d\n", err);
+ goto tc_miss_table_err;
+ }
+
+ err = esw_chains_create(esw, esw->fdb_table.offloads.tc_miss_table);
+ if (err) {
+ esw_warn(dev, "Failed to open fdb chains err(%d)\n", err);
+ goto fdb_chains_err;
+ }
+
+ err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto send_vport_err;
+
+ err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto send_vport_meta_err;
+
+ err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto peer_miss_err;
+
+ err = esw_create_miss_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto miss_err;
+
+ kvfree(flow_group_in);
+ return 0;
+
+miss_err:
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
peer_miss_err:
+ if (esw->fdb_table.offloads.send_to_vport_meta_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
+send_vport_meta_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
- mlx5_esw_chains_destroy(esw);
+ esw_chains_destroy(esw, esw_chains(esw));
fdb_chains_err:
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
+tc_miss_table_err:
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
slow_fdb_err:
/* Holds true only as long as DMFS is the default */
@@ -1032,17 +1945,34 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
- mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+ if (esw->fdb_table.offloads.send_to_vport_meta_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
- mlx5_esw_chains_destroy(esw);
+ esw_chains_destroy(esw, esw_chains(esw));
+
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
/* Holds true only as long as DMFS is the default */
mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
MLX5_FLOW_STEERING_MODE_DMFS);
+ atomic64_set(&esw->user_count, 0);
+}
+
+static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw)
+{
+ int nvports;
+
+ nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
+ if (mlx5e_tc_int_port_supported(esw))
+ nvports += MLX5E_TC_MAX_INT_PORT_NUM;
+
+ return nvports;
}
-static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_offloads_table(struct mlx5_eswitch *esw)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
@@ -1056,7 +1986,9 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
return -EOPNOTSUPP;
}
- ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
+ ft_attr.max_fte = esw_get_nr_ft_offloads_steering_src_ports(esw) +
+ MLX5_ESW_FT_OFFLOADS_DROP_RULE;
+ ft_attr.prio = 1;
ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft_offloads)) {
@@ -1076,14 +2008,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
mlx5_destroy_flow_table(offloads->ft_offloads);
}
-static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *g;
u32 *flow_group_in;
+ int nvports;
int err = 0;
- nvports = nvports + MLX5_ESW_MISS_FLOWS;
+ nvports = esw_get_nr_ft_offloads_steering_src_ports(esw);
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
@@ -1113,6 +2046,52 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
}
+static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw)
+{
+ /* ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+ * for the drop rule, which is placed at the end of the table.
+ * So return the total of vport and int_port as rule index.
+ */
+ return esw_get_nr_ft_offloads_steering_src_ports(esw);
+}
+
+static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ u32 *flow_group_in;
+ int flow_index;
+ int err = 0;
+
+ flow_index = esw_create_vport_rx_drop_rule_index(esw);
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err);
+ goto out;
+ }
+
+ esw->offloads.vport_rx_drop_group = g;
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_group)
+ mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group);
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
struct mlx5_flow_destination *dest)
@@ -1134,7 +2113,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
- MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
} else {
@@ -1160,29 +2140,185 @@ out:
return flow_rule;
}
-static int esw_offloads_start(struct mlx5_eswitch *esw,
- struct netlink_ext_ack *extack)
+static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw)
{
- int err, err1;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
- if (esw->mode != MLX5_ESWITCH_LEGACY &&
- !mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Can't set offloads mode, SRIOV legacy not enabled");
- return -EINVAL;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL,
+ &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev,
+ "fs offloads: Failed to add vport rx drop rule err %ld\n",
+ PTR_ERR(flow_rule));
+ return PTR_ERR(flow_rule);
}
- mlx5_eswitch_disable(esw, false);
- mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
- err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
+ esw->offloads.vport_rx_drop_rule = flow_rule;
+
+ return 0;
+}
+
+static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_rule)
+ mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule);
+}
+
+static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
+{
+ u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager))
+ return -EOPNOTSUPP;
+
+ if (!mlx5_esw_is_fdb_created(esw))
+ return -EOPNOTSUPP;
+
+ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ mlx5_mode = MLX5_INLINE_MODE_NONE;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_L2:
+ mlx5_mode = MLX5_INLINE_MODE_L2;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ goto query_vports;
+ }
+
+query_vports:
+ mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode);
+ if (prev_mlx5_mode != mlx5_mode)
+ return -EINVAL;
+ prev_mlx5_mode = mlx5_mode;
+ }
+
+out:
+ *mode = mlx5_mode;
+ return 0;
+}
+
+static void esw_destroy_restore_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
+ return;
+
+ mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id);
+ mlx5_destroy_flow_group(offloads->restore_group);
+ mlx5_destroy_flow_table(offloads->ft_offloads_restore);
+}
+
+static int esw_create_restore_table(struct mlx5_eswitch *esw)
+{
+ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_modify_hdr *mod_hdr;
+ void *match_criteria, *misc;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *g;
+ u32 *flow_group_in;
+ int err = 0;
+
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
+ return 0;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+ if (!ns) {
+ esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ esw_warn(esw->dev, "Failed to create restore table, err %d\n",
+ err);
+ goto out_free;
+ }
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ misc = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ ESW_REG_C0_USER_DATA_METADATA_MASK);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ft_attr.max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create restore flow group, err: %d\n",
+ err);
+ goto err_group;
+ }
+
+ MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY);
+ MLX5_SET(copy_action_in, modact, src_field,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
+ MLX5_SET(copy_action_in, modact, dst_field,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ mod_hdr = mlx5_modify_header_alloc(esw->dev,
+ MLX5_FLOW_NAMESPACE_KERNEL, 1,
+ modact);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ esw_warn(dev, "Failed to create restore mod header, err: %d\n",
+ err);
+ goto err_mod_hdr;
+ }
+
+ esw->offloads.ft_offloads_restore = ft;
+ esw->offloads.restore_group = g;
+ esw->offloads.restore_copy_hdr_id = mod_hdr;
+
+ kvfree(flow_group_in);
+
+ return 0;
+
+err_mod_hdr:
+ mlx5_destroy_flow_group(g);
+err_group:
+ mlx5_destroy_flow_table(ft);
+out_free:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static int esw_offloads_start(struct mlx5_eswitch *esw,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ esw->mode = MLX5_ESWITCH_OFFLOADS;
+ err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch to offloads");
- err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
- if (err1) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed setting eswitch back to legacy");
- }
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ mlx5_rescan_drivers(esw->dev);
}
if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
if (mlx5_eswitch_inline_mode_get(esw,
@@ -1195,34 +2331,82 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
return err;
}
-void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ xa_mark_t mark)
{
- kfree(esw->offloads.vport_reps);
+ bool mark_set;
+
+ /* Copy the mark from vport to its rep */
+ mark_set = xa_get_mark(&esw->vports, rep->vport, mark);
+ if (mark_set)
+ xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark);
}
-int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
{
- int total_vports = esw->total_vports;
struct mlx5_eswitch_rep *rep;
- int vport_index;
- u8 rep_type;
+ int rep_type;
+ int err;
- esw->offloads.vport_reps = kcalloc(total_vports,
- sizeof(struct mlx5_eswitch_rep),
- GFP_KERNEL);
- if (!esw->offloads.vport_reps)
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
return -ENOMEM;
- mlx5_esw_for_all_reps(esw, vport_index, rep) {
- rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
- rep->vport_index = vport_index;
+ rep->vport = vport->vport;
+ rep->vport_index = vport->index;
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+ atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
+
+ err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
+ if (err)
+ goto insert_err;
- for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
- atomic_set(&rep->rep_data[rep_type].state,
- REP_UNREGISTERED);
- }
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF);
+ return 0;
+
+insert_err:
+ kfree(rep);
+ return err;
+}
+
+static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep)
+{
+ xa_erase(&esw->offloads.vport_reps, rep->vport);
+ kfree(rep);
+}
+
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ mlx5_esw_for_each_rep(esw, i, rep)
+ mlx5_esw_offloads_rep_cleanup(esw, rep);
+ xa_destroy(&esw->offloads.vport_reps);
+}
+int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ xa_init(&esw->offloads.vport_reps);
+
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ err = mlx5_esw_offloads_rep_init(esw, vport);
+ if (err)
+ goto err;
+ }
return 0;
+
+err:
+ esw_offloads_cleanup_reps(esw);
+ return err;
}
static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
@@ -1233,9 +2417,24 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
esw->offloads.rep_ops[rep_type]->unload(rep);
}
-static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
+static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ mlx5_esw_for_each_sf_rep(esw, i, rep)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ __unload_reps_sf_vport(esw, rep_type);
+
+ mlx5_esw_for_each_vf_rep(esw, i, rep)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
if (mlx5_ecpf_vport_exists(esw->dev)) {
rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
@@ -1251,190 +2450,341 @@ static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
__esw_offloads_unload_rep(esw, rep, rep_type);
}
-static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
+int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ int rep_type;
+ int err;
+
+ rep = mlx5_eswitch_get_rep(esw, vport_num);
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+ if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
+ REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
+ err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
+ if (err)
+ goto err_reps;
+ }
+
+ return 0;
- mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports)
+err_reps:
+ atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED);
+ for (--rep_type; rep_type >= 0; rep_type--)
__esw_offloads_unload_rep(esw, rep, rep_type);
+ return err;
}
-static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
+void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num)
{
- u8 rep_type = NUM_REP_TYPES;
+ struct mlx5_eswitch_rep *rep;
+ int rep_type;
- while (rep_type-- > 0)
- __unload_reps_vf_vport(esw, nvports, rep_type);
+ rep = mlx5_eswitch_get_rep(esw, vport_num);
+ for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
}
-static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
{
- __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
+ int err;
+
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ if (vport_num != MLX5_VPORT_UPLINK) {
+ err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
+ if (err)
+ return err;
+ }
+
+ err = mlx5_esw_offloads_rep_load(esw, vport_num);
+ if (err)
+ goto load_err;
+ return err;
- /* Special vports must be the last to unload. */
- __unload_reps_special_vport(esw, rep_type);
+load_err:
+ if (vport_num != MLX5_VPORT_UPLINK)
+ mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+ return err;
}
-static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw)
+void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
{
- u8 rep_type = NUM_REP_TYPES;
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return;
- while (rep_type-- > 0)
- __unload_reps_all_vport(esw, rep_type);
+ mlx5_esw_offloads_rep_unload(esw, vport_num);
+
+ if (vport_num != MLX5_VPORT_UPLINK)
+ mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
}
-static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep, u8 rep_type)
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
{
- int err = 0;
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ int err;
- if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
- REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
- err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
- if (err)
- atomic_set(&rep->rep_data[rep_type].state,
- REP_REGISTERED);
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+
+ if (master) {
+ ns = mlx5_get_flow_namespace(master,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
}
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
return err;
}
-static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_table *acl)
{
- struct mlx5_eswitch_rep *rep;
- int err;
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
- rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
- err = __esw_offloads_load_rep(esw, rep, rep_type);
- if (err)
- return err;
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
- err = __esw_offloads_load_rep(esw, rep, rep_type);
- if (err)
- goto err_pf;
- }
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(slave, vhca_id));
- if (mlx5_ecpf_vport_exists(esw->dev)) {
- rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
- err = __esw_offloads_load_rep(esw, rep, rep_type);
- if (err)
- goto err_ecpf;
- }
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
- return 0;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = slave->priv.eswitch->manager_vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
-err_ecpf:
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
- __esw_offloads_unload_rep(esw, rep, rep_type);
- }
+ flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+ &dest, 1);
+ if (IS_ERR(flow_rule))
+ err = PTR_ERR(flow_rule);
+ else
+ vport->egress.offloads.bounce_rule = flow_rule;
-err_pf:
- rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
- __esw_offloads_unload_rep(esw, rep, rep_type);
+ kvfree(spec);
return err;
}
-static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
{
- struct mlx5_eswitch_rep *rep;
- int err, i;
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_eswitch *esw = master->priv.eswitch;
+ struct mlx5_flow_table_attr ft_attr = {
+ .max_fte = 1, .prio = 0, .level = 0,
+ .flags = MLX5_FLOW_TABLE_OTHER_VPORT,
+ };
+ struct mlx5_flow_namespace *egress_ns;
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *g;
+ struct mlx5_vport *vport;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
- mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) {
- err = __esw_offloads_load_rep(esw, rep, rep_type);
- if (err)
- goto err_vf;
+ vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->index);
+ if (!egress_ns)
+ return -EINVAL;
+
+ if (vport->egress.acl)
+ return -EINVAL;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ goto out;
}
- return 0;
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
-err_vf:
- __unload_reps_vf_vport(esw, --i, rep_type);
- return err;
-}
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
-static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
-{
- int err;
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ goto err_group;
+ }
- /* Special vports must be loaded first, uplink rep creates mdev resource. */
- err = __load_reps_special_vport(esw, rep_type);
+ err = __esw_set_master_egress_rule(master, slave, vport, acl);
if (err)
- return err;
+ goto err_rule;
- err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
- if (err)
- goto err_vfs;
+ vport->egress.acl = acl;
+ vport->egress.offloads.bounce_grp = g;
+
+ kvfree(flow_group_in);
return 0;
-err_vfs:
- __unload_reps_special_vport(esw, rep_type);
+err_rule:
+ mlx5_destroy_flow_group(g);
+err_group:
+ mlx5_destroy_flow_table(acl);
+out:
+ kvfree(flow_group_in);
return err;
}
-static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
{
- u8 rep_type = 0;
- int err;
-
- for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
- err = __load_reps_vf_vport(esw, nvports, rep_type);
- if (err)
- goto err_reps;
- }
+ struct mlx5_vport *vport;
- return err;
+ vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+ dev->priv.eswitch->manager_vport);
-err_reps:
- while (rep_type-- > 0)
- __unload_reps_vf_vport(esw, nvports, rep_type);
- return err;
+ esw_acl_egress_ofld_cleanup(vport);
}
-static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw)
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
{
- u8 rep_type = 0;
int err;
- for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
- err = __load_reps_all_vport(esw, rep_type);
- if (err)
- goto err_reps;
- }
+ err = esw_set_slave_root_fdb(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ return err;
+
+ err = esw_set_master_egress_rule(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_acl;
return err;
-err_reps:
- while (rep_type-- > 0)
- __unload_reps_all_vport(esw, rep_type);
+err_acl:
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
return err;
}
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ esw_unset_master_egress_rule(master_esw->dev);
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+}
+
#define ESW_OFFLOADS_DEVCOM_PAIR (0)
#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
+{
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ rep_type = NUM_REP_TYPES;
+ while (rep_type--) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event)
+ ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+ }
+ }
+}
+
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+{
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+ mlx5e_tc_clean_fdb_peer_flows(esw);
+#endif
+ mlx5_esw_offloads_rep_event_unpair(esw);
+ esw_del_fdb_peer_miss_rules(esw);
+}
+
static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
struct mlx5_eswitch *peer_esw)
{
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
int err;
err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
if (err)
return err;
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event) {
+ err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+ if (err)
+ goto err_out;
+ }
+ }
+ }
+
return 0;
-}
-static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
-{
- mlx5e_tc_clean_fdb_peer_flows(esw);
- esw_del_fdb_peer_miss_rules(esw);
+err_out:
+ mlx5_esw_offloads_unpair(esw);
+ return err;
}
static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
@@ -1528,6 +2878,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_register_component(devcom,
MLX5_DEVCOM_ESW_OFFLOADS,
mlx5_esw_offloads_devcom_event,
@@ -1545,317 +2898,174 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}
-static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
{
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_spec *spec;
- int err = 0;
+ if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+ return false;
- /* For prio tag mode, there is only 1 FTEs:
- * 1) Untagged packets - push prio tag VLAN and modify metadata if
- * required, allow
- * Unmatched traffic is allowed by default
- */
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return -ENOMEM;
+ if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_0))
+ return false;
- /* Untagged packets - push prio tag VLAN, allow */
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
- MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- flow_act.vlan[0].ethtype = ETH_P_8021Q;
- flow_act.vlan[0].vid = 0;
- flow_act.vlan[0].prio = 0;
-
- if (vport->ingress.offloads.modify_metadata_rule) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
- }
-
- vport->ingress.allow_rule =
- mlx5_add_flow_rules(vport->ingress.acl, spec,
- &flow_act, NULL, 0);
- if (IS_ERR(vport->ingress.allow_rule)) {
- err = PTR_ERR(vport->ingress.allow_rule);
- esw_warn(esw->dev,
- "vport[%d] configure ingress untagged allow rule, err(%d)\n",
- vport->vport, err);
- vport->ingress.allow_rule = NULL;
- }
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return false;
- kvfree(spec);
- return err;
+ return true;
}
-static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+#define MLX5_ESW_METADATA_RSVD_UPLINK 1
+
+/* Share the same metadata for uplink's. This is fine because:
+ * (a) In shared FDB mode (LAG) both uplink's are treated the
+ * same and tagged with the same metadata.
+ * (b) In non shared FDB mode, packets from physical port0
+ * cannot hit eswitch of PF1 and vice versa.
+ */
+static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw)
{
- u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
- static const struct mlx5_flow_spec spec = {};
- struct mlx5_flow_act flow_act = {};
- int err = 0;
+ return MLX5_ESW_METADATA_RSVD_UPLINK;
+}
- MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
- MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
- MLX5_SET(set_action_in, action, data,
- mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
+{
+ u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
+ /* Reserve 0xf for internal port offload */
+ u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2;
+ u32 pf_num;
+ int id;
- vport->ingress.offloads.modify_metadata =
- mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- 1, action);
- if (IS_ERR(vport->ingress.offloads.modify_metadata)) {
- err = PTR_ERR(vport->ingress.offloads.modify_metadata);
- esw_warn(esw->dev,
- "failed to alloc modify header for vport %d ingress acl (%d)\n",
- vport->vport, err);
- return err;
- }
+ /* Only 4 bits of pf_num */
+ pf_num = mlx5_get_dev_index(esw->dev);
+ if (pf_num > max_pf_num)
+ return 0;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
- vport->ingress.offloads.modify_metadata_rule =
- mlx5_add_flow_rules(vport->ingress.acl,
- &spec, &flow_act, NULL, 0);
- if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) {
- err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule);
- esw_warn(esw->dev,
- "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
- vport->vport, err);
- mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
- vport->ingress.offloads.modify_metadata_rule = NULL;
- }
- return err;
+ /* Metadata is 4 bits of PFNUM and 12 bits of unique id */
+ /* Use only non-zero vport_id (2-4095) for all PF's */
+ id = ida_alloc_range(&esw->offloads.vport_metadata_ida,
+ MLX5_ESW_METADATA_RSVD_UPLINK + 1,
+ vport_end_ida, GFP_KERNEL);
+ if (id < 0)
+ return 0;
+ id = (pf_num << ESW_VPORT_BITS) | id;
+ return id;
}
-static void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
{
- if (vport->ingress.offloads.modify_metadata_rule) {
- mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule);
- mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+ u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1;
- vport->ingress.offloads.modify_metadata_rule = NULL;
- }
+ /* Metadata contains only 12 bits of actual ida id */
+ ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask);
}
-static int esw_vport_create_ingress_acl_group(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_group *g;
- void *match_criteria;
- u32 *flow_group_in;
- u32 flow_index = 0;
- int ret = 0;
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
-
- if (esw_check_ingress_prio_tag_enabled(esw, vport)) {
- /* This group is to hold FTE to match untagged packets when prio_tag
- * is enabled.
- */
- memset(flow_group_in, 0, inlen);
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ vport->default_metadata = mlx5_esw_match_metadata_reserved(esw);
+ else
+ vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in,
- flow_group_in, match_criteria);
- MLX5_SET(create_flow_group_in, flow_group_in,
- match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- ret = PTR_ERR(g);
- esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
- vport->vport, ret);
- goto prio_tag_err;
- }
- vport->ingress.offloads.metadata_prio_tag_grp = g;
- flow_index++;
- }
+ vport->metadata = vport->default_metadata;
+ return vport->metadata ? 0 : -ENOSPC;
+}
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- /* This group holds an FTE with no matches for add metadata for
- * tagged packets, if prio-tag is enabled (as a fallthrough),
- * or all traffic in case prio-tag is disabled.
- */
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- ret = PTR_ERR(g);
- esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
- vport->vport, ret);
- goto metadata_err;
- }
- vport->ingress.offloads.metadata_allmatch_grp = g;
- }
+static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->default_metadata)
+ return;
- kvfree(flow_group_in);
- return 0;
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ return;
-metadata_err:
- if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) {
- mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
- vport->ingress.offloads.metadata_prio_tag_grp = NULL;
- }
-prio_tag_err:
- kvfree(flow_group_in);
- return ret;
+ WARN_ON(vport->metadata != vport->default_metadata);
+ mlx5_esw_match_metadata_free(esw, vport->default_metadata);
}
-static void esw_vport_destroy_ingress_acl_group(struct mlx5_vport *vport)
+static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
{
- if (vport->ingress.offloads.metadata_allmatch_grp) {
- mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp);
- vport->ingress.offloads.metadata_allmatch_grp = NULL;
- }
+ struct mlx5_vport *vport;
+ unsigned long i;
- if (vport->ingress.offloads.metadata_prio_tag_grp) {
- mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
- vport->ingress.offloads.metadata_prio_tag_grp = NULL;
- }
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return;
+
+ mlx5_esw_for_each_vport(esw, i, vport)
+ esw_offloads_vport_metadata_cleanup(esw, vport);
}
-static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
{
- int num_ftes = 0;
+ struct mlx5_vport *vport;
+ unsigned long i;
int err;
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
- !esw_check_ingress_prio_tag_enabled(esw, vport))
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
return 0;
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw))
- num_ftes++;
- if (esw_check_ingress_prio_tag_enabled(esw, vport))
- num_ftes++;
-
- err = esw_vport_create_ingress_acl_table(esw, vport, num_ftes);
- if (err) {
- esw_warn(esw->dev,
- "failed to enable ingress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- err = esw_vport_create_ingress_acl_group(esw, vport);
- if (err)
- goto group_err;
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules\n", vport->vport);
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ err = esw_offloads_vport_metadata_setup(esw, vport);
if (err)
goto metadata_err;
}
- if (esw_check_ingress_prio_tag_enabled(esw, vport)) {
- err = esw_vport_ingress_prio_tag_config(esw, vport);
- if (err)
- goto prio_tag_err;
- }
return 0;
-prio_tag_err:
- esw_vport_del_ingress_acl_modify_metadata(esw, vport);
metadata_err:
- esw_vport_destroy_ingress_acl_group(vport);
-group_err:
- esw_vport_destroy_ingress_acl_table(vport);
+ esw_offloads_metadata_uninit(esw);
return err;
}
-static int esw_vport_egress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable)
{
- int err;
-
- if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
- return 0;
-
- esw_vport_cleanup_egress_rules(esw, vport);
-
- err = esw_vport_enable_egress_acl(esw, vport);
- if (err)
- return err;
-
- /* For prio tag mode, there is only 1 FTEs:
- * 1) prio tag packets - pop the prio tag VLAN, allow
- * Unmatched traffic is allowed by default
- */
- esw_debug(esw->dev,
- "vport[%d] configure prio tag egress rules\n", vport->vport);
-
- /* prio tag vlan rule - pop it so VF receives untagged packets */
- err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, 0,
- MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
- MLX5_FLOW_CONTEXT_ACTION_ALLOW);
- if (err)
- esw_vport_disable_egress_acl(esw, vport);
+ int err = 0;
+ down_write(&esw->mode_lock);
+ if (mlx5_esw_is_fdb_created(esw)) {
+ err = -EBUSY;
+ goto done;
+ }
+ if (!mlx5_esw_vport_match_metadata_supported(esw)) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+ if (enable)
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+ else
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
+done:
+ up_write(&esw->mode_lock);
return err;
}
-static bool
-esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
-{
- if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
- return false;
-
- if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
- MLX5_FDB_TO_VPORT_REG_C_0))
- return false;
-
- if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
- return false;
-
- if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
- mlx5_ecpf_vport_exists(esw->dev))
- return false;
-
- return true;
-}
-
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
int err;
- err = esw_vport_ingress_config(esw, vport);
+ err = esw_acl_ingress_ofld_setup(esw, vport);
if (err)
return err;
- if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
- err = esw_vport_egress_config(esw, vport);
- if (err) {
- esw_vport_cleanup_ingress_rules(esw, vport);
- esw_vport_del_ingress_acl_modify_metadata(esw, vport);
- esw_vport_destroy_ingress_acl_group(vport);
- esw_vport_destroy_ingress_acl_table(vport);
- }
- }
+ err = esw_acl_egress_ofld_setup(esw, vport);
+ if (err)
+ goto egress_err;
+
+ return 0;
+
+egress_err:
+ esw_acl_ingress_ofld_cleanup(esw, vport);
return err;
}
@@ -1863,26 +3073,19 @@ void
esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- esw_vport_disable_egress_acl(esw, vport);
- esw_vport_cleanup_ingress_rules(esw, vport);
- esw_vport_del_ingress_acl_modify_metadata(esw, vport);
- esw_vport_destroy_ingress_acl_group(vport);
- esw_vport_destroy_ingress_acl_table(vport);
+ esw_acl_egress_ofld_cleanup(vport);
+ esw_acl_ingress_ofld_cleanup(esw, vport);
}
static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int err;
-
- if (esw_check_vport_match_metadata_supported(esw))
- esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
- err = esw_vport_create_offloads_acl_tables(esw, vport);
- if (err)
- esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
- return err;
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ return esw_vport_create_offloads_acl_tables(esw, vport);
}
static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
@@ -1890,64 +3093,120 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ if (IS_ERR(vport))
+ return;
+
esw_vport_destroy_offloads_acl_tables(esw, vport);
- esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
+}
+
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ int ret;
+
+ if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+ return 0;
+
+ ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+ if (ret)
+ return ret;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
+ mlx5_esw_offloads_rep_load(esw, rep->vport);
+ }
+
+ return 0;
}
static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
- int num_vfs = esw->esw_funcs.num_vfs;
- int total_vports;
+ struct mlx5_esw_indir_table *indir;
int err;
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- total_vports = esw->total_vports;
- else
- total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
-
memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
+ mutex_init(&esw->fdb_table.offloads.vports.lock);
+ hash_init(esw->fdb_table.offloads.vports.table);
+ atomic64_set(&esw->user_count, 0);
+
+ indir = mlx5_esw_indir_table_init();
+ if (IS_ERR(indir)) {
+ err = PTR_ERR(indir);
+ goto create_indir_err;
+ }
+ esw->fdb_table.offloads.indir = indir;
err = esw_create_uplink_offloads_acl_tables(esw);
if (err)
- return err;
+ goto create_acl_err;
- err = esw_create_offloads_fdb_tables(esw, total_vports);
+ err = esw_create_offloads_table(esw);
if (err)
- goto create_fdb_err;
+ goto create_offloads_err;
- err = esw_create_offloads_table(esw, total_vports);
+ err = esw_create_restore_table(esw);
if (err)
- goto create_ft_err;
+ goto create_restore_err;
- err = esw_create_vport_rx_group(esw, total_vports);
+ err = esw_create_offloads_fdb_tables(esw);
+ if (err)
+ goto create_fdb_err;
+
+ err = esw_create_vport_rx_group(esw);
if (err)
goto create_fg_err;
+ err = esw_create_vport_rx_drop_group(esw);
+ if (err)
+ goto create_rx_drop_fg_err;
+
+ err = esw_create_vport_rx_drop_rule(esw);
+ if (err)
+ goto create_rx_drop_rule_err;
+
return 0;
+create_rx_drop_rule_err:
+ esw_destroy_vport_rx_drop_group(esw);
+create_rx_drop_fg_err:
+ esw_destroy_vport_rx_group(esw);
create_fg_err:
- esw_destroy_offloads_table(esw);
-
-create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
-
create_fdb_err:
+ esw_destroy_restore_table(esw);
+create_restore_err:
+ esw_destroy_offloads_table(esw);
+create_offloads_err:
esw_destroy_uplink_offloads_acl_tables(esw);
-
+create_acl_err:
+ mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
+create_indir_err:
+ mutex_destroy(&esw->fdb_table.offloads.vports.lock);
return err;
}
static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
{
+ esw_destroy_vport_rx_drop_rule(esw);
+ esw_destroy_vport_rx_drop_group(esw);
esw_destroy_vport_rx_group(esw);
- esw_destroy_offloads_table(esw);
esw_destroy_offloads_fdb_tables(esw);
+ esw_destroy_restore_table(esw);
+ esw_destroy_offloads_table(esw);
esw_destroy_uplink_offloads_acl_tables(esw);
+ mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
+ mutex_destroy(&esw->fdb_table.offloads.vports.lock);
}
static void
esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
{
+ struct devlink *devlink;
bool host_pf_disabled;
u16 new_num_vfs;
@@ -1959,17 +3218,23 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
return;
+ devlink = priv_to_devlink(esw->dev);
+ devl_lock(devlink);
/* Number of VFs can only change from "0 to x" or "x to 0". */
if (esw->esw_funcs.num_vfs > 0) {
- esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs);
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
} else {
int err;
- err = esw_offloads_load_vf_reps(esw, new_num_vfs);
- if (err)
+ err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs,
+ MLX5_VPORT_UC_ADDR_CHANGE);
+ if (err) {
+ devl_unlock(devlink);
return;
+ }
}
esw->esw_funcs.num_vfs = new_num_vfs;
+ devl_unlock(devlink);
}
static void esw_functions_changed_event_handler(struct work_struct *work)
@@ -2012,69 +3277,119 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type
return NOTIFY_OK;
}
+static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
+{
+ const u32 *query_host_out;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return 0;
+
+ query_host_out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(query_host_out))
+ return PTR_ERR(query_host_out);
+
+ /* Mark non local controller with non zero controller number. */
+ esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out,
+ host_params_context.host_number);
+ kvfree(query_host_out);
+ return 0;
+}
+
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller)
+{
+ /* Local controller is always valid */
+ if (controller == 0)
+ return true;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return false;
+
+ /* External host number starts with zero in device */
+ return (controller == esw->offloads.host_number + 1);
+}
+
int esw_offloads_enable(struct mlx5_eswitch *esw)
{
+ struct mapping_ctx *reg_c0_obj_pool;
struct mlx5_vport *vport;
- int err, i;
-
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
- MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
- else
- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ unsigned long i;
+ u64 mapping_id;
+ int err;
+ mutex_init(&esw->offloads.termtbl_mutex);
mlx5_rdma_enable_roce(esw->dev);
- err = esw_offloads_steering_init(esw);
+
+ err = mlx5_esw_host_number_init(esw);
if (err)
- goto err_steering_init;
+ goto err_metadata;
+
+ err = esw_offloads_metadata_init(esw);
+ if (err)
+ goto err_metadata;
err = esw_set_passing_vport_metadata(esw, true);
if (err)
goto err_vport_metadata;
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ ESW_REG_C0_USER_DATA_METADATA_MASK,
+ true);
+
+ if (IS_ERR(reg_c0_obj_pool)) {
+ err = PTR_ERR(reg_c0_obj_pool);
+ goto err_pool;
+ }
+ esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool;
+
+ err = esw_offloads_steering_init(esw);
+ if (err)
+ goto err_steering_init;
+
/* Representor will control the vport link state */
mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN;
- err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
+ /* Uplink vport rep must load first. */
+ err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK);
if (err)
- goto err_vports;
+ goto err_uplink;
- err = esw_offloads_load_all_reps(esw);
+ err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
if (err)
- goto err_reps;
+ goto err_vports;
esw_offloads_devcom_init(esw);
- mutex_init(&esw->offloads.termtbl_mutex);
return 0;
-err_reps:
- mlx5_eswitch_disable_pf_vf_vports(esw);
err_vports:
- esw_set_passing_vport_metadata(esw, false);
-err_vport_metadata:
+ esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
+err_uplink:
esw_offloads_steering_cleanup(esw);
err_steering_init:
+ mapping_destroy(reg_c0_obj_pool);
+err_pool:
+ esw_set_passing_vport_metadata(esw, false);
+err_vport_metadata:
+ esw_offloads_metadata_uninit(esw);
+err_metadata:
mlx5_rdma_disable_roce(esw->dev);
+ mutex_destroy(&esw->offloads.termtbl_mutex);
return err;
}
static int esw_offloads_stop(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1;
+ int err;
- mlx5_eswitch_disable(esw, false);
- err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
- if (err) {
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS);
+ if (err)
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
- err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
- if (err1) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed setting eswitch back to offloads");
- }
- }
return err;
}
@@ -2082,12 +3397,14 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_disable(struct mlx5_eswitch *esw)
{
esw_offloads_devcom_cleanup(esw);
- esw_offloads_unload_all_reps(esw);
mlx5_eswitch_disable_pf_vf_vports(esw);
+ esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
esw_set_passing_vport_metadata(esw, false);
esw_offloads_steering_cleanup(esw);
+ mapping_destroy(esw->offloads.reg_c0_obj_pool);
+ esw_offloads_metadata_uninit(esw);
mlx5_rdma_disable_roce(esw->dev);
- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ mutex_destroy(&esw->offloads.termtbl_mutex);
}
static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
@@ -2166,82 +3483,127 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
return 0;
}
-static int mlx5_devlink_eswitch_check(struct devlink *devlink)
-{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
-
- if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
- return -EOPNOTSUPP;
-
- if(!MLX5_ESWITCH_MANAGER(dev))
- return -EPERM;
-
- if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
- !mlx5_core_is_ecpf_esw_manager(dev))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
u16 cur_mlx5_mode, mlx5_mode = 0;
- int err;
-
- err = mlx5_devlink_eswitch_check(devlink);
- if (err)
- return err;
+ struct mlx5_eswitch *esw;
+ int err = 0;
- cur_mlx5_mode = dev->priv.eswitch->mode;
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ mlx5_lag_disable_change(esw->dev);
+ err = mlx5_esw_try_lock(esw);
+ if (err < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
+ goto enable_lag;
+ }
+ cur_mlx5_mode = err;
+ err = 0;
+
if (cur_mlx5_mode == mlx5_mode)
- return 0;
+ goto unlock;
- if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
- return esw_offloads_start(dev->priv.eswitch, extack);
- else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
- return esw_offloads_stop(dev->priv.eswitch, extack);
- else
- return -EINVAL;
+ mlx5_eswitch_disable_locked(esw);
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+ if (mlx5_devlink_trap_get_num_active(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change mode while devlink traps are active");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ err = esw_offloads_start(esw, extack);
+ } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
+ err = esw_offloads_stop(esw, extack);
+ mlx5_rescan_drivers(esw->dev);
+ } else {
+ err = -EINVAL;
+ }
+
+unlock:
+ mlx5_esw_unlock(esw);
+enable_lag:
+ mlx5_lag_enable_change(esw->dev);
+ return err;
}
int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw;
int err;
- err = mlx5_devlink_eswitch_check(devlink);
- if (err)
- return err;
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
+ err = esw_mode_to_devlink(esw->mode, mode);
+ up_write(&esw->mode_lock);
+ return err;
+}
+
+static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_vport *vport;
+ u16 err_vport_num = 0;
+ unsigned long i;
+ int err = 0;
- return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
+ if (err) {
+ err_vport_num = vport->vport;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to set min inline on vport");
+ goto revert_inline_mode;
+ }
+ }
+ return 0;
+
+revert_inline_mode:
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ if (vport->vport == err_vport_num)
+ break;
+ mlx5_modify_nic_vport_min_inline(dev,
+ vport->vport,
+ esw->offloads.inline_mode);
+ }
+ return err;
}
int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- struct mlx5_eswitch *esw = dev->priv.eswitch;
- int err, vport, num_vport;
+ struct mlx5_eswitch *esw;
u8 mlx5_mode;
+ int err;
- err = mlx5_devlink_eswitch_check(devlink);
- if (err)
- return err;
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
- if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
- return 0;
- /* fall through */
+ if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
+ err = 0;
+ goto out;
+ }
+
+ fallthrough;
case MLX5_CAP_INLINE_MODE_L2:
NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto out;
case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
break;
}
@@ -2249,83 +3611,40 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
if (atomic64_read(&esw->offloads.num_flows) > 0) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set inline mode when flows are configured");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto out;
}
err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
if (err)
goto out;
- mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
- err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed to set min inline on vport");
- goto revert_inline_mode;
- }
- }
+ err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
+ if (err)
+ goto out;
esw->offloads.inline_mode = mlx5_mode;
+ up_write(&esw->mode_lock);
return 0;
-revert_inline_mode:
- num_vport = --vport;
- mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
- mlx5_modify_nic_vport_min_inline(dev,
- vport,
- esw->offloads.inline_mode);
out:
+ up_write(&esw->mode_lock);
return err;
}
int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
- struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_eswitch *esw;
int err;
- err = mlx5_devlink_eswitch_check(devlink);
- if (err)
- return err;
-
- return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
-}
-
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
-{
- u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
- struct mlx5_core_dev *dev = esw->dev;
- int vport;
-
- if (!MLX5_CAP_GEN(dev, vport_group_manager))
- return -EOPNOTSUPP;
-
- if (esw->mode == MLX5_ESWITCH_NONE)
- return -EOPNOTSUPP;
-
- switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
- case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
- mlx5_mode = MLX5_INLINE_MODE_NONE;
- goto out;
- case MLX5_CAP_INLINE_MODE_L2:
- mlx5_mode = MLX5_INLINE_MODE_L2;
- goto out;
- case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
- goto query_vports;
- }
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
-query_vports:
- mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
- mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
- mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
- if (prev_mlx5_mode != mlx5_mode)
- return -EINVAL;
- prev_mlx5_mode = mlx5_mode;
- }
-
-out:
- *mode = mlx5_mode;
- return 0;
+ down_write(&esw->mode_lock);
+ err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+ up_write(&esw->mode_lock);
+ return err;
}
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
@@ -2333,78 +3652,105 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- struct mlx5_eswitch *esw = dev->priv.eswitch;
- int err;
+ struct mlx5_eswitch *esw;
+ int err = 0;
- err = mlx5_devlink_eswitch_check(devlink);
- if (err)
- return err;
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
(!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
- !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
- return -EOPNOTSUPP;
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
- if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
- return -EOPNOTSUPP;
+ if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
if (esw->mode == MLX5_ESWITCH_LEGACY) {
esw->offloads.encap = encap;
- return 0;
+ goto unlock;
}
if (esw->offloads.encap == encap)
- return 0;
+ goto unlock;
if (atomic64_read(&esw->offloads.num_flows) > 0) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set encapsulation when flows are configured");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto unlock;
}
esw_destroy_offloads_fdb_tables(esw);
esw->offloads.encap = encap;
- err = esw_create_offloads_fdb_tables(esw, esw->nvports);
+ err = esw_create_offloads_fdb_tables(esw);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed re-creating fast FDB table");
esw->offloads.encap = !encap;
- (void)esw_create_offloads_fdb_tables(esw, esw->nvports);
+ (void)esw_create_offloads_fdb_tables(esw);
}
+unlock:
+ up_write(&esw->mode_lock);
return err;
}
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
enum devlink_eswitch_encap_mode *encap)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
- struct mlx5_eswitch *esw = dev->priv.eswitch;
- int err;
+ struct mlx5_eswitch *esw;
- err = mlx5_devlink_eswitch_check(devlink);
- if (err)
- return err;
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+ down_write(&esw->mode_lock);
*encap = esw->offloads.encap;
+ up_write(&esw->mode_lock);
return 0;
}
+static bool
+mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ /* Currently, only ECPF based device has representor for host PF. */
+ if (vport_num == MLX5_VPORT_PF &&
+ !mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return false;
+
+ if (vport_num == MLX5_VPORT_ECPF &&
+ !mlx5_ecpf_vport_exists(esw->dev))
+ return false;
+
+ return true;
+}
+
void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
const struct mlx5_eswitch_rep_ops *ops,
u8 rep_type)
{
struct mlx5_eswitch_rep_data *rep_data;
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
esw->offloads.rep_ops[rep_type] = ops;
- mlx5_esw_for_all_reps(esw, i, rep) {
- rep_data = &rep->rep_data[rep_type];
- atomic_set(&rep_data->state, REP_REGISTERED);
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) {
+ rep->esw = esw;
+ rep_data = &rep->rep_data[rep_type];
+ atomic_set(&rep_data->state, REP_REGISTERED);
+ }
}
}
EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
@@ -2412,12 +3758,12 @@ EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
if (esw->mode == MLX5_ESWITCH_OFFLOADS)
__unload_reps_all_vport(esw, rep_type);
- mlx5_esw_for_all_reps(esw, i, rep)
+ mlx5_esw_for_each_rep(esw, i, rep)
atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
}
EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
@@ -2458,11 +3804,11 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
{
- return vport_num >= MLX5_VPORT_FIRST_VF &&
- vport_num <= esw->dev->priv.sriov.max_vfs;
+ return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED);
}
+EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled);
bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
{
@@ -2470,9 +3816,199 @@ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
}
EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
-u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
u16 vport_num)
{
- return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport)))
+ return 0;
+
+ return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
}
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
+
+int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum)
+{
+ int err;
+
+ err = mlx5_esw_vport_enable(esw, vport_num, MLX5_VPORT_UC_ADDR_CHANGE);
+ if (err)
+ return err;
+
+ err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum);
+ if (err)
+ goto devlink_err;
+
+ mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
+ err = mlx5_esw_offloads_rep_load(esw, vport_num);
+ if (err)
+ goto rep_err;
+ return 0;
+
+rep_err:
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
+devlink_err:
+ mlx5_esw_vport_disable(esw, vport_num);
+ return err;
+}
+
+void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ mlx5_esw_offloads_rep_unload(esw, vport_num);
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
+ mlx5_esw_vport_disable(esw, vport_num);
+}
+
+static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ *vhca_id = 0;
+ if (mlx5_esw_is_manager_vport(esw, vport_num) ||
+ !MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ return -EPERM;
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
+int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ u16 *old_entry, *vhca_map_entry, vhca_id;
+ int err;
+
+ err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ if (err) {
+ esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL);
+ if (!vhca_map_entry)
+ return -ENOMEM;
+
+ *vhca_map_entry = vport_num;
+ old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL);
+ if (xa_is_err(old_entry)) {
+ kfree(vhca_map_entry);
+ return xa_err(old_entry);
+ }
+ kfree(old_entry);
+ return 0;
+}
+
+void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ u16 *vhca_map_entry, vhca_id;
+ int err;
+
+ err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ if (err)
+ esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n",
+ vport_num, err);
+
+ vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id);
+ kfree(vhca_map_entry);
+}
+
+int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num)
+{
+ u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id);
+
+ if (!res)
+ return -ENOENT;
+
+ *vport_num = *res;
+ return 0;
+}
+
+u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport)))
+ return 0;
+
+ return vport->metadata;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set);
+
+static bool
+is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num == MLX5_VPORT_PF ||
+ mlx5_eswitch_is_vf_vport(esw, vport_num) ||
+ mlx5_esw_is_sf_vport(esw, vport_num);
+}
+
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
+ u8 *hw_addr, int *hw_addr_len,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num))
+ return -EOPNOTSUPP;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ ether_addr_copy(hw_addr, vport->info.mac);
+ *hw_addr_len = ETH_ALEN;
+ mutex_unlock(&esw->state_lock);
+ return 0;
+}
+
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const u8 *hw_addr, int hw_addr_len,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw)) {
+ NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
+ return PTR_ERR(esw);
+ }
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num)) {
+ NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr");
+ return -EINVAL;
+ }
+
+ return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
deleted file mode 100644
index 4276194b633f..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
+++ /dev/null
@@ -1,758 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-// Copyright (c) 2020 Mellanox Technologies.
-
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/mlx5_ifc.h>
-#include <linux/mlx5/fs.h>
-
-#include "eswitch_offloads_chains.h"
-#include "mlx5_core.h"
-#include "fs_core.h"
-#include "eswitch.h"
-#include "en.h"
-
-#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv)
-#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock)
-#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht)
-#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht)
-#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left)
-#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb)
-#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb)
-#define fdb_ignore_flow_level_supported(esw) \
- (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level))
-
-#define ESW_OFFLOADS_NUM_GROUPS 4
-
-/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS),
- * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated
- * for each flow table pool. We can allocate up to 16M of each pool,
- * and we keep track of how much we used via get_next_avail_sz_from_pool.
- * Firmware doesn't report any of this for now.
- * ESW_POOL is expected to be sorted from large to small and match firmware
- * pools.
- */
-#define ESW_SIZE (16 * 1024 * 1024)
-static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024,
- 1 * 1024 * 1024,
- 64 * 1024,
- 128 };
-
-struct mlx5_esw_chains_priv {
- struct rhashtable chains_ht;
- struct rhashtable prios_ht;
- /* Protects above chains_ht and prios_ht */
- struct mutex lock;
-
- struct mlx5_flow_table *tc_end_fdb;
-
- int fdb_left[ARRAY_SIZE(ESW_POOLS)];
-};
-
-struct fdb_chain {
- struct rhash_head node;
-
- u32 chain;
-
- int ref;
-
- struct mlx5_eswitch *esw;
- struct list_head prios_list;
-};
-
-struct fdb_prio_key {
- u32 chain;
- u32 prio;
- u32 level;
-};
-
-struct fdb_prio {
- struct rhash_head node;
- struct list_head list;
-
- struct fdb_prio_key key;
-
- int ref;
-
- struct fdb_chain *fdb_chain;
- struct mlx5_flow_table *fdb;
- struct mlx5_flow_table *next_fdb;
- struct mlx5_flow_group *miss_group;
- struct mlx5_flow_handle *miss_rule;
-};
-
-static const struct rhashtable_params chain_params = {
- .head_offset = offsetof(struct fdb_chain, node),
- .key_offset = offsetof(struct fdb_chain, chain),
- .key_len = sizeof_field(struct fdb_chain, chain),
- .automatic_shrinking = true,
-};
-
-static const struct rhashtable_params prio_params = {
- .head_offset = offsetof(struct fdb_prio, node),
- .key_offset = offsetof(struct fdb_prio, key),
- .key_len = sizeof_field(struct fdb_prio, key),
- .automatic_shrinking = true,
-};
-
-bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw)
-{
- return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
-}
-
-u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_chains_prios_supported(esw))
- return 1;
-
- if (fdb_ignore_flow_level_supported(esw))
- return UINT_MAX - 1;
-
- return FDB_TC_MAX_CHAIN;
-}
-
-u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw)
-{
- return mlx5_esw_chains_get_chain_range(esw) + 1;
-}
-
-u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_chains_prios_supported(esw))
- return 1;
-
- if (fdb_ignore_flow_level_supported(esw))
- return UINT_MAX;
-
- return FDB_TC_MAX_PRIO;
-}
-
-static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
-{
- if (fdb_ignore_flow_level_supported(esw))
- return UINT_MAX;
-
- return FDB_TC_LEVELS_PER_PRIO;
-}
-
-#define POOL_NEXT_SIZE 0
-static int
-mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw,
- int desired_size)
-{
- int i, found_i = -1;
-
- for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) {
- if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) {
- found_i = i;
- if (desired_size != POOL_NEXT_SIZE)
- break;
- }
- }
-
- if (found_i != -1) {
- --fdb_pool_left(esw)[found_i];
- return ESW_POOLS[found_i];
- }
-
- return 0;
-}
-
-static void
-mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
-{
- int i;
-
- for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) {
- if (sz == ESW_POOLS[i]) {
- ++fdb_pool_left(esw)[i];
- return;
- }
- }
-
- WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz);
-}
-
-static void
-mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw)
-{
- u32 fdb_max;
- int i;
-
- fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size);
-
- for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--)
- fdb_pool_left(esw)[i] =
- ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
-}
-
-static struct mlx5_flow_table *
-mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw,
- u32 chain, u32 prio, u32 level)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_flow_namespace *ns;
- struct mlx5_flow_table *fdb;
- int sz;
-
- if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
- ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
- MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
-
- sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE);
- if (!sz)
- return ERR_PTR(-ENOSPC);
- ft_attr.max_fte = sz;
-
- /* We use tc_slow_fdb(esw) as the table's next_ft till
- * ignore_flow_level is allowed on FT creation and not just for FTEs.
- * Instead caller should add an explicit miss rule if needed.
- */
- ft_attr.next_ft = tc_slow_fdb(esw);
-
- /* The root table(chain 0, prio 1, level 0) is required to be
- * connected to the previous prio (FDB_BYPASS_PATH if exists).
- * We always create it, as a managed table, in order to align with
- * fs_core logic.
- */
- if (!fdb_ignore_flow_level_supported(esw) ||
- (chain == 0 && prio == 1 && level == 0)) {
- ft_attr.level = level;
- ft_attr.prio = prio - 1;
- ns = mlx5_get_fdb_sub_ns(esw->dev, chain);
- } else {
- ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
- ft_attr.prio = FDB_TC_OFFLOAD;
- /* Firmware doesn't allow us to create another level 0 table,
- * so we create all unmanaged tables as level 1.
- *
- * To connect them, we use explicit miss rules with
- * ignore_flow_level. Caller is responsible to create
- * these rules (if needed).
- */
- ft_attr.level = 1;
- ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
- }
-
- ft_attr.autogroup.num_reserved_entries = 2;
- ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS;
- fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
- if (IS_ERR(fdb)) {
- esw_warn(esw->dev,
- "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
- (int)PTR_ERR(fdb), chain, prio, level, sz);
- mlx5_esw_chains_put_sz_to_pool(esw, sz);
- return fdb;
- }
-
- return fdb;
-}
-
-static void
-mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw,
- struct mlx5_flow_table *fdb)
-{
- mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte);
- mlx5_destroy_flow_table(fdb);
-}
-
-static struct fdb_chain *
-mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
-{
- struct fdb_chain *fdb_chain = NULL;
- int err;
-
- fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL);
- if (!fdb_chain)
- return ERR_PTR(-ENOMEM);
-
- fdb_chain->esw = esw;
- fdb_chain->chain = chain;
- INIT_LIST_HEAD(&fdb_chain->prios_list);
-
- err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node,
- chain_params);
- if (err)
- goto err_insert;
-
- return fdb_chain;
-
-err_insert:
- kvfree(fdb_chain);
- return ERR_PTR(err);
-}
-
-static void
-mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain)
-{
- struct mlx5_eswitch *esw = fdb_chain->esw;
-
- rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node,
- chain_params);
- kvfree(fdb_chain);
-}
-
-static struct fdb_chain *
-mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
-{
- struct fdb_chain *fdb_chain;
-
- fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain,
- chain_params);
- if (!fdb_chain) {
- fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain);
- if (IS_ERR(fdb_chain))
- return fdb_chain;
- }
-
- fdb_chain->ref++;
-
- return fdb_chain;
-}
-
-static struct mlx5_flow_handle *
-mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
- struct mlx5_flow_table *next_fdb)
-{
- static const struct mlx5_flow_spec spec = {};
- struct mlx5_flow_destination dest = {};
- struct mlx5_flow_act act = {};
-
- act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
- act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = next_fdb;
-
- return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1);
-}
-
-static int
-mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio,
- struct mlx5_flow_table *next_fdb)
-{
- struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
- struct fdb_chain *fdb_chain = fdb_prio->fdb_chain;
- struct fdb_prio *pos;
- int n = 0, err;
-
- if (fdb_prio->key.level)
- return 0;
-
- /* Iterate in reverse order until reaching the level 0 rule of
- * the previous priority, adding all the miss rules first, so we can
- * revert them if any of them fails.
- */
- pos = fdb_prio;
- list_for_each_entry_continue_reverse(pos,
- &fdb_chain->prios_list,
- list) {
- miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb,
- next_fdb);
- if (IS_ERR(miss_rules[n])) {
- err = PTR_ERR(miss_rules[n]);
- goto err_prev_rule;
- }
-
- n++;
- if (!pos->key.level)
- break;
- }
-
- /* Success, delete old miss rules, and update the pointers. */
- n = 0;
- pos = fdb_prio;
- list_for_each_entry_continue_reverse(pos,
- &fdb_chain->prios_list,
- list) {
- mlx5_del_flow_rules(pos->miss_rule);
-
- pos->miss_rule = miss_rules[n];
- pos->next_fdb = next_fdb;
-
- n++;
- if (!pos->key.level)
- break;
- }
-
- return 0;
-
-err_prev_rule:
- while (--n >= 0)
- mlx5_del_flow_rules(miss_rules[n]);
-
- return err;
-}
-
-static void
-mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain)
-{
- if (--fdb_chain->ref == 0)
- mlx5_esw_chains_destroy_fdb_chain(fdb_chain);
-}
-
-static struct fdb_prio *
-mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
- u32 chain, u32 prio, u32 level)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_handle *miss_rule = NULL;
- struct mlx5_flow_group *miss_group;
- struct fdb_prio *fdb_prio = NULL;
- struct mlx5_flow_table *next_fdb;
- struct fdb_chain *fdb_chain;
- struct mlx5_flow_table *fdb;
- struct list_head *pos;
- u32 *flow_group_in;
- int err;
-
- fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain);
- if (IS_ERR(fdb_chain))
- return ERR_CAST(fdb_chain);
-
- fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL);
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!fdb_prio || !flow_group_in) {
- err = -ENOMEM;
- goto err_alloc;
- }
-
- /* Chain's prio list is sorted by prio and level.
- * And all levels of some prio point to the next prio's level 0.
- * Example list (prio, level):
- * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
- * In hardware, we will we have the following pointers:
- * (3,0) -> (5,0) -> (7,0) -> Slow path
- * (3,1) -> (5,0)
- * (5,1) -> (7,0)
- * (6,1) -> (7,0)
- */
-
- /* Default miss for each chain: */
- next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ?
- tc_slow_fdb(esw) :
- tc_end_fdb(esw);
- list_for_each(pos, &fdb_chain->prios_list) {
- struct fdb_prio *p = list_entry(pos, struct fdb_prio, list);
-
- /* exit on first pos that is larger */
- if (prio < p->key.prio || (prio == p->key.prio &&
- level < p->key.level)) {
- /* Get next level 0 table */
- next_fdb = p->key.level == 0 ? p->fdb : p->next_fdb;
- break;
- }
- }
-
- fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- goto err_create;
- }
-
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
- fdb->max_fte - 2);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- fdb->max_fte - 1);
- miss_group = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(miss_group)) {
- err = PTR_ERR(miss_group);
- goto err_group;
- }
-
- /* Add miss rule to next_fdb */
- miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb);
- if (IS_ERR(miss_rule)) {
- err = PTR_ERR(miss_rule);
- goto err_miss_rule;
- }
-
- fdb_prio->miss_group = miss_group;
- fdb_prio->miss_rule = miss_rule;
- fdb_prio->next_fdb = next_fdb;
- fdb_prio->fdb_chain = fdb_chain;
- fdb_prio->key.chain = chain;
- fdb_prio->key.prio = prio;
- fdb_prio->key.level = level;
- fdb_prio->fdb = fdb;
-
- err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node,
- prio_params);
- if (err)
- goto err_insert;
-
- list_add(&fdb_prio->list, pos->prev);
-
- /* Table is ready, connect it */
- err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb);
- if (err)
- goto err_update;
-
- kvfree(flow_group_in);
- return fdb_prio;
-
-err_update:
- list_del(&fdb_prio->list);
- rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
- prio_params);
-err_insert:
- mlx5_del_flow_rules(miss_rule);
-err_miss_rule:
- mlx5_destroy_flow_group(miss_group);
-err_group:
- mlx5_esw_chains_destroy_fdb_table(esw, fdb);
-err_create:
-err_alloc:
- kvfree(fdb_prio);
- kvfree(flow_group_in);
- mlx5_esw_chains_put_fdb_chain(fdb_chain);
- return ERR_PTR(err);
-}
-
-static void
-mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw,
- struct fdb_prio *fdb_prio)
-{
- struct fdb_chain *fdb_chain = fdb_prio->fdb_chain;
-
- WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio,
- fdb_prio->next_fdb));
-
- list_del(&fdb_prio->list);
- rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
- prio_params);
- mlx5_del_flow_rules(fdb_prio->miss_rule);
- mlx5_destroy_flow_group(fdb_prio->miss_group);
- mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb);
- mlx5_esw_chains_put_fdb_chain(fdb_chain);
- kvfree(fdb_prio);
-}
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level)
-{
- struct mlx5_flow_table *prev_fts;
- struct fdb_prio *fdb_prio;
- struct fdb_prio_key key;
- int l = 0;
-
- if ((chain > mlx5_esw_chains_get_chain_range(esw) &&
- chain != mlx5_esw_chains_get_ft_chain(esw)) ||
- prio > mlx5_esw_chains_get_prio_range(esw) ||
- level > mlx5_esw_chains_get_level_range(esw))
- return ERR_PTR(-EOPNOTSUPP);
-
- /* create earlier levels for correct fs_core lookup when
- * connecting tables.
- */
- for (l = 0; l < level; l++) {
- prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l);
- if (IS_ERR(prev_fts)) {
- fdb_prio = ERR_CAST(prev_fts);
- goto err_get_prevs;
- }
- }
-
- key.chain = chain;
- key.prio = prio;
- key.level = level;
-
- mutex_lock(&esw_chains_lock(esw));
- fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key,
- prio_params);
- if (!fdb_prio) {
- fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain,
- prio, level);
- if (IS_ERR(fdb_prio))
- goto err_create_prio;
- }
-
- ++fdb_prio->ref;
- mutex_unlock(&esw_chains_lock(esw));
-
- return fdb_prio->fdb;
-
-err_create_prio:
- mutex_unlock(&esw_chains_lock(esw));
-err_get_prevs:
- while (--l >= 0)
- mlx5_esw_chains_put_table(esw, chain, prio, l);
- return ERR_CAST(fdb_prio);
-}
-
-void
-mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level)
-{
- struct fdb_prio *fdb_prio;
- struct fdb_prio_key key;
-
- key.chain = chain;
- key.prio = prio;
- key.level = level;
-
- mutex_lock(&esw_chains_lock(esw));
- fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key,
- prio_params);
- if (!fdb_prio)
- goto err_get_prio;
-
- if (--fdb_prio->ref == 0)
- mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio);
- mutex_unlock(&esw_chains_lock(esw));
-
- while (level-- > 0)
- mlx5_esw_chains_put_table(esw, chain, prio, level);
-
- return;
-
-err_get_prio:
- mutex_unlock(&esw_chains_lock(esw));
- WARN_ONCE(1,
- "Couldn't find table: (chain: %d prio: %d level: %d)",
- chain, prio, level);
-}
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw)
-{
- return tc_end_fdb(esw);
-}
-
-static int
-mlx5_esw_chains_init(struct mlx5_eswitch *esw)
-{
- struct mlx5_esw_chains_priv *chains_priv;
- struct mlx5_core_dev *dev = esw->dev;
- u32 max_flow_counter, fdb_max;
- int err;
-
- chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
- if (!chains_priv)
- return -ENOMEM;
- esw_chains_priv(esw) = chains_priv;
-
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
- fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
-
- esw_debug(dev,
- "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n",
- max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max);
-
- mlx5_esw_chains_init_sz_pool(esw);
-
- if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
- esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
- esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
- esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
- } else {
- esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
- esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
- mlx5_esw_chains_get_chain_range(esw),
- mlx5_esw_chains_get_prio_range(esw));
- }
-
- err = rhashtable_init(&esw_chains_ht(esw), &chain_params);
- if (err)
- goto init_chains_ht_err;
-
- err = rhashtable_init(&esw_prios_ht(esw), &prio_params);
- if (err)
- goto init_prios_ht_err;
-
- mutex_init(&esw_chains_lock(esw));
-
- return 0;
-
-init_prios_ht_err:
- rhashtable_destroy(&esw_chains_ht(esw));
-init_chains_ht_err:
- kfree(chains_priv);
- return err;
-}
-
-static void
-mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw)
-{
- mutex_destroy(&esw_chains_lock(esw));
- rhashtable_destroy(&esw_prios_ht(esw));
- rhashtable_destroy(&esw_chains_ht(esw));
-
- kfree(esw_chains_priv(esw));
-}
-
-static int
-mlx5_esw_chains_open(struct mlx5_eswitch *esw)
-{
- struct mlx5_flow_table *ft;
- int err;
-
- /* Create tc_end_fdb(esw) which is the always created ft chain */
- ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw),
- 1, 0);
- if (IS_ERR(ft))
- return PTR_ERR(ft);
-
- tc_end_fdb(esw) = ft;
-
- /* Always open the root for fast path */
- ft = mlx5_esw_chains_get_table(esw, 0, 1, 0);
- if (IS_ERR(ft)) {
- err = PTR_ERR(ft);
- goto level_0_err;
- }
-
- /* Open level 1 for split rules now if prios isn't supported */
- if (!mlx5_esw_chains_prios_supported(esw)) {
- ft = mlx5_esw_chains_get_table(esw, 0, 1, 1);
-
- if (IS_ERR(ft)) {
- err = PTR_ERR(ft);
- goto level_1_err;
- }
- }
-
- return 0;
-
-level_1_err:
- mlx5_esw_chains_put_table(esw, 0, 1, 0);
-level_0_err:
- mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
- return err;
-}
-
-static void
-mlx5_esw_chains_close(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_chains_prios_supported(esw))
- mlx5_esw_chains_put_table(esw, 0, 1, 1);
- mlx5_esw_chains_put_table(esw, 0, 1, 0);
- mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
-}
-
-int
-mlx5_esw_chains_create(struct mlx5_eswitch *esw)
-{
- int err;
-
- err = mlx5_esw_chains_init(esw);
- if (err)
- return err;
-
- err = mlx5_esw_chains_open(esw);
- if (err)
- goto err_open;
-
- return 0;
-
-err_open:
- mlx5_esw_chains_cleanup(esw);
- return err;
-}
-
-void
-mlx5_esw_chains_destroy(struct mlx5_eswitch *esw)
-{
- mlx5_esw_chains_close(esw);
- mlx5_esw_chains_cleanup(esw);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
deleted file mode 100644
index 2e13097fe348..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2020 Mellanox Technologies. */
-
-#ifndef __ML5_ESW_CHAINS_H__
-#define __ML5_ESW_CHAINS_H__
-
-bool
-mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw);
-u32
-mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw);
-u32
-mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw);
-u32
-mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw);
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level);
-void
-mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level);
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw);
-
-int mlx5_esw_chains_create(struct mlx5_eswitch *esw);
-void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
-
-#endif /* __ML5_ESW_CHAINS_H__ */
-
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index dc08ed9339ab..108a3503f413 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -3,6 +3,8 @@
#include <linux/mlx5/fs.h>
#include "eswitch.h"
+#include "en_tc.h"
+#include "fs_core.h"
struct mlx5_termtbl_handle {
struct hlist_node termtbl_hlist;
@@ -28,6 +30,10 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act,
sizeof(dest->vport.num), hash);
hash = jhash((const void *)&dest->vport.vhca_id,
sizeof(dest->vport.num), hash);
+ if (flow_act->pkt_reformat)
+ hash = jhash(flow_act->pkt_reformat,
+ sizeof(*flow_act->pkt_reformat),
+ hash);
return hash;
}
@@ -37,11 +43,21 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1,
struct mlx5_flow_act *flow_act2,
struct mlx5_flow_destination *dest2)
{
- return flow_act1->action != flow_act2->action ||
- dest1->vport.num != dest2->vport.num ||
- dest1->vport.vhca_id != dest2->vport.vhca_id ||
- memcmp(&flow_act1->vlan, &flow_act2->vlan,
- sizeof(flow_act1->vlan));
+ int ret;
+
+ ret = flow_act1->action != flow_act2->action ||
+ dest1->vport.num != dest2->vport.num ||
+ dest1->vport.vhca_id != dest2->vport.vhca_id ||
+ memcmp(&flow_act1->vlan, &flow_act2->vlan,
+ sizeof(flow_act1->vlan));
+ if (ret)
+ return ret;
+
+ if (flow_act1->pkt_reformat && flow_act2->pkt_reformat)
+ return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat,
+ sizeof(*flow_act1->pkt_reformat));
+
+ return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat);
}
static int
@@ -49,10 +65,9 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
struct mlx5_termtbl_handle *tt,
struct mlx5_flow_act *flow_act)
{
- static const struct mlx5_flow_spec spec = {};
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *root_ns;
- int err;
+ int err, err2;
root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
if (!root_ns) {
@@ -63,37 +78,41 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
/* As this is the terminating action then the termination table is the
* same prio as the slow path
*/
- ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION;
- ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED |
+ MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft_attr.prio = FDB_TC_OFFLOAD;
ft_attr.max_fte = 1;
+ ft_attr.level = 1;
ft_attr.autogroup.max_num_groups = 1;
tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
if (IS_ERR(tt->termtbl)) {
- esw_warn(dev, "Failed to create termination table\n");
- return -EOPNOTSUPP;
+ err = PTR_ERR(tt->termtbl);
+ esw_warn(dev, "Failed to create termination table, err %pe\n", tt->termtbl);
+ return err;
}
- tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act,
+ tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act,
&tt->dest, 1);
-
if (IS_ERR(tt->rule)) {
- esw_warn(dev, "Failed to create termination table rule\n");
+ err = PTR_ERR(tt->rule);
+ esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule);
goto add_flow_err;
}
return 0;
add_flow_err:
- err = mlx5_destroy_flow_table(tt->termtbl);
- if (err)
- esw_warn(dev, "Failed to destroy termination table\n");
+ err2 = mlx5_destroy_flow_table(tt->termtbl);
+ if (err2)
+ esw_warn(dev, "Failed to destroy termination table, err %d\n", err2);
- return -EOPNOTSUPP;
+ return err;
}
static struct mlx5_termtbl_handle *
mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw,
struct mlx5_flow_act *flow_act,
- struct mlx5_flow_destination *dest)
+ struct mlx5_flow_destination *dest,
+ struct mlx5_esw_flow_attr *attr)
{
struct mlx5_termtbl_handle *tt;
bool found = false;
@@ -101,7 +120,6 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw,
int err;
mutex_lock(&esw->offloads.termtbl_mutex);
-
hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest);
hash_for_each_possible(esw->offloads.termtbl_tbl, tt,
termtbl_hlist, hash_key) {
@@ -123,13 +141,13 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw,
tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
tt->dest.vport.num = dest->vport.num;
tt->dest.vport.vhca_id = dest->vport.vhca_id;
+ tt->dest.vport.flags = dest->vport.flags;
memcpy(&tt->flow_act, flow_act, sizeof(*flow_act));
err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act);
- if (err) {
- esw_warn(esw->dev, "Failed to create termination table\n");
+ if (err)
goto tt_create_err;
- }
+
hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key);
tt_add_ref:
tt->ref_count++;
@@ -161,27 +179,25 @@ static void
mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
struct mlx5_flow_act *dst)
{
- if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
- return;
-
- src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
- dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
- memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0]));
- memset(&src->vlan[0], 0, sizeof(src->vlan[0]));
-
- if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
- return;
-
- src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
- dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
- memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1]));
- memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+ if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0]));
+ memset(&src->vlan[0], 0, sizeof(src->vlan[0]));
+
+ if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1]));
+ memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+ }
+ }
}
static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
const struct mlx5_flow_spec *spec)
{
- u32 port_mask, port_value;
+ u16 port_mask, port_value;
if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
return spec->flow_context.flow_source ==
@@ -191,20 +207,37 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
misc_parameters.source_port);
port_value = MLX5_GET(fte_match_param, spec->match_value,
misc_parameters.source_port);
- return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK;
+ return (port_mask & port_value) == MLX5_VPORT_UPLINK;
}
bool
mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_spec *spec)
{
- if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table))
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int i;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) ||
+ mlx5e_tc_attr_flags_skip(attr->flags) ||
+ (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port))
return false;
/* push vlan on RX */
- return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) &&
- mlx5_eswitch_offload_is_uplink_port(esw, spec);
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) &
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX))
+ return true;
+
+ /* hairpin */
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+ if (!esw_attr->dest_int_port && esw_attr->dests[i].rep &&
+ esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK)
+ return true;
+
+ return false;
}
struct mlx5_flow_handle *
@@ -232,11 +265,19 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
continue;
+ if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) {
+ term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat;
+ } else {
+ term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ term_tbl_act.pkt_reformat = NULL;
+ }
+
/* get the terminating table for the action list */
tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
- &dest[i]);
+ &dest[i], attr);
if (IS_ERR(tt)) {
- esw_warn(esw->dev, "Failed to create termination table\n");
+ esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt);
goto revert_changes;
}
attr->dests[num_vport_dests].termtbl = tt;
@@ -253,6 +294,9 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
goto revert_changes;
/* create the FTE */
+ flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = NULL;
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
if (IS_ERR(rule))
goto revert_changes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index 8bcf3426b9c6..9459e56ee90a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -23,11 +23,11 @@ static int temp_warn(struct notifier_block *, unsigned long, void *);
static int port_module(struct notifier_block *, unsigned long, void *);
static int pcie_core(struct notifier_block *, unsigned long, void *);
-/* handler which forwards the event to events->nh, driver notifiers */
+/* handler which forwards the event to events->fw_nh, driver notifiers */
static int forward_event(struct notifier_block *, unsigned long, void *);
static struct mlx5_nb events_nbs_ref[] = {
- /* Events to be proccessed by mlx5_core */
+ /* Events to be processed by mlx5_core */
{.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
{.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
{.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
@@ -36,6 +36,7 @@ static struct mlx5_nb events_nbs_ref[] = {
/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE },
/* QP/WQ resource events to forward */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
@@ -55,12 +56,14 @@ struct mlx5_events {
struct mlx5_core_dev *dev;
struct workqueue_struct *wq;
struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
- /* driver notifier chain */
- struct atomic_notifier_head nh;
+ /* driver notifier chain for fw events */
+ struct atomic_notifier_head fw_nh;
/* port module events stats */
struct mlx5_pme_stats pme_stats;
/*pcie_core*/
struct work_struct pcie_core_work;
+ /* driver notifier chain for sw events */
+ struct blocking_notifier_head sw_nh;
};
static const char *eqe_type_str(u8 type)
@@ -110,6 +113,8 @@ static const char *eqe_type_str(u8 type)
return "MLX5_EVENT_TYPE_CMD";
case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
+ case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
+ return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
case MLX5_EVENT_TYPE_PAGE_REQUEST:
return "MLX5_EVENT_TYPE_PAGE_REQUEST";
case MLX5_EVENT_TYPE_PAGE_FAULT:
@@ -128,6 +133,8 @@ static const char *eqe_type_str(u8 type)
return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
case MLX5_EVENT_TYPE_DEVICE_TRACER:
return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ case MLX5_EVENT_TYPE_OBJECT_CHANGE:
+ return "MLX5_EVENT_TYPE_OBJECT_CHANGE";
default:
return "Unrecognized event";
}
@@ -331,7 +338,7 @@ static int forward_event(struct notifier_block *nb, unsigned long event, void *d
mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
eqe_type_str(eqe->type), eqe->sub_type);
- atomic_notifier_call_chain(&events->nh, event, data);
+ atomic_notifier_call_chain(&events->fw_nh, event, data);
return NOTIFY_OK;
}
@@ -342,13 +349,16 @@ int mlx5_events_init(struct mlx5_core_dev *dev)
if (!events)
return -ENOMEM;
- ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+ ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
events->dev = dev;
dev->priv.events = events;
events->wq = create_singlethread_workqueue("mlx5_events");
- if (!events->wq)
+ if (!events->wq) {
+ kfree(events);
return -ENOMEM;
+ }
INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
+ BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
return 0;
}
@@ -381,11 +391,14 @@ void mlx5_events_stop(struct mlx5_core_dev *dev)
flush_workqueue(events->wq);
}
+/* This API is used only for processing and forwarding firmware
+ * events to mlx5 consumer.
+ */
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
struct mlx5_events *events = dev->priv.events;
- return atomic_notifier_chain_register(&events->nh, nb);
+ return atomic_notifier_chain_register(&events->fw_nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_register);
@@ -393,11 +406,41 @@ int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *n
{
struct mlx5_events *events = dev->priv.events;
- return atomic_notifier_chain_unregister(&events->nh, nb);
+ return atomic_notifier_chain_unregister(&events->fw_nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_unregister);
int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
{
- return atomic_notifier_call_chain(&events->nh, event, data);
+ return atomic_notifier_call_chain(&events->fw_nh, event, data);
+}
+
+/* This API is used only for processing and forwarding driver-specific
+ * events to mlx5 consumers.
+ */
+int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return blocking_notifier_chain_register(&events->sw_nh, nb);
+}
+
+int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return blocking_notifier_chain_unregister(&events->sw_nh, nb);
+}
+
+int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
+ void *data)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return blocking_notifier_call_chain(&events->sw_nh, event, data);
+}
+
+void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
+{
+ queue_work(dev->priv.events->wq, work);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index c0fd2212e890..9a37077152aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -31,7 +31,6 @@
*/
#include <linux/etherdevice.h>
-#include <linux/mlx5/cmd.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
@@ -143,15 +142,15 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
u32 *fpga_qpn)
{
- u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+ u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {};
int ret;
MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ ret = mlx5_cmd_exec_inout(dev, fpga_create_qp, in, out);
if (ret)
return ret;
@@ -165,8 +164,7 @@ int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
enum mlx5_fpga_qpc_field_select fields,
void *fpga_qpc)
{
- u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+ u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {};
MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
@@ -174,20 +172,20 @@ int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, fpga_modify_qp, in);
}
int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
u32 fpga_qpn, void *fpga_qpc)
{
- u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {};
int ret;
MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ ret = mlx5_cmd_exec_inout(dev, fpga_query_qp, in, out);
if (ret)
return ret;
@@ -198,20 +196,19 @@ int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
{
- u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+ u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {};
MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, fpga_destroy_qp, in);
}
int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
bool clear, struct mlx5_fpga_qp_counters *data)
{
- u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {};
int ret;
MLX5_SET(fpga_query_qp_counters_in, in, opcode,
@@ -219,7 +216,7 @@ int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ ret = mlx5_cmd_exec_inout(dev, fpga_query_qp_counters, in, out);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 61021133029e..12abe991583a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -54,7 +54,7 @@ static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
if (unlikely(!buf->sg[0].data))
goto out;
- dma_device = &conn->fdev->mdev->pdev->dev;
+ dma_device = mlx5_core_dma_dev(conn->fdev->mdev);
buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data,
buf->sg[0].size, buf->dma_dir);
err = dma_mapping_error(dma_device, buf->sg[0].dma_addr);
@@ -86,7 +86,7 @@ static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
{
struct device *dma_device;
- dma_device = &conn->fdev->mdev->pdev->dev;
+ dma_device = mlx5_core_dma_dev(conn->fdev->mdev);
if (buf->sg[1].data)
dma_unmap_single(dma_device, buf->sg[1].dma_addr,
buf->sg[1].size, buf->dma_dir);
@@ -115,7 +115,7 @@ static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
data->byte_count = cpu_to_be32(buf->sg[0].size);
- data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
data->addr = cpu_to_be64(buf->sg[0].dma_addr);
conn->qp.rq.pc++;
@@ -155,7 +155,7 @@ static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
if (!buf->sg[sgi].data)
break;
data->byte_count = cpu_to_be32(buf->sg[sgi].size);
- data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
data++;
size++;
@@ -165,7 +165,7 @@ static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
MLX5_OPCODE_SEND);
- ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8));
+ ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.qpn << 8));
conn->qp.sq.pc++;
conn->qp.sq.bufs[ix] = buf;
@@ -221,7 +221,7 @@ static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
}
static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
- struct mlx5_core_mkey *mkey)
+ u32 *mkey)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
void *mkc;
@@ -339,14 +339,14 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
switch (opcode) {
case MLX5_CQE_REQ_ERR:
status = ((struct mlx5_err_cqe *)cqe)->syndrome;
- /* Fall through */
+ fallthrough;
case MLX5_CQE_REQ:
mlx5_fpga_conn_sq_cqe(conn, cqe, status);
break;
case MLX5_CQE_RESP_ERR:
status = ((struct mlx5_err_cqe *)cqe)->syndrome;
- /* Fall through */
+ fallthrough;
case MLX5_CQE_RESP_SEND:
mlx5_fpga_conn_rq_cqe(conn, cqe, status);
break;
@@ -362,23 +362,6 @@ static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
conn->fdev->conn_res.uar->map, conn->cq.wq.cc);
}
-static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq,
- enum mlx5_event event)
-{
- struct mlx5_fpga_conn *conn;
-
- conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
- mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn);
-}
-
-static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event)
-{
- struct mlx5_fpga_conn *conn;
-
- conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp);
- mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn);
-}
-
static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
unsigned int budget)
{
@@ -405,9 +388,9 @@ static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
mlx5_fpga_conn_arm_cq(conn);
}
-static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t)
{
- struct mlx5_fpga_conn *conn = (void *)data;
+ struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet);
if (unlikely(!conn->qp.active))
return;
@@ -434,7 +417,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
struct mlx5_wq_param wqp;
struct mlx5_cqe64 *cqe;
int inlen, err, eqn;
- unsigned int irqn;
void *cqc, *in;
__be64 *pas;
u32 i;
@@ -463,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
goto err_cqwq;
}
- err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+ err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn);
if (err) {
kvfree(in);
goto err_cqwq;
@@ -471,7 +453,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -493,11 +475,8 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
*conn->cq.mcq.arm_db = 0;
conn->cq.mcq.vector = 0;
conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
- conn->cq.mcq.event = mlx5_fpga_conn_cq_event;
- conn->cq.mcq.irqn = irqn;
conn->cq.mcq.uar = fdev->conn_res.uar;
- tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
- (unsigned long)conn);
+ tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet);
mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
@@ -534,8 +513,9 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
unsigned int tx_size, unsigned int rx_size)
{
struct mlx5_fpga_device *fdev = conn->fdev;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
struct mlx5_core_dev *mdev = fdev->mdev;
- u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0};
+ u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
void *in = NULL, *qpc;
int err, inlen;
@@ -593,6 +573,7 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
@@ -600,12 +581,13 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
(__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
- err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
if (err)
goto err_sq_bufs;
- conn->qp.mqp.event = mlx5_fpga_conn_event;
- mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
+ conn->qp.qpn = MLX5_GET(create_qp_out, out, qpn);
+ mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.qpn);
goto out;
@@ -658,7 +640,13 @@ static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
{
- mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp);
+ struct mlx5_core_dev *dev = conn->fdev->mdev;
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, conn->qp.qpn);
+ mlx5_cmd_exec_in(dev, destroy_qp, in);
+
mlx5_fpga_conn_free_recv_bufs(conn);
mlx5_fpga_conn_flush_send_bufs(conn);
kvfree(conn->qp.sq.bufs);
@@ -666,30 +654,29 @@ static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
mlx5_wq_destroy(&conn->qp.wq_ctrl);
}
-static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
{
struct mlx5_core_dev *mdev = conn->fdev->mdev;
+ u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.qpn);
- mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
+ MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
+ MLX5_SET(qp_2rst_in, in, qpn, conn->qp.qpn);
- return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
- &conn->qp.mqp);
+ return mlx5_cmd_exec_in(mdev, qp_2rst, in);
}
-static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
{
+ u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
struct mlx5_fpga_device *fdev = conn->fdev;
struct mlx5_core_dev *mdev = fdev->mdev;
- u32 *qpc = NULL;
- int err;
+ u32 *qpc;
- mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.qpn);
- qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
- if (!qpc) {
- err = -ENOMEM;
- goto out;
- }
+ qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
@@ -700,32 +687,22 @@ static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
- err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
- &conn->qp.mqp);
- if (err) {
- mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
- goto out;
- }
+ MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+ MLX5_SET(rst2init_qp_in, in, qpn, conn->qp.qpn);
-out:
- kfree(qpc);
- return err;
+ return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
}
-static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
{
+ u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
struct mlx5_fpga_device *fdev = conn->fdev;
struct mlx5_core_dev *mdev = fdev->mdev;
- u32 *qpc = NULL;
- int err;
+ u32 *qpc;
mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
- qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
- if (!qpc) {
- err = -ENOMEM;
- goto out;
- }
+ qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
@@ -745,33 +722,22 @@ static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
- err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
- &conn->qp.mqp);
- if (err) {
- mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
- goto out;
- }
+ MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+ MLX5_SET(init2rtr_qp_in, in, qpn, conn->qp.qpn);
-out:
- kfree(qpc);
- return err;
+ return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
}
-static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
{
struct mlx5_fpga_device *fdev = conn->fdev;
+ u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
struct mlx5_core_dev *mdev = fdev->mdev;
- u32 *qpc = NULL;
- u32 opt_mask;
- int err;
+ u32 *qpc;
mlx5_fpga_dbg(conn->fdev, "QP RTS\n");
- qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
- if (!qpc) {
- err = -ENOMEM;
- goto out;
- }
+ qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
@@ -781,17 +747,11 @@ static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
MLX5_SET(qpc, qpc, retry_count, 7);
MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
- opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT;
- err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc,
- &conn->qp.mqp);
- if (err) {
- mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
- goto out;
- }
+ MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+ MLX5_SET(rtr2rts_qp_in, in, qpn, conn->qp.qpn);
+ MLX5_SET(rtr2rts_qp_in, in, opt_param_mask, MLX5_QP_OPTPAR_RNR_TIMEOUT);
-out:
- kfree(qpc);
- return err;
+ return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
}
static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
@@ -931,7 +891,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
- MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.qpn);
MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
@@ -972,19 +932,11 @@ out:
void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
{
- struct mlx5_fpga_device *fdev = conn->fdev;
- struct mlx5_core_dev *mdev = fdev->mdev;
- int err = 0;
-
conn->qp.active = false;
tasklet_disable(&conn->cq.tasklet);
synchronize_irq(conn->cq.mcq.irqn);
mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
- err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL,
- &conn->qp.mqp);
- if (err)
- mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err);
mlx5_fpga_conn_destroy_qp(conn);
mlx5_fpga_conn_destroy_cq(conn);
@@ -1026,7 +978,7 @@ int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
goto err_dealloc_pd;
}
- mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+ mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey);
return 0;
@@ -1042,7 +994,7 @@ out:
void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
{
- mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey);
+ mlx5_core_destroy_mkey(fdev->mdev, fdev->conn_res.mkey);
mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
mlx5_nic_vport_disable_roce(fdev->mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
index 634ae10e287b..5116e869a6e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -65,7 +65,7 @@ struct mlx5_fpga_conn {
int sgid_index;
struct mlx5_wq_qp wq;
struct mlx5_wq_ctrl wq_ctrl;
- struct mlx5_core_qp mqp;
+ u32 qpn;
struct {
spinlock_t lock; /* Protects all SQ state */
unsigned int pc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 2ce4241459ce..39c03dcbd196 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/driver.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 52c9dee91ea4..750c32050165 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -54,12 +54,9 @@ struct mlx5_fpga_device {
/* QP Connection resources */
struct {
u32 pdn;
- struct mlx5_core_mkey mkey;
+ u32 mkey;
struct mlx5_uars_page *uar;
} conn_res;
-
- struct mlx5_fpga_ipsec *ipsec;
- struct mlx5_fpga_tls *tls;
};
#define mlx5_fpga_dbg(__adev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
deleted file mode 100644
index 4c61d25d2e88..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ /dev/null
@@ -1,1539 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/rhashtable.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/fs_helpers.h>
-#include <linux/mlx5/fs.h>
-#include <linux/rbtree.h>
-
-#include "mlx5_core.h"
-#include "fs_cmd.h"
-#include "fpga/ipsec.h"
-#include "fpga/sdk.h"
-#include "fpga/core.h"
-
-enum mlx5_fpga_ipsec_cmd_status {
- MLX5_FPGA_IPSEC_CMD_PENDING,
- MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
- MLX5_FPGA_IPSEC_CMD_COMPLETE,
-};
-
-struct mlx5_fpga_ipsec_cmd_context {
- struct mlx5_fpga_dma_buf buf;
- enum mlx5_fpga_ipsec_cmd_status status;
- struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
- int status_code;
- struct completion complete;
- struct mlx5_fpga_device *dev;
- struct list_head list; /* Item in pending_cmds */
- u8 command[0];
-};
-
-struct mlx5_fpga_esp_xfrm;
-
-struct mlx5_fpga_ipsec_sa_ctx {
- struct rhash_head hash;
- struct mlx5_ifc_fpga_ipsec_sa hw_sa;
- struct mlx5_core_dev *dev;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
-};
-
-struct mlx5_fpga_esp_xfrm {
- unsigned int num_rules;
- struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
- struct mutex lock; /* xfrm lock */
- struct mlx5_accel_esp_xfrm accel_xfrm;
-};
-
-struct mlx5_fpga_ipsec_rule {
- struct rb_node node;
- struct fs_fte *fte;
- struct mlx5_fpga_ipsec_sa_ctx *ctx;
-};
-
-static const struct rhashtable_params rhash_sa = {
- /* Keep out "cmd" field from the key as it's
- * value is not constant during the lifetime
- * of the key object.
- */
- .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
- sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
- .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) +
- sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
- .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
- .automatic_shrinking = true,
- .min_size = 1,
-};
-
-struct mlx5_fpga_ipsec {
- struct mlx5_fpga_device *fdev;
- struct list_head pending_cmds;
- spinlock_t pending_cmds_lock; /* Protects pending_cmds */
- u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
- struct mlx5_fpga_conn *conn;
-
- struct notifier_block fs_notifier_ingress_bypass;
- struct notifier_block fs_notifier_egress;
-
- /* Map hardware SA --> SA context
- * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx)
- * We will use this hash to avoid SAs duplication in fpga which
- * aren't allowed
- */
- struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
- struct mutex sa_hash_lock;
-
- /* Tree holding all rules for this fpga device
- * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
- */
- struct rb_root rules_rb;
- struct mutex rules_rb_lock; /* rules lock */
-};
-
-static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
-{
- if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
- return false;
-
- if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
- MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC)
- return false;
-
- return true;
-}
-
-static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf,
- u8 status)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
-
- if (status) {
- context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
- buf);
- mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
- status);
- context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
- complete(&context->complete);
- }
-}
-
-static inline
-int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
-{
- switch (syndrome) {
- case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
- return 0;
- case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
- return -EEXIST;
- case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
- return -EINVAL;
- case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
- return -EIO;
- }
- return -EIO;
-}
-
-static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
-{
- struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
- struct mlx5_fpga_ipsec_cmd_context *context;
- enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
- struct mlx5_fpga_device *fdev = cb_arg;
- unsigned long flags;
-
- if (buf->sg[0].size < sizeof(*resp)) {
- mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
- buf->sg[0].size, sizeof(*resp));
- return;
- }
-
- mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
- ntohl(resp->syndrome));
-
- spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
- struct mlx5_fpga_ipsec_cmd_context,
- list);
- if (context)
- list_del(&context->list);
- spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
-
- if (!context) {
- mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
- return;
- }
- mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
-
- syndrome = ntohl(resp->syndrome);
- context->status_code = syndrome_to_errno(syndrome);
- context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
- memcpy(&context->resp, resp, sizeof(*resp));
-
- if (context->status_code)
- mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
- syndrome);
-
- complete(&context->complete);
-}
-
-static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
- const void *cmd, int cmd_size)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
- struct mlx5_fpga_device *fdev = mdev->fpga;
- unsigned long flags;
- int res;
-
- if (!fdev || !fdev->ipsec)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (cmd_size & 3)
- return ERR_PTR(-EINVAL);
-
- context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
- context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
- context->dev = fdev;
- context->buf.complete = mlx5_fpga_ipsec_send_complete;
- init_completion(&context->complete);
- memcpy(&context->command, cmd, cmd_size);
- context->buf.sg[0].size = cmd_size;
- context->buf.sg[0].data = &context->command;
-
- spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
- if (!res)
- list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
- spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
-
- if (res) {
- mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res);
- kfree(context);
- return ERR_PTR(res);
- }
-
- /* Context should be freed by the caller after completion. */
- return context;
-}
-
-static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
-{
- struct mlx5_fpga_ipsec_cmd_context *context = ctx;
- unsigned long timeout =
- msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);
- int res;
-
- res = wait_for_completion_timeout(&context->complete, timeout);
- if (!res) {
- mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
- return -ETIMEDOUT;
- }
-
- if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
- res = context->status_code;
- else
- res = -EIO;
-
- return res;
-}
-
-static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
-{
- if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
- return true;
- return false;
-}
-
-static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
- int opcode)
-{
- struct mlx5_core_dev *dev = fdev->mdev;
- struct mlx5_ifc_fpga_ipsec_sa *sa;
- struct mlx5_fpga_ipsec_cmd_context *cmd_context;
- size_t sa_cmd_size;
- int err;
-
- hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
- if (is_v2_sadb_supported(fdev->ipsec))
- sa_cmd_size = sizeof(*hw_sa);
- else
- sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
-
- cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
- mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
- if (IS_ERR(cmd_context))
- return PTR_ERR(cmd_context);
-
- err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
- if (err)
- goto out;
-
- sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
- if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
- mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
- ntohl(sa->ipsec_sa_v1.sw_sa_handle),
- ntohl(cmd_context->resp.sw_sa_handle));
- err = -EIO;
- }
-
-out:
- kfree(cmd_context);
- return err;
-}
-
-u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- u32 ret = 0;
-
- if (mlx5_fpga_is_ipsec_device(mdev)) {
- ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
- ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
- } else {
- return ret;
- }
-
- if (!fdev->ipsec)
- return ret;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
- ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
- ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
- ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
- ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
- ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
- ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
- }
-
- return ret;
-}
-
-unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!fdev || !fdev->ipsec)
- return 0;
-
- return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- number_of_ipsec_counters);
-}
-
-int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int counters_count)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- unsigned int i;
- __be32 *data;
- u32 count;
- u64 addr;
- int ret;
-
- if (!fdev || !fdev->ipsec)
- return 0;
-
- addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- ipsec_counters_addr_low) +
- ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- ipsec_counters_addr_high) << 32);
-
- count = mlx5_fpga_ipsec_counters_count(mdev);
-
- data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL);
- if (!data) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
- MLX5_FPGA_ACCESS_TYPE_DONTCARE);
- if (ret < 0) {
- mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
- ret);
- goto out;
- }
- ret = 0;
-
- if (count > counters_count)
- count = counters_count;
-
- /* Each counter is low word, then high. But each word is big-endian */
- for (i = 0; i < count; i++)
- counters[i] = (u64)ntohl(data[i * 2]) |
- ((u64)ntohl(data[i * 2 + 1]) << 32);
-
-out:
- kfree(data);
- return ret;
-}
-
-static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
- struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
- int err;
-
- cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
- cmd.flags = htonl(flags);
- context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
- if (IS_ERR(context))
- return PTR_ERR(context);
-
- err = mlx5_fpga_ipsec_cmd_wait(context);
- if (err)
- goto out;
-
- if ((context->resp.flags & cmd.flags) != cmd.flags) {
- mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
- cmd.flags,
- context->resp.flags);
- err = -EIO;
- }
-
-out:
- kfree(context);
- return err;
-}
-
-static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
-{
- u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
- u32 flags = 0;
-
- if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
- flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
-
- return mlx5_fpga_ipsec_set_caps(mdev, flags);
-}
-
-static void
-mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
-{
- const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
-
- /* key */
- memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
- aes_gcm->key_len / 8);
- /* Duplicate 128 bit key twice according to HW layout */
- if (aes_gcm->key_len == 128)
- memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
- aes_gcm->aes_key, aes_gcm->key_len / 8);
-
- /* salt and seq_iv */
- memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
- sizeof(aes_gcm->seq_iv));
- memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
- sizeof(aes_gcm->salt));
-
- /* esn */
- if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
- hw_sa->ipsec_sa_v1.flags |=
- (xfrm_attrs->flags &
- MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
- MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
- hw_sa->esn = htonl(xfrm_attrs->esn);
- } else {
- hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
- hw_sa->ipsec_sa_v1.flags &=
- ~(xfrm_attrs->flags &
- MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
- MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
- hw_sa->esn = 0;
- }
-
- /* rx handle */
- hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
-
- /* enc mode */
- switch (aes_gcm->key_len) {
- case 128:
- hw_sa->ipsec_sa_v1.enc_mode =
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
- break;
- case 256:
- hw_sa->ipsec_sa_v1.enc_mode =
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
- break;
- }
-
- /* flags */
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
- MLX5_FPGA_IPSEC_SA_SPI_EN |
- MLX5_FPGA_IPSEC_SA_IP_ESP;
-
- if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
- else
- hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
-}
-
-static void
-mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
-{
- mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
-
- /* IPs */
- memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
- memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
-
- /* SPI */
- hw_sa->ipsec_sa_v1.spi = spi;
-
- /* flags */
- if (is_ipv6)
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
-}
-
-static bool is_full_mask(const void *p, size_t len)
-{
- WARN_ON(len % 4);
-
- return !memchr_inv(p, 0xff, len);
-}
-
-static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
- const u32 *match_c,
- const u32 *match_v)
-{
- const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- match_c,
- misc_parameters);
- const void *headers_c = MLX5_ADDR_OF(fte_match_param,
- match_c,
- outer_headers);
- const void *headers_v = MLX5_ADDR_OF(fte_match_param,
- match_v,
- outer_headers);
-
- if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
- const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4);
- const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
- ipv4)) ||
- !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
- ipv4)))
- return false;
- } else {
- const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6);
- const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
-
- if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6)) ||
- !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6)))
- return false;
- }
-
- if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- outer_esp_spi),
- MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
- return false;
-
- return true;
-}
-
-static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
- u8 match_criteria_enable,
- const u32 *match_c,
- const u32 *match_v)
-{
- u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
- bool ipv6_flow;
-
- ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
-
- if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
- mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
- mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
- mlx5_fs_is_vxlan_flow(match_c) ||
- !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
- ipv6_flow))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
- mlx5_fs_is_outer_ipsec_flow(match_c))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
- ipv6_flow)
- return false;
-
- if (!validate_fpga_full_mask(dev, match_c, match_v))
- return false;
-
- return true;
-}
-
-static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
- u8 match_criteria_enable,
- const u32 *match_c,
- const u32 *match_v,
- struct mlx5_flow_act *flow_act,
- struct mlx5_flow_context *flow_context)
-{
- const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
- MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
- bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
- MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
- int ret;
-
- ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
- match_v);
- if (!ret)
- return ret;
-
- if (is_dmac || is_smac ||
- (match_criteria_enable &
- ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
- (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
- return false;
-
- return true;
-}
-
-void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *accel_xfrm,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6)
-{
- struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- container_of(accel_xfrm, typeof(*fpga_xfrm),
- accel_xfrm);
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- int opcode, err;
- void *context;
-
- /* alloc SA */
- sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
- if (!sa_ctx)
- return ERR_PTR(-ENOMEM);
-
- sa_ctx->dev = mdev;
-
- /* build candidate SA */
- mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
- saddr, daddr, spi, is_ipv6,
- &sa_ctx->hw_sa);
-
- mutex_lock(&fpga_xfrm->lock);
-
- if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */
- /* all rules must be with same IPs and SPI */
- if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
- sizeof(sa_ctx->hw_sa))) {
- context = ERR_PTR(-EINVAL);
- goto exists;
- }
-
- ++fpga_xfrm->num_rules;
- context = fpga_xfrm->sa_ctx;
- goto exists;
- }
-
- /* This is unbounded fpga_xfrm, try to add to hash */
- mutex_lock(&fipsec->sa_hash_lock);
-
- err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa);
- if (err) {
- /* Can't bound different accel_xfrm to already existing sa_ctx.
- * This is because we can't support multiple ketmats for
- * same IPs and SPI
- */
- context = ERR_PTR(-EEXIST);
- goto unlock_hash;
- }
-
- /* Bound accel_xfrm to sa_ctx */
- opcode = is_v2_sadb_supported(fdev->ipsec) ?
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
- sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err) {
- context = ERR_PTR(err);
- goto delete_hash;
- }
-
- mutex_unlock(&fipsec->sa_hash_lock);
-
- ++fpga_xfrm->num_rules;
- fpga_xfrm->sa_ctx = sa_ctx;
- sa_ctx->fpga_xfrm = fpga_xfrm;
-
- mutex_unlock(&fpga_xfrm->lock);
-
- return sa_ctx;
-
-delete_hash:
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa));
-unlock_hash:
- mutex_unlock(&fipsec->sa_hash_lock);
-
-exists:
- mutex_unlock(&fpga_xfrm->lock);
- kfree(sa_ctx);
- return context;
-}
-
-static void *
-mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct fs_fte *fte,
- bool is_egress)
-{
- struct mlx5_accel_esp_xfrm *accel_xfrm;
- __be32 saddr[4], daddr[4], spi;
- struct mlx5_flow_group *fg;
- bool is_ipv6 = false;
-
- fs_get_obj(fg, fte->node.parent);
- /* validate */
- if (is_egress &&
- !mlx5_is_fpga_egress_ipsec_rule(mdev,
- fg->mask.match_criteria_enable,
- fg->mask.match_criteria,
- fte->val,
- &fte->action,
- &fte->flow_context))
- return ERR_PTR(-EINVAL);
- else if (!mlx5_is_fpga_ipsec_rule(mdev,
- fg->mask.match_criteria_enable,
- fg->mask.match_criteria,
- fte->val))
- return ERR_PTR(-EINVAL);
-
- /* get xfrm context */
- accel_xfrm =
- (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
-
- /* IPs */
- if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
- fte->val)) {
- memcpy(&saddr[3],
- MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- fte->val,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- sizeof(saddr[3]));
- memcpy(&daddr[3],
- MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- fte->val,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- sizeof(daddr[3]));
- } else {
- memcpy(saddr,
- MLX5_ADDR_OF(fte_match_param,
- fte->val,
- outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(saddr));
- memcpy(daddr,
- MLX5_ADDR_OF(fte_match_param,
- fte->val,
- outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(daddr));
- is_ipv6 = true;
- }
-
- /* SPI */
- spi = MLX5_GET_BE(typeof(spi),
- fte_match_param, fte->val,
- misc_parameters.outer_esp_spi);
-
- /* create */
- return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
- saddr, daddr,
- spi, is_ipv6);
-}
-
-static void
-mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
-{
- struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- int opcode = is_v2_sadb_supported(fdev->ipsec) ?
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
- int err;
-
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
- sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err) {
- WARN_ON(err);
- return;
- }
-
- mutex_lock(&fipsec->sa_hash_lock);
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa));
- mutex_unlock(&fipsec->sa_hash_lock);
-}
-
-void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
-
- mutex_lock(&fpga_xfrm->lock);
- if (!--fpga_xfrm->num_rules) {
- mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
- kfree(fpga_xfrm->sa_ctx);
- fpga_xfrm->sa_ctx = NULL;
- }
- mutex_unlock(&fpga_xfrm->lock);
-}
-
-static inline struct mlx5_fpga_ipsec_rule *
-_rule_search(struct rb_root *root, struct fs_fte *fte)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct mlx5_fpga_ipsec_rule *rule =
- container_of(node, struct mlx5_fpga_ipsec_rule,
- node);
-
- if (rule->fte < fte)
- node = node->rb_left;
- else if (rule->fte > fte)
- node = node->rb_right;
- else
- return rule;
- }
- return NULL;
-}
-
-static struct mlx5_fpga_ipsec_rule *
-rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
-{
- struct mlx5_fpga_ipsec_rule *rule;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- rule = _rule_search(&ipsec_dev->rules_rb, fte);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-
- return rule;
-}
-
-static inline int _rule_insert(struct rb_root *root,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- struct rb_node **new = &root->rb_node, *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct mlx5_fpga_ipsec_rule *this =
- container_of(*new, struct mlx5_fpga_ipsec_rule,
- node);
-
- parent = *new;
- if (rule->fte < this->fte)
- new = &((*new)->rb_left);
- else if (rule->fte > this->fte)
- new = &((*new)->rb_right);
- else
- return -EEXIST;
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&rule->node, parent, new);
- rb_insert_color(&rule->node, root);
-
- return 0;
-}
-
-static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- int ret;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- ret = _rule_insert(&ipsec_dev->rules_rb, rule);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-
- return ret;
-}
-
-static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- struct rb_root *root = &ipsec_dev->rules_rb;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- rb_erase(&rule->node, root);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-}
-
-static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- _rule_delete(ipsec_dev, rule);
- kfree(rule);
-}
-
-struct mailbox_mod {
- uintptr_t saved_esp_id;
- u32 saved_action;
- u32 saved_outer_esp_spi_value;
-};
-
-static void restore_spec_mailbox(struct fs_fte *fte,
- struct mailbox_mod *mbox_mod)
-{
- char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
- fte->val,
- misc_parameters);
-
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- mbox_mod->saved_outer_esp_spi_value);
- fte->action.action |= mbox_mod->saved_action;
- fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
-}
-
-static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
- struct fs_fte *fte,
- struct mailbox_mod *mbox_mod)
-{
- char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
- fte->val,
- misc_parameters);
-
- mbox_mod->saved_esp_id = fte->action.esp_id;
- mbox_mod->saved_action = fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
- mbox_mod->saved_outer_esp_spi_value =
- MLX5_GET(fte_match_set_misc, misc_params_v,
- outer_esp_spi);
-
- fte->action.esp_id = 0;
- fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
- if (!MLX5_CAP_FLOWTABLE(mdev,
- flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
-}
-
-static enum fs_flow_table_type egress_to_fs_ft(bool egress)
-{
- return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
-}
-
-static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg,
- bool is_egress)
-{
- int (*create_flow_group)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft, u32 *in,
- struct mlx5_flow_group *fg) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
- char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
- match_criteria.misc_parameters);
- struct mlx5_core_dev *dev = ns->dev;
- u32 saved_outer_esp_spi_mask;
- u8 match_criteria_enable;
- int ret;
-
- if (MLX5_CAP_FLOWTABLE(dev,
- flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- return create_flow_group(ns, ft, in, fg);
-
- match_criteria_enable =
- MLX5_GET(create_flow_group_in, in, match_criteria_enable);
- saved_outer_esp_spi_mask =
- MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
- if (!match_criteria_enable || !saved_outer_esp_spi_mask)
- return create_flow_group(ns, ft, in, fg);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
-
- if (!(*misc_params_c) &&
- !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
- MLX5_SET(create_flow_group_in, in, match_criteria_enable,
- match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
-
- ret = create_flow_group(ns, ft, in, fg);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
- MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
-
- return ret;
-}
-
-static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*create_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
- struct mlx5_core_dev *dev = ns->dev;
- struct mlx5_fpga_device *fdev = dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_ipsec_rule *rule;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return create_fte(ns, ft, fg, fte);
-
- rule = kzalloc(sizeof(*rule), GFP_KERNEL);
- if (!rule)
- return -ENOMEM;
-
- rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
- if (IS_ERR(rule->ctx)) {
- int err = PTR_ERR(rule->ctx);
- kfree(rule);
- return err;
- }
-
- rule->fte = fte;
- WARN_ON(rule_insert(fipsec, rule));
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = create_fte(ns, ft, fg, fte);
- restore_spec_mailbox(fte, &mbox_mod);
- if (ret) {
- _rule_delete(fipsec, rule);
- mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
- kfree(rule);
- }
-
- return ret;
-}
-
-static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*update_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
- struct mlx5_core_dev *dev = ns->dev;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return update_fte(ns, ft, fg, modify_mask, fte);
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = update_fte(ns, ft, fg, modify_mask, fte);
- restore_spec_mailbox(fte, &mbox_mod);
-
- return ret;
-}
-
-static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*delete_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
- struct mlx5_core_dev *dev = ns->dev;
- struct mlx5_fpga_device *fdev = dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_ipsec_rule *rule;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return delete_fte(ns, ft, fte);
-
- rule = rule_search(fipsec, fte);
- if (!rule)
- return -ENOENT;
-
- mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
- rule_delete(fipsec, rule);
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = delete_fte(ns, ft, fte);
- restore_spec_mailbox(fte, &mbox_mod);
-
- return ret;
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg)
-{
- return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
- true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_delete_fte(ns, ft, fte, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg)
-{
- return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
- false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_delete_fte(ns, ft, fte, false);
-}
-
-static struct mlx5_flow_cmds fpga_ipsec_ingress;
-static struct mlx5_flow_cmds fpga_ipsec_egress;
-
-const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
- switch (type) {
- case FS_FT_NIC_RX:
- return &fpga_ipsec_ingress;
- case FS_FT_NIC_TX:
- return &fpga_ipsec_egress;
- default:
- WARN_ON(true);
- return NULL;
- }
-}
-
-int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_conn_attr init_attr = {0};
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_conn *conn;
- int err;
-
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return 0;
-
- fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
- if (!fdev->ipsec)
- return -ENOMEM;
-
- fdev->ipsec->fdev = fdev;
-
- err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
- fdev->ipsec->caps);
- if (err) {
- mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
- err);
- goto error;
- }
-
- INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
- spin_lock_init(&fdev->ipsec->pending_cmds_lock);
-
- init_attr.rx_size = SBU_QP_QUEUE_SIZE;
- init_attr.tx_size = SBU_QP_QUEUE_SIZE;
- init_attr.recv_cb = mlx5_fpga_ipsec_recv;
- init_attr.cb_arg = fdev;
- conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
- if (IS_ERR(conn)) {
- err = PTR_ERR(conn);
- mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
- err);
- goto error;
- }
- fdev->ipsec->conn = conn;
-
- err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
- if (err)
- goto err_destroy_conn;
- mutex_init(&fdev->ipsec->sa_hash_lock);
-
- fdev->ipsec->rules_rb = RB_ROOT;
- mutex_init(&fdev->ipsec->rules_rb_lock);
-
- err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
- if (err) {
- mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
- err);
- goto err_destroy_hash;
- }
-
- return 0;
-
-err_destroy_hash:
- rhashtable_destroy(&fdev->ipsec->sa_hash);
-
-err_destroy_conn:
- mlx5_fpga_sbu_conn_destroy(conn);
-
-error:
- kfree(fdev->ipsec);
- fdev->ipsec = NULL;
- return err;
-}
-
-static void destroy_rules_rb(struct rb_root *root)
-{
- struct mlx5_fpga_ipsec_rule *r, *tmp;
-
- rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
- rb_erase(&r->node, root);
- mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
- kfree(r);
- }
-}
-
-void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return;
-
- destroy_rules_rb(&fdev->ipsec->rules_rb);
- rhashtable_destroy(&fdev->ipsec->sa_hash);
-
- mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
- kfree(fdev->ipsec);
- fdev->ipsec = NULL;
-}
-
-void mlx5_fpga_ipsec_build_fs_cmds(void)
-{
- /* ingress */
- fpga_ipsec_ingress.create_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
- fpga_ipsec_ingress.destroy_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
- fpga_ipsec_ingress.modify_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
- fpga_ipsec_ingress.create_flow_group =
- mlx5_fpga_ipsec_fs_create_flow_group_ingress;
- fpga_ipsec_ingress.destroy_flow_group =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
- fpga_ipsec_ingress.create_fte =
- mlx5_fpga_ipsec_fs_create_fte_ingress;
- fpga_ipsec_ingress.update_fte =
- mlx5_fpga_ipsec_fs_update_fte_ingress;
- fpga_ipsec_ingress.delete_fte =
- mlx5_fpga_ipsec_fs_delete_fte_ingress;
- fpga_ipsec_ingress.update_root_ft =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
-
- /* egress */
- fpga_ipsec_egress.create_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
- fpga_ipsec_egress.destroy_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
- fpga_ipsec_egress.modify_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
- fpga_ipsec_egress.create_flow_group =
- mlx5_fpga_ipsec_fs_create_flow_group_egress;
- fpga_ipsec_egress.destroy_flow_group =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
- fpga_ipsec_egress.create_fte =
- mlx5_fpga_ipsec_fs_create_fte_egress;
- fpga_ipsec_egress.update_fte =
- mlx5_fpga_ipsec_fs_update_fte_egress;
- fpga_ipsec_egress.delete_fte =
- mlx5_fpga_ipsec_fs_delete_fte_egress;
- fpga_ipsec_egress.update_root_ft =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
-}
-
-static int
-mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- if (attrs->tfc_pad) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
- mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.iv_algo !=
- MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
- mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.icv_len != 128) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.key_len != 128 &&
- attrs->keymat.aes_gcm.key_len != 256) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
- return -EOPNOTSUPP;
- }
-
- if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
- (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
- v2_command))) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-struct mlx5_accel_esp_xfrm *
-mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
-
- if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
- mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
- return ERR_PTR(-EINVAL);
- }
-
- if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
- mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
- return ERR_PTR(-EOPNOTSUPP);
- }
-
- fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
- if (!fpga_xfrm)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&fpga_xfrm->lock);
- memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
- sizeof(fpga_xfrm->accel_xfrm.attrs));
-
- return &fpga_xfrm->accel_xfrm;
-}
-
-void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- container_of(xfrm, struct mlx5_fpga_esp_xfrm,
- accel_xfrm);
- /* assuming no sa_ctx are connected to this xfrm_ctx */
- kfree(fpga_xfrm);
-}
-
-int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- struct mlx5_core_dev *mdev = xfrm->mdev;
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
- struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
-
- int err = 0;
-
- if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
- return 0;
-
- if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
- mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
- return -EOPNOTSUPP;
- }
-
- if (is_v2_sadb_supported(fipsec)) {
- mlx5_core_warn(mdev, "Modify esp is not supported\n");
- return -EOPNOTSUPP;
- }
-
- fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
-
- mutex_lock(&fpga_xfrm->lock);
-
- if (!fpga_xfrm->sa_ctx)
- /* Unbounded xfrm, chane only sw attrs */
- goto change_sw_xfrm_attrs;
-
- /* copy original hw sa */
- memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
- mutex_lock(&fipsec->sa_hash_lock);
- /* remove original hw sa from hash */
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash, rhash_sa));
- /* update hw_sa with new xfrm attrs*/
- mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
- &fpga_xfrm->sa_ctx->hw_sa);
- /* try to insert new hw_sa to hash */
- err = rhashtable_insert_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash, rhash_sa);
- if (err)
- goto rollback_sa;
-
- /* modify device with new hw_sa */
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
- MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
- fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err)
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash,
- rhash_sa));
-rollback_sa:
- if (err) {
- /* return original hw_sa to hash */
- memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
- sizeof(org_hw_sa));
- WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash,
- rhash_sa));
- }
- mutex_unlock(&fipsec->sa_hash_lock);
-
-change_sw_xfrm_attrs:
- if (!err)
- memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
- mutex_unlock(&fpga_xfrm->lock);
- return err;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
deleted file mode 100644
index 382985e65b48..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_FPGA_IPSEC_H__
-#define __MLX5_FPGA_IPSEC_H__
-
-#include "accel/ipsec.h"
-#include "fs_cmd.h"
-
-u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
-unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
-int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int counters_count);
-
-void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *accel_xfrm,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6);
-void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
-
-int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
-void mlx5_fpga_ipsec_build_fs_cmds(void);
-
-struct mlx5_accel_esp_xfrm *
-mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags);
-void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
-int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs);
-
-const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
-
-#endif /* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
index 656f96be6e20..89ef592656c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -47,11 +47,12 @@
/**
* enum mlx5_fpga_access_type - Enumerated the different methods possible for
* accessing the device memory address space
+ *
+ * @MLX5_FPGA_ACCESS_TYPE_I2C: Use the slow CX-FPGA I2C bus
+ * @MLX5_FPGA_ACCESS_TYPE_DONTCARE: Use the fastest available method
*/
enum mlx5_fpga_access_type {
- /** Use the slow CX-FPGA I2C bus */
MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
- /** Use the fastest available method */
MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
};
@@ -113,6 +114,7 @@ struct mlx5_fpga_conn_attr {
* subsequent receives.
*/
void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ /** @cb_arg: A context to be passed to recv_cb callback */
void *cb_arg;
};
@@ -145,7 +147,7 @@ void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
/**
* mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
- * @fdev: An FPGA SBU connection
+ * @conn: An FPGA SBU connection
* @buf: The packet buffer
*
* Queues a packet for transmission over an FPGA SBU connection.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
deleted file mode 100644
index 22a2ef111514..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-#include "fpga/tls.h"
-#include "fpga/cmd.h"
-#include "fpga/sdk.h"
-#include "fpga/core.h"
-#include "accel/tls.h"
-
-struct mlx5_fpga_tls_command_context;
-
-typedef void (*mlx5_fpga_tls_command_complete)
- (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *ctx,
- struct mlx5_fpga_dma_buf *resp);
-
-struct mlx5_fpga_tls_command_context {
- struct list_head list;
- /* There is no guarantee on the order between the TX completion
- * and the command response.
- * The TX completion is going to touch cmd->buf even in
- * the case of successful transmission.
- * So instead of requiring separate allocations for cmd
- * and cmd->buf we've decided to use a reference counter
- */
- refcount_t ref;
- struct mlx5_fpga_dma_buf buf;
- mlx5_fpga_tls_command_complete complete;
-};
-
-static void
-mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx)
-{
- if (refcount_dec_and_test(&ctx->ref))
- kfree(ctx);
-}
-
-static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *resp)
-{
- struct mlx5_fpga_conn *conn = fdev->tls->conn;
- struct mlx5_fpga_tls_command_context *ctx;
- struct mlx5_fpga_tls *tls = fdev->tls;
- unsigned long flags;
-
- spin_lock_irqsave(&tls->pending_cmds_lock, flags);
- ctx = list_first_entry(&tls->pending_cmds,
- struct mlx5_fpga_tls_command_context, list);
- list_del(&ctx->list);
- spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
- ctx->complete(conn, fdev, ctx, resp);
-}
-
-static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf,
- u8 status)
-{
- struct mlx5_fpga_tls_command_context *ctx =
- container_of(buf, struct mlx5_fpga_tls_command_context, buf);
-
- mlx5_fpga_tls_put_command_ctx(ctx);
-
- if (unlikely(status))
- mlx5_fpga_tls_cmd_complete(fdev, NULL);
-}
-
-static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- mlx5_fpga_tls_command_complete complete)
-{
- struct mlx5_fpga_tls *tls = fdev->tls;
- unsigned long flags;
- int ret;
-
- refcount_set(&cmd->ref, 2);
- cmd->complete = complete;
- cmd->buf.complete = mlx5_fpga_cmd_send_complete;
-
- spin_lock_irqsave(&tls->pending_cmds_lock, flags);
- /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock
- * to make sure commands are inserted to the tls->pending_cmds list
- * and the command QP in the same order.
- */
- ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf);
- if (likely(!ret))
- list_add_tail(&cmd->list, &tls->pending_cmds);
- else
- complete(tls->conn, fdev, cmd, NULL);
- spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
-}
-
-/* Start of context identifiers range (inclusive) */
-#define SWID_START 0
-/* End of context identifiers range (exclusive) */
-#define SWID_END BIT(24)
-
-static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
- void *ptr)
-{
- unsigned long flags;
- int ret;
-
- /* TLS metadata format is 1 byte for syndrome followed
- * by 3 bytes of swid (software ID)
- * swid must not exceed 3 bytes.
- * See tls_rxtx.c:insert_pet() for details
- */
- BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
-
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(idr_spinlock, flags);
- ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
- spin_unlock_irqrestore(idr_spinlock, flags);
- idr_preload_end();
-
- return ret;
-}
-
-static void *mlx5_fpga_tls_release_swid(struct idr *idr,
- spinlock_t *idr_spinlock, u32 swid)
-{
- unsigned long flags;
- void *ptr;
-
- spin_lock_irqsave(idr_spinlock, flags);
- ptr = idr_remove(idr, swid);
- spin_unlock_irqrestore(idr_spinlock, flags);
- return ptr;
-}
-
-static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf, u8 status)
-{
- kfree(buf);
-}
-
-static void
-mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- struct mlx5_fpga_dma_buf *resp)
-{
- if (resp) {
- u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
-
- if (syndrome)
- mlx5_fpga_err(fdev,
- "Teardown stream failed with syndrome = %d",
- syndrome);
- }
- mlx5_fpga_tls_put_command_ctx(cmd);
-}
-
-static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
-{
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow,
- MLX5_BYTE_OFF(tls_flow, ipv6));
-
- MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6));
- MLX5_SET(tls_cmd, cmd, direction_sx,
- MLX5_GET(tls_flow, flow, direction_sx));
-}
-
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
- u64 rcd_sn)
-{
- struct mlx5_fpga_dma_buf *buf;
- int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
- void *flow;
- void *cmd;
- int ret;
-
- buf = kzalloc(size, GFP_ATOMIC);
- if (!buf)
- return -ENOMEM;
-
- cmd = (buf + 1);
-
- rcu_read_lock();
- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
- if (unlikely(!flow)) {
- rcu_read_unlock();
- WARN_ONCE(1, "Received NULL pointer for handle\n");
- kfree(buf);
- return -EINVAL;
- }
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
- rcu_read_unlock();
-
- MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
- MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
- MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
- MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
-
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- buf->complete = mlx_tls_kfree_complete;
-
- ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
- if (ret < 0)
- kfree(buf);
-
- return ret;
-}
-
-static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
- void *flow, u32 swid, gfp_t flags)
-{
- struct mlx5_fpga_tls_command_context *ctx;
- struct mlx5_fpga_dma_buf *buf;
- void *cmd;
-
- ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags);
- if (!ctx)
- return;
-
- buf = &ctx->buf;
- cmd = (ctx + 1);
- MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
- MLX5_SET(tls_cmd, cmd, swid, swid);
-
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
- kfree(flow);
-
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
-
- mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
- mlx5_fpga_tls_teardown_completion);
-}
-
-void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- gfp_t flags, bool direction_sx)
-{
- struct mlx5_fpga_tls *tls = mdev->fpga->tls;
- void *flow;
-
- if (direction_sx)
- flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock,
- swid);
- else
- flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock,
- swid);
-
- if (!flow) {
- mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
- swid);
- return;
- }
-
- synchronize_rcu(); /* before kfree(flow) */
- mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
-}
-
-enum mlx5_fpga_setup_stream_status {
- MLX5_FPGA_CMD_PENDING,
- MLX5_FPGA_CMD_SEND_FAILED,
- MLX5_FPGA_CMD_RESPONSE_RECEIVED,
- MLX5_FPGA_CMD_ABANDONED,
-};
-
-struct mlx5_setup_stream_context {
- struct mlx5_fpga_tls_command_context cmd;
- atomic_t status;
- u32 syndrome;
- struct completion comp;
-};
-
-static void
-mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- struct mlx5_fpga_dma_buf *resp)
-{
- struct mlx5_setup_stream_context *ctx =
- container_of(cmd, struct mlx5_setup_stream_context, cmd);
- int status = MLX5_FPGA_CMD_SEND_FAILED;
- void *tls_cmd = ctx + 1;
-
- /* If we failed to send to command resp == NULL */
- if (resp) {
- ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
- status = MLX5_FPGA_CMD_RESPONSE_RECEIVED;
- }
-
- status = atomic_xchg_release(&ctx->status, status);
- if (likely(status != MLX5_FPGA_CMD_ABANDONED)) {
- complete(&ctx->comp);
- return;
- }
-
- mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n",
- ctx->syndrome);
-
- if (!ctx->syndrome) {
- /* The process was killed while waiting for the context to be
- * added, and the add completed successfully.
- * We need to destroy the HW context, and we can't can't reuse
- * the command context because we might not have received
- * the tx completion yet.
- */
- mlx5_fpga_tls_del_flow(fdev->mdev,
- MLX5_GET(tls_cmd, tls_cmd, swid),
- GFP_ATOMIC,
- MLX5_GET(tls_cmd, tls_cmd,
- direction_sx));
- }
-
- mlx5_fpga_tls_put_command_ctx(cmd);
-}
-
-static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev,
- struct mlx5_setup_stream_context *ctx)
-{
- struct mlx5_fpga_dma_buf *buf;
- void *cmd = ctx + 1;
- int status, ret = 0;
-
- buf = &ctx->cmd.buf;
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM);
-
- init_completion(&ctx->comp);
- atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING);
- ctx->syndrome = -1;
-
- mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
- mlx5_fpga_tls_setup_completion);
- wait_for_completion_killable(&ctx->comp);
-
- status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED);
- if (unlikely(status == MLX5_FPGA_CMD_PENDING))
- /* ctx is going to be released in mlx5_fpga_tls_setup_completion */
- return -EINTR;
-
- if (unlikely(ctx->syndrome))
- ret = -ENOMEM;
-
- mlx5_fpga_tls_put_command_ctx(&ctx->cmd);
- return ret;
-}
-
-static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg,
- struct mlx5_fpga_dma_buf *buf)
-{
- struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg;
-
- mlx5_fpga_tls_cmd_complete(fdev, buf);
-}
-
-bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev)
-{
- if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
- return false;
-
- if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
- MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0)
- return false;
-
- return true;
-}
-
-static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev,
- u32 *p_caps)
-{
- int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap);
- u32 caps = 0;
- void *buf;
-
- buf = kzalloc(cap_size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf);
- if (err)
- goto out;
-
- if (MLX5_GET(tls_extended_cap, buf, tx))
- caps |= MLX5_ACCEL_TLS_TX;
- if (MLX5_GET(tls_extended_cap, buf, rx))
- caps |= MLX5_ACCEL_TLS_RX;
- if (MLX5_GET(tls_extended_cap, buf, tls_v12))
- caps |= MLX5_ACCEL_TLS_V12;
- if (MLX5_GET(tls_extended_cap, buf, tls_v13))
- caps |= MLX5_ACCEL_TLS_V13;
- if (MLX5_GET(tls_extended_cap, buf, lro))
- caps |= MLX5_ACCEL_TLS_LRO;
- if (MLX5_GET(tls_extended_cap, buf, ipv6))
- caps |= MLX5_ACCEL_TLS_IPV6;
-
- if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128))
- caps |= MLX5_ACCEL_TLS_AES_GCM128;
- if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256))
- caps |= MLX5_ACCEL_TLS_AES_GCM256;
-
- *p_caps = caps;
- err = 0;
-out:
- kfree(buf);
- return err;
-}
-
-int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_conn_attr init_attr = {0};
- struct mlx5_fpga_conn *conn;
- struct mlx5_fpga_tls *tls;
- int err = 0;
-
- if (!mlx5_fpga_is_tls_device(mdev) || !fdev)
- return 0;
-
- tls = kzalloc(sizeof(*tls), GFP_KERNEL);
- if (!tls)
- return -ENOMEM;
-
- err = mlx5_fpga_tls_get_caps(fdev, &tls->caps);
- if (err)
- goto error;
-
- if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
- err = -ENOTSUPP;
- goto error;
- }
-
- init_attr.rx_size = SBU_QP_QUEUE_SIZE;
- init_attr.tx_size = SBU_QP_QUEUE_SIZE;
- init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb;
- init_attr.cb_arg = fdev;
- conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
- if (IS_ERR(conn)) {
- err = PTR_ERR(conn);
- mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n",
- err);
- goto error;
- }
-
- tls->conn = conn;
- spin_lock_init(&tls->pending_cmds_lock);
- INIT_LIST_HEAD(&tls->pending_cmds);
-
- idr_init(&tls->tx_idr);
- idr_init(&tls->rx_idr);
- spin_lock_init(&tls->tx_idr_spinlock);
- spin_lock_init(&tls->rx_idr_spinlock);
- fdev->tls = tls;
- return 0;
-
-error:
- kfree(tls);
- return err;
-}
-
-void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!fdev || !fdev->tls)
- return;
-
- mlx5_fpga_sbu_conn_destroy(fdev->tls->conn);
- kfree(fdev->tls);
- fdev->tls = NULL;
-}
-
-static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd,
- struct tls_crypto_info *info,
- __be64 *rcd_sn)
-{
- struct tls12_crypto_info_aes_gcm_128 *crypto_info =
- (struct tls12_crypto_info_aes_gcm_128 *)info;
-
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq,
- TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
-
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv),
- crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key),
- crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
-
- /* in AES-GCM 128 we need to write the key twice */
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) +
- TLS_CIPHER_AES_GCM_128_KEY_SIZE,
- crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
-
- MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128);
-}
-
-static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
- struct tls_crypto_info *crypto_info)
-{
- __be64 rcd_sn;
-
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- if (!(caps & MLX5_ACCEL_TLS_AES_GCM128))
- return -EINVAL;
- mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 swid, u32 tcp_sn)
-{
- u32 caps = mlx5_fpga_tls_device_caps(mdev);
- struct mlx5_setup_stream_context *ctx;
- int ret = -ENOMEM;
- size_t cmd_size;
- void *cmd;
-
- cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx);
- ctx = kzalloc(cmd_size, GFP_KERNEL);
- if (!ctx)
- goto out;
-
- cmd = ctx + 1;
- ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info);
- if (ret)
- goto free_ctx;
-
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
-
- MLX5_SET(tls_cmd, cmd, swid, swid);
- MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn);
-
- return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx);
-
-free_ctx:
- kfree(ctx);
-out:
- return ret;
-}
-
-int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx)
-{
- struct mlx5_fpga_tls *tls = mdev->fpga->tls;
- int ret = -ENOMEM;
- u32 swid;
-
- if (direction_sx)
- ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock, flow);
- else
- ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock, flow);
-
- if (ret < 0)
- return ret;
-
- swid = ret;
- MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
-
- ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
- start_offload_tcp_sn);
- if (ret && ret != -EINTR)
- goto free_swid;
-
- *p_swid = swid;
- return 0;
-free_swid:
- if (direction_sx)
- mlx5_fpga_tls_release_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock, swid);
- else
- mlx5_fpga_tls_release_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock, swid);
-
- return ret;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
deleted file mode 100644
index 3b2e37bf76fe..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_FPGA_TLS_H__
-#define __MLX5_FPGA_TLS_H__
-
-#include <linux/mlx5/driver.h>
-
-#include <net/tls.h>
-#include "fpga/core.h"
-
-struct mlx5_fpga_tls {
- struct list_head pending_cmds;
- spinlock_t pending_cmds_lock; /* Protects pending_cmds */
- u32 caps;
- struct mlx5_fpga_conn *conn;
-
- struct idr tx_idr;
- struct idr rx_idr;
- spinlock_t tx_idr_spinlock; /* protects the IDR */
- spinlock_t rx_idr_spinlock; /* protects the IDR */
-};
-
-int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx);
-
-void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- gfp_t flags, bool direction_sx);
-
-bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
-int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev);
-
-static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
-{
- return mdev->fpga->tls->caps;
-}
-
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
- u64 rcd_sn);
-
-#endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index b25465d9e030..32d4c967469c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -36,6 +36,7 @@
#include "fs_core.h"
#include "fs_cmd.h"
+#include "fs_ft_pool.h"
#include "mlx5_core.h"
#include "eswitch.h"
@@ -49,9 +50,13 @@ static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int log_size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
+ int max_fte = ft_attr->max_fte;
+
+ ft->max_fte = max_fte ? roundup_pow_of_two(max_fte) : 1;
+
return 0;
}
@@ -108,9 +113,7 @@ static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns,
}
static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
- int reformat_type,
- size_t size,
- void *reformat_data,
+ struct mlx5_pkt_reformat_params *params,
enum mlx5_flow_namespace_type namespace,
struct mlx5_pkt_reformat *pkt_reformat)
{
@@ -151,62 +154,140 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
return 0;
}
+static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
+}
+
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ bool ft_id_valid,
+ u32 ft_id)
+{
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+ if (ft_id_valid) {
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ft_id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
+static int
+mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+ int definer_id)
+{
+ return 0;
+}
+
+static int
+mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask)
+{
+ return 0;
+}
+
static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft, u32 underlay_qpn,
bool disconnect)
{
- u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ int err;
if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
underlay_qpn == 0)
return 0;
+ if (ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ !mlx5_lag_is_master(dev))
+ return 0;
+
MLX5_SET(set_flow_table_root_in, in, opcode,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
- if (disconnect) {
+ if (disconnect)
MLX5_SET(set_flow_table_root_in, in, op_mod, 1);
- MLX5_SET(set_flow_table_root_in, in, table_id, 0);
- } else {
- MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ else
MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
- }
MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
- if (ft->vport) {
- MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
- MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+ MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
+ MLX5_SET(set_flow_table_root_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+
+ err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ if (!err &&
+ ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ mlx5_lag_is_master(dev)) {
+ err = mlx5_cmd_set_slave_root_fdb(dev,
+ mlx5_lag_get_peer_mdev(dev),
+ !disconnect, (!disconnect) ?
+ ft->id : 0);
+ if (err && !disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ns->root_ft->id);
+ mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ }
}
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return err;
}
static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int log_size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION);
- u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ unsigned int size;
int err;
+ if (ft_attr->max_fte != POOL_NEXT_SIZE)
+ size = roundup_pow_of_two(ft_attr->max_fte);
+ size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte);
+ if (!size)
+ return -ENOSPC;
+
MLX5_SET(create_flow_table_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_TABLE);
+ MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid);
MLX5_SET(create_flow_table_in, in, table_type, ft->type);
MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level);
- MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size);
- if (ft->vport) {
- MLX5_SET(create_flow_table_in, in, vport_number, ft->vport);
- MLX5_SET(create_flow_table_in, in, other_vport, 1);
- }
+ MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? ilog2(size) : 0);
+ MLX5_SET(create_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(create_flow_table_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
en_decap);
@@ -239,38 +320,45 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
break;
}
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (!err)
+ err = mlx5_cmd_exec_inout(dev, create_flow_table, in, out);
+ if (!err) {
ft->id = MLX5_GET(create_flow_table_out, out,
table_id);
+ ft->max_fte = size;
+ } else {
+ mlx5_ft_pool_put_sz(ns->dev, size);
+ }
+
return err;
}
static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft)
{
- u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ int err;
MLX5_SET(destroy_flow_table_in, in, opcode,
MLX5_CMD_OP_DESTROY_FLOW_TABLE);
MLX5_SET(destroy_flow_table_in, in, table_type, ft->type);
MLX5_SET(destroy_flow_table_in, in, table_id, ft->id);
- if (ft->vport) {
- MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport);
- MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
- }
+ MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_table_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+
+ err = mlx5_cmd_exec_in(dev, destroy_flow_table, in);
+ if (!err)
+ mlx5_ft_pool_put_sz(ns->dev, ft->max_fte);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return err;
}
static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft)
{
- u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
MLX5_SET(modify_flow_table_in, in, opcode,
@@ -289,11 +377,9 @@ static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
flow_table_context.lag_master_next_table_id, 0);
}
} else {
- if (ft->vport) {
- MLX5_SET(modify_flow_table_in, in, vport_number,
- ft->vport);
- MLX5_SET(modify_flow_table_in, in, other_vport, 1);
- }
+ MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(modify_flow_table_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
MLX5_SET(modify_flow_table_in, in, modify_field_select,
MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
if (next_ft) {
@@ -310,7 +396,7 @@ static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
}
}
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_flow_table, in);
}
static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
@@ -318,8 +404,7 @@ static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
u32 *in,
struct mlx5_flow_group *fg)
{
- u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {};
struct mlx5_core_dev *dev = ns->dev;
int err;
@@ -332,7 +417,10 @@ static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
MLX5_SET(create_flow_group_in, in, other_vport, 1);
}
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ MLX5_SET(create_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(create_flow_group_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+ err = mlx5_cmd_exec_inout(dev, create_flow_group, in, out);
if (!err)
fg->id = MLX5_GET(create_flow_group_out, out,
group_id);
@@ -343,8 +431,7 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_group *fg)
{
- u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
MLX5_SET(destroy_flow_group_in, in, opcode,
@@ -352,12 +439,10 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
MLX5_SET(destroy_flow_group_in, in, table_type, ft->type);
MLX5_SET(destroy_flow_group_in, in, table_id, ft->id);
MLX5_SET(destroy_flow_group_in, in, group_id, fg->id);
- if (ft->vport) {
- MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport);
- MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
- }
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_group_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+ return mlx5_cmd_exec_in(dev, destroy_flow_group, in);
}
static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
@@ -374,9 +459,11 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
return 0;
list_for_each_entry(dst, &fte->node.children, node.list) {
- if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_NONE)
continue;
- if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ if ((dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
num_encap++;
num_fwd_destinations++;
@@ -396,6 +483,30 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
return 0;
}
+
+static void
+mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context)
+{
+ void *exe_aso_ctrl;
+ void *execute_aso;
+
+ execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context,
+ execute_aso[0]);
+ MLX5_SET(execute_aso, execute_aso, valid, 1);
+ MLX5_SET(execute_aso, execute_aso, aso_object_id,
+ fte->action.exe_aso.object_id);
+
+ exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id,
+ fte->action.exe_aso.return_reg_id);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type,
+ fte->action.exe_aso.type);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color,
+ fte->action.exe_aso.flow_meter.init_color);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id,
+ fte->action.exe_aso.flow_meter.meter_idx);
+}
+
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
@@ -435,10 +546,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(set_fte_in, in, ignore_flow_level,
!!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL));
- if (ft->vport) {
- MLX5_SET(set_fte_in, in, vport_number, ft->vport);
- MLX5_SET(set_fte_in, in, other_vport, 1);
- }
+ MLX5_SET(set_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(set_fte_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
@@ -467,6 +577,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_hdr->id);
+ MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type,
+ fte->action.crypto.type);
+ MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_obj_id,
+ fte->action.crypto.obj_id);
+
vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
@@ -488,21 +603,26 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int list_size = 0;
list_for_each_entry(dst, &fte->node.children, node.list) {
- unsigned int id, type = dst->dest_attr.type;
+ enum mlx5_flow_destination_type type = dst->dest_attr.type;
+ enum mlx5_ifc_flow_destination_type ifc_type;
+ unsigned int id;
if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
continue;
switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ continue;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
id = dst->dest_attr.ft_num;
- type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
id = dst->dest_attr.ft->id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
- id = dst->dest_attr.vport.num;
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id_valid,
!!(dst->dest_attr.vport.flags &
@@ -510,6 +630,14 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
dst->dest_attr.vport.vhca_id);
+ if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) {
+ /* destination_id is reserved */
+ id = 0;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK;
+ break;
+ }
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT;
+ id = dst->dest_attr.vport.num;
if (extended_dest &&
dst->dest_attr.vport.pkt_reformat) {
MLX5_SET(dest_format_struct, in_dests,
@@ -521,12 +649,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
dst->dest_attr.vport.pkt_reformat->id);
}
break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ id = dst->dest_attr.sampler_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+ break;
default:
id = dst->dest_attr.tir_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
}
MLX5_SET(dest_format_struct, in_dests, destination_type,
- type);
+ ifc_type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
in_dests += dst_cnt_size;
list_size++;
@@ -561,6 +694,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
list_size);
}
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) {
+ mlx5_cmd_set_fte_flow_meter(fte, in_flow_context);
+ } else {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+ }
+
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
err_out:
kvfree(in);
@@ -600,35 +742,33 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct fs_fte *fte)
{
- u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
MLX5_SET(delete_fte_in, in, table_type, ft->type);
MLX5_SET(delete_fte_in, in, table_id, ft->id);
MLX5_SET(delete_fte_in, in, flow_index, fte->index);
- if (ft->vport) {
- MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
- MLX5_SET(delete_fte_in, in, other_vport, 1);
- }
+ MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(delete_fte_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, delete_fte, in);
}
int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
u32 *id)
{
- u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {};
int err;
MLX5_SET(alloc_flow_counter_in, in, opcode,
MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, alloc_flow_counter, in, out);
if (!err)
*id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
return err;
@@ -641,21 +781,20 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {};
MLX5_SET(dealloc_flow_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, dealloc_flow_counter, in);
}
int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
u64 *packets, u64 *bytes)
{
u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
- MLX5_ST_SZ_BYTES(traffic_counter)] = {0};
- u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+ MLX5_ST_SZ_BYTES(traffic_counter)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
void *stats;
int err = 0;
@@ -683,24 +822,21 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
u32 *out)
{
int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
- u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
MLX5_SET(query_flow_counter_in, in, opcode,
MLX5_CMD_OP_QUERY_FLOW_COUNTER);
- MLX5_SET(query_flow_counter_in, in, op_mod, 0);
MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id);
MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len);
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
- int reformat_type,
- size_t size,
- void *reformat_data,
+ struct mlx5_pkt_reformat_params *params,
enum mlx5_flow_namespace_type namespace,
struct mlx5_pkt_reformat *pkt_reformat)
{
- u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+ u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {};
struct mlx5_core_dev *dev = ns->dev;
void *packet_reformat_context_in;
int max_encap_size;
@@ -709,19 +845,20 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
int err;
u32 *in;
- if (namespace == MLX5_FLOW_NAMESPACE_FDB)
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB ||
+ namespace == MLX5_FLOW_NAMESPACE_FDB_BYPASS)
max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
else
max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size);
- if (size > max_encap_size) {
+ if (params->size > max_encap_size) {
mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
- size, max_encap_size);
+ params->size, max_encap_size);
return -EINVAL;
}
- in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size,
- GFP_KERNEL);
+ in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) +
+ params->size, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -730,18 +867,21 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
reformat = MLX5_ADDR_OF(packet_reformat_context_in,
packet_reformat_context_in,
reformat_data);
- inlen = reformat - (void *)in + size;
+ inlen = reformat - (void *)in + params->size;
- memset(in, 0, inlen);
MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
- reformat_data_size, size);
+ reformat_data_size, params->size);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_type, params->type);
MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
- reformat_type, reformat_type);
- memcpy(reformat, reformat_data, size);
+ reformat_param_0, params->param_0);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_param_1, params->param_1);
+ if (params->data && params->size)
+ memcpy(reformat, params->data, params->size);
- memset(out, 0, sizeof(out));
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out,
@@ -753,17 +893,15 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
struct mlx5_pkt_reformat *pkt_reformat)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
- u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
+ u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
- memset(in, 0, sizeof(in));
MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
pkt_reformat->id);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, dealloc_packet_reformat_context, in);
}
static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
@@ -771,7 +909,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
void *modify_actions,
struct mlx5_modify_hdr *modify_hdr)
{
- u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
+ u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {};
int max_actions, actions_size, inlen, err;
struct mlx5_core_dev *dev = ns->dev;
void *actions_in;
@@ -780,15 +918,19 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
switch (namespace) {
case MLX5_FLOW_NAMESPACE_FDB:
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions);
table_type = FS_FT_FDB;
break;
+ case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC:
case MLX5_FLOW_NAMESPACE_KERNEL:
case MLX5_FLOW_NAMESPACE_BYPASS:
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_RX;
break;
case MLX5_FLOW_NAMESPACE_EGRESS:
+ case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC:
+ case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC:
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
@@ -796,6 +938,10 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
table_type = FS_FT_ESW_INGRESS_ACL;
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ max_actions = MLX5_CAP_FLOWTABLE_RDMA_TX(dev, max_modify_header_actions);
+ table_type = FS_FT_RDMA_TX;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -806,7 +952,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
return -EOPNOTSUPP;
}
- actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions;
+ actions_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * num_actions;
inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size;
in = kzalloc(inlen, GFP_KERNEL);
@@ -821,7 +967,6 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
memcpy(actions_in, modify_actions, actions_size);
- memset(out, 0, sizeof(out));
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
@@ -832,17 +977,60 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
struct mlx5_modify_hdr *modify_hdr)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
- u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+ u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
- memset(in, 0, sizeof(in));
MLX5_SET(dealloc_modify_header_context_in, in, opcode,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
modify_hdr->id);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in);
+}
+
+static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+ int definer_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_OBJ_TYPE_MATCH_DEFINER);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id);
+
+ return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+ void *ptr;
+ int err;
+
+ MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type,
+ MLX5_OBJ_TYPE_MATCH_DEFINER);
+
+ ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context);
+ MLX5_SET(match_definer, ptr, format_id, format_id);
+
+ ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask);
+ memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask));
+
+ err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out);
+ return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+}
+
+static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
}
static const struct mlx5_flow_cmds mlx5_flow_cmds = {
@@ -859,9 +1047,12 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
.modify_header_alloc = mlx5_cmd_modify_header_alloc,
.modify_header_dealloc = mlx5_cmd_modify_header_dealloc,
+ .create_match_definer = mlx5_cmd_create_match_definer,
+ .destroy_match_definer = mlx5_cmd_destroy_match_definer,
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_get_capabilities,
};
static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
@@ -878,9 +1069,12 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
.packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
.modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
.modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc,
+ .create_match_definer = mlx5_cmd_stub_create_match_definer,
+ .destroy_match_definer = mlx5_cmd_stub_destroy_match_definer,
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_stub_get_capabilities,
};
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
@@ -904,6 +1098,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
case FS_FT_SNIFFER_TX:
case FS_FT_NIC_TX:
case FS_FT_RDMA_RX:
+ case FS_FT_RDMA_TX:
+ case FS_FT_PORT_SEL:
return mlx5_fs_cmd_get_fw_cmds();
default:
return mlx5_fs_cmd_get_stub_cmds();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index d62de642eca9..8ef4254b9ea1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -38,7 +38,7 @@
struct mlx5_flow_cmds {
int (*create_flow_table)(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int log_size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft);
int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft);
@@ -77,9 +77,7 @@ struct mlx5_flow_cmds {
bool disconnect);
int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns,
- int reformat_type,
- size_t size,
- void *reformat_data,
+ struct mlx5_pkt_reformat_params *params,
enum mlx5_flow_namespace_type namespace,
struct mlx5_pkt_reformat *pkt_reformat);
@@ -99,6 +97,13 @@ struct mlx5_flow_cmds {
int (*create_ns)(struct mlx5_flow_root_namespace *ns);
int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
+ int (*create_match_definer)(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask);
+ int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns,
+ int definer_id);
+
+ u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type);
};
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 9dc24241dc91..d53749248fa0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -38,10 +38,8 @@
#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"
+#include "fs_ft_pool.h"
#include "diag/fs_tracepoint.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
-#include "eswitch.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -87,36 +85,60 @@
.identified_miss_table_mode), \
FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
+#define FS_CHAINING_CAPS_RDMA_TX \
+ FS_REQUIRED_CAPS( \
+ FS_CAP(flow_table_properties_nic_transmit_rdma.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_transmit_rdma.modify_root), \
+ FS_CAP(flow_table_properties_nic_transmit_rdma \
+ .identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_transmit_rdma \
+ .flow_table_modify))
+
#define LEFTOVERS_NUM_LEVELS 1
#define LEFTOVERS_NUM_PRIOS 1
+#define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1
+#define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1
+
#define BY_PASS_PRIO_NUM_LEVELS 1
#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
LEFTOVERS_NUM_PRIOS)
+#define KERNEL_RX_MACSEC_NUM_PRIOS 1
+#define KERNEL_RX_MACSEC_NUM_LEVELS 2
+#define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS)
+
#define ETHTOOL_PRIO_NUM_LEVELS 1
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Vlan, mac, ttc, inner ttc, aRFS */
-#define KERNEL_NIC_PRIO_NUM_LEVELS 5
+/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */
+#define KERNEL_NIC_PRIO_NUM_LEVELS 7
#define KERNEL_NIC_NUM_PRIOS 1
/* One more level for tc */
#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
#define KERNEL_NIC_TC_NUM_PRIOS 1
-#define KERNEL_NIC_TC_NUM_LEVELS 2
+#define KERNEL_NIC_TC_NUM_LEVELS 3
#define ANCHOR_NUM_LEVELS 1
#define ANCHOR_NUM_PRIOS 1
#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
-#define OFFLOADS_MAX_FT 1
-#define OFFLOADS_NUM_PRIOS 1
-#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
+#define OFFLOADS_MAX_FT 2
+#define OFFLOADS_NUM_PRIOS 2
+#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS)
#define LAG_PRIO_NUM_LEVELS 1
#define LAG_NUM_PRIOS 1
-#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1)
+
+#define KERNEL_TX_IPSEC_NUM_PRIOS 1
+#define KERNEL_TX_IPSEC_NUM_LEVELS 1
+#define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS)
+
+#define KERNEL_TX_MACSEC_NUM_PRIOS 1
+#define KERNEL_TX_MACSEC_NUM_LEVELS 2
+#define KERNEL_TX_MACSEC_MIN_LEVEL (KERNEL_TX_IPSEC_MIN_LEVEL + KERNEL_TX_MACSEC_NUM_PRIOS)
struct node_caps {
size_t arr_sz;
@@ -135,17 +157,21 @@ static struct init_tree_node {
enum mlx5_flow_table_miss_action def_miss_action;
} root_fs = {
.type = FS_TYPE_NAMESPACE,
- .ar_size = 7,
+ .ar_size = 8,
.children = (struct init_tree_node[]){
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_RX_MACSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_RX_MACSEC_NUM_PRIOS,
+ KERNEL_RX_MACSEC_NUM_LEVELS))),
ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
LAG_PRIO_NUM_LEVELS))),
- ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+ ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS,
OFFLOADS_MAX_FT))),
@@ -172,36 +198,87 @@ static struct init_tree_node {
static struct init_tree_node egress_root_fs = {
.type = FS_TYPE_NAMESPACE,
- .ar_size = 1,
+ .ar_size = 3,
.children = (struct init_tree_node[]) {
ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_TX_IPSEC_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
+ KERNEL_TX_IPSEC_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_TX_MACSEC_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_TX_MACSEC_NUM_PRIOS,
+ KERNEL_TX_MACSEC_NUM_LEVELS))),
}
};
-#define RDMA_RX_BYPASS_PRIO 0
-#define RDMA_RX_KERNEL_PRIO 1
+enum {
+ RDMA_RX_COUNTERS_PRIO,
+ RDMA_RX_BYPASS_PRIO,
+ RDMA_RX_KERNEL_PRIO,
+};
+
+#define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS
+#define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1)
+#define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2)
+
static struct init_tree_node rdma_rx_root_fs = {
.type = FS_TYPE_NAMESPACE,
- .ar_size = 2,
+ .ar_size = 3,
.children = (struct init_tree_node[]) {
+ [RDMA_RX_COUNTERS_PRIO] =
+ ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS,
+ RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))),
[RDMA_RX_BYPASS_PRIO] =
- ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
+ ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0,
FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
[RDMA_RX_KERNEL_PRIO] =
- ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
+ ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0,
FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
ADD_MULTIPLE_PRIO(1, 1))),
}
};
+enum {
+ RDMA_TX_COUNTERS_PRIO,
+ RDMA_TX_BYPASS_PRIO,
+};
+
+#define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS
+#define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1)
+
+static struct init_tree_node rdma_tx_root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 2,
+ .children = (struct init_tree_node[]) {
+ [RDMA_TX_COUNTERS_PRIO] =
+ ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS,
+ RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))),
+ [RDMA_TX_BYPASS_PRIO] =
+ ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_RDMA_TX,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ }
+};
+
enum fs_i_lock_class {
FS_LOCK_GRANDPARENT,
FS_LOCK_PARENT,
@@ -233,7 +310,7 @@ static void del_sw_flow_group(struct fs_node *node);
static void del_sw_fte(struct fs_node *node);
static void del_sw_prio(struct fs_node *node);
static void del_sw_ns(struct fs_node *node);
-/* Delete rule (destination) is special case that
+/* Delete rule (destination) is special case that
* requires to lock the FTE for all the deletion process.
*/
static void del_sw_hw_rule(struct fs_node *node);
@@ -323,17 +400,12 @@ static void tree_put_node(struct fs_node *node, bool locked)
if (node->del_hw_func)
node->del_hw_func(node);
if (parent_node) {
- /* Only root namespace doesn't have parent and we just
- * need to free its node.
- */
down_write_ref_node(parent_node, locked);
list_del_init(&node->list);
- if (node->del_sw_func)
- node->del_sw_func(node);
- up_write_ref_node(parent_node, locked);
- } else {
- kfree(node);
}
+ node->del_sw_func(node);
+ if (parent_node)
+ up_write_ref_node(parent_node, locked);
node = NULL;
}
if (!node && parent_node)
@@ -363,6 +435,22 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
return NULL;
}
+static bool is_fwd_next_action(u32 action)
+{
+ return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
+}
+
+static bool is_fwd_dest_type(enum mlx5_flow_destination_type type)
+{
+ return type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM ||
+ type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE ||
+ type == MLX5_FLOW_DESTINATION_TYPE_UPLINK ||
+ type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER ||
+ type == MLX5_FLOW_DESTINATION_TYPE_TIR;
+}
+
static bool check_valid_spec(const struct mlx5_flow_spec *spec)
{
int i;
@@ -376,7 +464,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec)
return true;
}
-static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
{
struct fs_node *root;
struct mlx5_flow_namespace *ns;
@@ -447,8 +535,10 @@ static void del_sw_flow_table(struct fs_node *node)
fs_get_obj(ft, node);
rhltable_destroy(&ft->fgs_hash);
- fs_get_obj(prio, ft->node.parent);
- prio->num_ft--;
+ if (ft->node.parent) {
+ fs_get_obj(prio, ft->node.parent);
+ prio->num_ft--;
+ }
kfree(ft);
}
@@ -481,14 +571,14 @@ static void del_sw_hw_rule(struct fs_node *node)
fs_get_obj(rule, node);
fs_get_obj(fte, rule->node.parent);
trace_mlx5_fs_del_rule(rule);
- if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ if (is_fwd_next_action(rule->sw_action)) {
mutex_lock(&rule->dest_attr.ft->lock);
list_del(&rule->next_ft);
mutex_unlock(&rule->dest_attr.ft->lock);
}
- if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
- --fte->dests_size) {
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) {
+ --fte->dests_size;
fte->modify_mask |=
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
@@ -496,10 +586,23 @@ static void del_sw_hw_rule(struct fs_node *node)
goto out;
}
- if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
- --fte->dests_size) {
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ --fte->dests_size;
+ fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ goto out;
+ }
+
+ if (is_fwd_dest_type(rule->dest_attr.type)) {
+ --fte->dests_size;
+ --fte->fwd_dests;
+
+ if (!fte->fwd_dests)
+ fte->action.action &=
+ ~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
fte->modify_mask |=
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ goto out;
}
out:
kfree(rule);
@@ -519,6 +622,7 @@ static void del_hw_fte(struct fs_node *node)
fs_get_obj(ft, fg->node.parent);
trace_mlx5_fs_del_fte(fte);
+ WARN_ON(fte->dests_size);
dev = get_dev(&ft->node);
root = find_root(&ft->node);
if (node->active) {
@@ -527,7 +631,7 @@ static void del_hw_fte(struct fs_node *node)
mlx5_core_warn(dev,
"flow steering can't delete fte in index %d of flow group id %d\n",
fte->index, fg->id);
- node->active = 0;
+ node->active = false;
}
}
@@ -545,7 +649,7 @@ static void del_sw_fte(struct fs_node *node)
&fte->hash,
rhash_fte);
WARN_ON(err);
- ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
+ ida_free(&fg->fte_allocator, fte->index - fg->start_index);
kmem_cache_free(steering->ftes_cache, fte);
}
@@ -595,7 +699,7 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
int index;
int ret;
- index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
+ index = ida_alloc_max(&fg->fte_allocator, fg->max_ftes - 1, GFP_KERNEL);
if (index < 0)
return index;
@@ -611,7 +715,7 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
return 0;
err_ida_remove:
- ida_simple_remove(&fg->fte_allocator, index);
+ ida_free(&fg->fte_allocator, index);
return ret;
}
@@ -631,7 +735,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
fte->action = *flow_act;
fte->flow_context = spec->flow_context;
- tree_init_node(&fte->node, NULL, del_sw_fte);
+ tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
return fte;
}
@@ -707,7 +811,7 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f
return fg;
}
-static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
+static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport,
enum fs_flow_table_type table_type,
enum fs_flow_table_op_mod op_mod,
u32 flags)
@@ -730,7 +834,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
ft->op_mod = op_mod;
ft->type = table_type;
ft->vport = vport;
- ft->max_fte = max_fte;
ft->flags = flags;
INIT_LIST_HEAD(&ft->fwd_rules);
mutex_init(&ft->lock);
@@ -773,7 +876,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
return ft;
}
-/* If reverse if false then return the first flow table in next priority of
+/* If reverse is false then return the first flow table in next priority of
* prio in the tree, else return the last flow table in the previous priority
* of prio in the tree.
*/
@@ -805,24 +908,33 @@ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
return find_closest_ft(prio, true);
}
+static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+ struct mlx5_flow_act *flow_act)
+{
+ struct fs_prio *prio;
+ bool next_ns;
+
+ next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+ fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
+
+ return find_next_chained_ft(prio);
+}
+
static int connect_fts_in_prio(struct mlx5_core_dev *dev,
struct fs_prio *prio,
struct mlx5_flow_table *ft)
{
struct mlx5_flow_root_namespace *root = find_root(&prio->node);
struct mlx5_flow_table *iter;
- int i = 0;
int err;
fs_for_each_ft(iter, prio) {
- i++;
err = root->cmds->modify_flow_table(root, iter, ft);
if (err) {
- mlx5_core_warn(dev, "Failed to modify flow table %d\n",
- iter->id);
+ mlx5_core_err(dev,
+ "Failed to modify flow table id %d, type %d, err %d\n",
+ iter->id, iter->type, err);
/* The driver is out of sync with the FW */
- if (i > 1)
- WARN_ON(true);
return err;
}
}
@@ -955,6 +1067,10 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
mutex_unlock(&old_next_ft->lock);
list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
+ if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) &&
+ iter->ft->ns == new_next_ft->ns)
+ continue;
+
err = _mlx5_modify_rule_destination(iter, &dest);
if (err)
pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
@@ -966,17 +1082,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
- struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_table *next_ft, *first_ft;
int err = 0;
/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
- if (list_empty(&prio->node.children)) {
+ first_ft = list_first_entry_or_null(&prio->node.children,
+ struct mlx5_flow_table, node.list);
+ if (!first_ft || first_ft->level > ft->level) {
err = connect_prev_fts(dev, ft, prio);
if (err)
return err;
- next_ft = find_next_chained_ft(prio);
+ next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
err = connect_fwd_rules(dev, ft, next_ft);
if (err)
return err;
@@ -1012,7 +1130,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
struct mlx5_flow_table *next_ft;
struct fs_prio *fs_prio = NULL;
struct mlx5_flow_table *ft;
- int log_table_sz;
int err;
if (!root) {
@@ -1043,7 +1160,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
*/
ft = alloc_flow_table(ft_attr->level,
vport,
- ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
root->table_type,
op_mod, ft_attr->flags);
if (IS_ERR(ft)) {
@@ -1052,11 +1168,11 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
}
tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
- log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
next_ft = unmanaged ? ft_attr->next_ft :
find_next_chained_ft(fs_prio);
ft->def_miss_action = ns->def_miss_action;
- err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft);
+ ft->ns = ns;
+ err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
if (err)
goto free_ft;
@@ -1082,6 +1198,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
destroy_ft:
root->cmds->destroy_flow_table(root, ft);
free_ft:
+ rhltable_destroy(&ft->fgs_hash);
kfree(ft);
unlock_root:
mutex_unlock(&root->chain_lock);
@@ -1093,18 +1210,19 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
{
return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
}
+EXPORT_SYMBOL(mlx5_create_flow_table);
-struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
- int prio, int max_fte,
- u32 level, u16 vport)
+u32 mlx5_flow_table_id(struct mlx5_flow_table *ft)
{
- struct mlx5_flow_table_attr ft_attr = {};
-
- ft_attr.max_fte = max_fte;
- ft_attr.level = level;
- ft_attr.prio = prio;
+ return ft->id;
+}
+EXPORT_SYMBOL(mlx5_flow_table_id);
- return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
+struct mlx5_flow_table *
+mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr, u16 vport)
+{
+ return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, vport);
}
struct mlx5_flow_table*
@@ -1115,28 +1233,36 @@ mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
ft_attr.level = level;
ft_attr.prio = prio;
+ ft_attr.max_fte = 1;
+
return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
}
EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
+#define MAX_FLOW_GROUP_SIZE BIT(24)
struct mlx5_flow_table*
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr *ft_attr)
{
int num_reserved_entries = ft_attr->autogroup.num_reserved_entries;
- int autogroups_max_fte = ft_attr->max_fte - num_reserved_entries;
int max_num_groups = ft_attr->autogroup.max_num_groups;
struct mlx5_flow_table *ft;
-
- if (max_num_groups > autogroups_max_fte)
- return ERR_PTR(-EINVAL);
- if (num_reserved_entries > ft_attr->max_fte)
- return ERR_PTR(-EINVAL);
+ int autogroups_max_fte;
ft = mlx5_create_flow_table(ns, ft_attr);
if (IS_ERR(ft))
return ft;
+ autogroups_max_fte = ft->max_fte - num_reserved_entries;
+ if (max_num_groups > autogroups_max_fte)
+ goto err_validate;
+ if (num_reserved_entries > ft->max_fte)
+ goto err_validate;
+
+ /* Align the number of groups according to the largest group size */
+ if (autogroups_max_fte / (max_num_groups + 1) > MAX_FLOW_GROUP_SIZE)
+ max_num_groups = (autogroups_max_fte / MAX_FLOW_GROUP_SIZE) - 1;
+
ft->autogroup.active = true;
ft->autogroup.required_groups = max_num_groups;
ft->autogroup.max_fte = autogroups_max_fte;
@@ -1144,6 +1270,10 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1);
return ft;
+
+err_validate:
+ mlx5_destroy_flow_table(ft);
+ return ERR_PTR(-ENOSPC);
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
@@ -1184,6 +1314,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
return fg;
}
+EXPORT_SYMBOL(mlx5_create_flow_group);
static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
{
@@ -1197,6 +1328,8 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
rule->node.type = FS_TYPE_FLOW_DEST;
if (dest)
memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ else
+ rule->dest_attr.type = MLX5_FLOW_DESTINATION_TYPE_NONE;
return rule;
}
@@ -1273,6 +1406,9 @@ create_flow_handle(struct fs_fte *fte,
if (dest) {
fte->dests_size++;
+ if (is_fwd_dest_type(dest[i].type))
+ fte->fwd_dests++;
+
type = dest[i].type ==
MLX5_FLOW_DESTINATION_TYPE_COUNTER;
*modify_mask |= type ? count : dst;
@@ -1322,7 +1458,7 @@ add_rule_fte(struct fs_fte *fte,
fte->node.active = true;
fte->status |= FS_FTE_STATUS_EXISTING;
- atomic_inc(&fte->node.version);
+ atomic_inc(&fg->node.version);
out:
return handle;
@@ -1426,7 +1562,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
if (d1->type == d2->type) {
- if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
d1->vport.num == d2->vport.num &&
d1->vport.flags == d2->vport.flags &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
@@ -1439,7 +1576,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
d1->tir_num == d2->tir_num) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
- d1->ft_num == d2->ft_num))
+ d1->ft_num == d2->ft_num) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER &&
+ d1->sampler_id == d2->sampler_id))
return true;
}
@@ -1458,9 +1597,22 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
return NULL;
}
-static bool check_conflicting_actions(u32 action1, u32 action2)
+static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0,
+ const struct mlx5_fs_vlan *vlan1)
{
- u32 xored_actions = action1 ^ action2;
+ return vlan0->ethtype != vlan1->ethtype ||
+ vlan0->vid != vlan1->vid ||
+ vlan0->prio != vlan1->prio;
+}
+
+static bool check_conflicting_actions(const struct mlx5_flow_act *act1,
+ const struct mlx5_flow_act *act2)
+{
+ u32 action1 = act1->action;
+ u32 action2 = act2->action;
+ u32 xored_actions;
+
+ xored_actions = action1 ^ action2;
/* if one rule only wants to count, it's ok */
if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
@@ -1477,6 +1629,22 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
return true;
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
+ act1->pkt_reformat != act2->pkt_reformat)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ act1->modify_hdr != act2->modify_hdr)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0]))
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 &&
+ check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1]))
+ return true;
+
return false;
}
@@ -1484,7 +1652,7 @@ static int check_conflicting_ftes(struct fs_fte *fte,
const struct mlx5_flow_context *flow_context,
const struct mlx5_flow_act *flow_act)
{
- if (check_conflicting_actions(flow_act->action, fte->action.action)) {
+ if (check_conflicting_actions(flow_act, &fte->action)) {
mlx5_core_warn(get_dev(&fte->node),
"Found two FTEs with conflicting actions\n");
return -EEXIST;
@@ -1540,6 +1708,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
static bool counter_is_valid(u32 action)
{
return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
}
@@ -1557,11 +1726,12 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
return true;
if (ignore_level) {
- if (ft->type != FS_FT_FDB)
+ if (ft->type != FS_FT_FDB &&
+ ft->type != FS_FT_NIC_RX)
return false;
if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
- dest->ft->type != FS_FT_FDB)
+ ft->type != dest->ft->type)
return false;
}
@@ -1577,30 +1747,22 @@ struct match_list {
struct mlx5_flow_group *g;
};
-struct match_list_head {
- struct list_head list;
- struct match_list first;
-};
-
-static void free_match_list(struct match_list_head *head, bool ft_locked)
+static void free_match_list(struct match_list *head, bool ft_locked)
{
- if (!list_empty(&head->list)) {
- struct match_list *iter, *match_tmp;
+ struct match_list *iter, *match_tmp;
- list_del(&head->first.list);
- tree_put_node(&head->first.g->node, ft_locked);
- list_for_each_entry_safe(iter, match_tmp, &head->list,
- list) {
- tree_put_node(&iter->g->node, ft_locked);
- list_del(&iter->list);
- kfree(iter);
- }
+ list_for_each_entry_safe(iter, match_tmp, &head->list,
+ list) {
+ tree_put_node(&iter->g->node, ft_locked);
+ list_del(&iter->list);
+ kfree(iter);
}
}
-static int build_match_list(struct match_list_head *match_head,
+static int build_match_list(struct match_list *match_head,
struct mlx5_flow_table *ft,
const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_group *fg,
bool ft_locked)
{
struct rhlist_head *tmp, *list;
@@ -1615,29 +1777,21 @@ static int build_match_list(struct match_list_head *match_head,
rhl_for_each_entry_rcu(g, tmp, list, hash) {
struct match_list *curr_match;
- if (likely(list_empty(&match_head->list))) {
- if (!tree_get_node(&g->node))
- continue;
- match_head->first.g = g;
- list_add_tail(&match_head->first.list,
- &match_head->list);
+ if (fg && fg != g)
+ continue;
+
+ if (unlikely(!tree_get_node(&g->node)))
continue;
- }
curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
if (!curr_match) {
+ rcu_read_unlock();
free_match_list(match_head, ft_locked);
- err = -ENOMEM;
- goto out;
- }
- if (!tree_get_node(&g->node)) {
- kfree(curr_match);
- continue;
+ return -ENOMEM;
}
curr_match->g = g;
list_add_tail(&curr_match->list, &match_head->list);
}
-out:
rcu_read_unlock();
return err;
}
@@ -1699,7 +1853,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
struct match_list *iter;
bool take_write = false;
struct fs_fte *fte;
- u64 version;
+ u64 version = 0;
int err;
fte = alloc_fte(ft, spec, flow_act);
@@ -1707,10 +1861,12 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
return ERR_PTR(-ENOMEM);
search_again_locked:
- version = matched_fgs_get_version(match_head);
if (flow_act->flags & FLOW_ACT_NO_APPEND)
goto skip_search;
- /* Try to find a fg that already contains a matching fte */
+ version = matched_fgs_get_version(match_head);
+ /* Try to find an fte with identical match value and attempt update its
+ * action.
+ */
list_for_each_entry(iter, match_head, list) {
struct fs_fte *fte_tmp;
@@ -1719,6 +1875,7 @@ search_again_locked:
if (!fte_tmp)
continue;
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
+ /* No error check needed here, because insert_fte() is not called */
up_write_ref_node(&fte_tmp->node, false);
tree_put_node(&fte_tmp->node, false);
kmem_cache_free(steering->ftes_cache, fte);
@@ -1738,10 +1895,12 @@ skip_search:
goto out;
}
- /* Check the fgs version, for case the new FTE with the
- * same values was added while the fgs weren't locked
+ /* Check the fgs version. If version have changed it could be that an
+ * FTE with the same match value was added while the fgs weren't
+ * locked.
*/
- if (version != matched_fgs_get_version(match_head)) {
+ if (!(flow_act->flags & FLOW_ACT_NO_APPEND) &&
+ version != matched_fgs_get_version(match_head)) {
take_write = true;
goto search_again_locked;
}
@@ -1749,11 +1908,13 @@ skip_search:
list_for_each_entry(iter, match_head, list) {
g = iter->g;
- if (!g->node.active)
- continue;
-
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ if (!g->node.active) {
+ up_write_ref_node(&g->node, false);
+ continue;
+ }
+
err = insert_fte(g, fte);
if (err) {
up_write_ref_node(&g->node, false);
@@ -1767,7 +1928,8 @@ skip_search:
up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
- tree_put_node(&fte->node, false);
+ if (IS_ERR(rule))
+ tree_put_node(&fte->node, false);
return rule;
}
rule = ERR_PTR(-ENOENT);
@@ -1785,9 +1947,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
- struct mlx5_flow_group *g;
struct mlx5_flow_handle *rule;
- struct match_list_head match_head;
+ struct match_list match_head;
+ struct mlx5_flow_group *g;
bool take_write = false;
struct fs_fte *fte;
int version;
@@ -1797,6 +1959,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
if (!check_valid_spec(spec))
return ERR_PTR(-EINVAL);
+ if (flow_act->fg && ft->autogroup.active)
+ return ERR_PTR(-EINVAL);
+
for (i = 0; i < dest_num; i++) {
if (!dest_is_valid(&dest[i], flow_act, ft))
return ERR_PTR(-EINVAL);
@@ -1806,7 +1971,7 @@ search_again_locked:
version = atomic_read(&ft->node.version);
/* Collect all fgs which has a matching match_criteria */
- err = build_match_list(&match_head, ft, spec, take_write);
+ err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write);
if (err) {
if (take_write)
up_write_ref_node(&ft->node, false);
@@ -1866,7 +2031,8 @@ search_again_locked:
up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
- tree_put_node(&fte->node, false);
+ if (IS_ERR(rule))
+ tree_put_node(&fte->node, false);
tree_put_node(&g->node, false);
return rule;
@@ -1892,45 +2058,60 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
int num_dest)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
- struct mlx5_flow_destination gen_dest = {};
+ static const struct mlx5_flow_spec zero_spec = {};
+ struct mlx5_flow_destination *gen_dest = NULL;
struct mlx5_flow_table *next_ft = NULL;
struct mlx5_flow_handle *handle = NULL;
u32 sw_action = flow_act->action;
- struct fs_prio *prio;
+ int i;
- fs_get_obj(prio, ft->node.parent);
- if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
- if (!fwd_next_prio_supported(ft))
- return ERR_PTR(-EOPNOTSUPP);
- if (num_dest)
- return ERR_PTR(-EINVAL);
- mutex_lock(&root->chain_lock);
- next_ft = find_next_chained_ft(prio);
- if (next_ft) {
- gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- gen_dest.ft = next_ft;
- dest = &gen_dest;
- num_dest = 1;
- flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else {
- mutex_unlock(&root->chain_lock);
- return ERR_PTR(-EOPNOTSUPP);
- }
- }
+ if (!spec)
+ spec = &zero_spec;
- handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+ if (!is_fwd_next_action(sw_action))
+ return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
- if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
- if (!IS_ERR_OR_NULL(handle) &&
- (list_empty(&handle->rule[0]->next_ft))) {
- mutex_lock(&next_ft->lock);
- list_add(&handle->rule[0]->next_ft,
- &next_ft->fwd_rules);
- mutex_unlock(&next_ft->lock);
- handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- }
- mutex_unlock(&root->chain_lock);
- }
+ if (!fwd_next_prio_supported(ft))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&root->chain_lock);
+ next_ft = find_next_fwd_ft(ft, flow_act);
+ if (!next_ft) {
+ handle = ERR_PTR(-EOPNOTSUPP);
+ goto unlock;
+ }
+
+ gen_dest = kcalloc(num_dest + 1, sizeof(*dest),
+ GFP_KERNEL);
+ if (!gen_dest) {
+ handle = ERR_PTR(-ENOMEM);
+ goto unlock;
+ }
+ for (i = 0; i < num_dest; i++)
+ gen_dest[i] = dest[i];
+ gen_dest[i].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ gen_dest[i].ft = next_ft;
+ dest = gen_dest;
+ num_dest++;
+ flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+ if (IS_ERR(handle))
+ goto unlock;
+
+ if (list_empty(&handle->rule[num_dest - 1]->next_ft)) {
+ mutex_lock(&next_ft->lock);
+ list_add(&handle->rule[num_dest - 1]->next_ft,
+ &next_ft->fwd_rules);
+ mutex_unlock(&next_ft->lock);
+ handle->rule[num_dest - 1]->sw_action = sw_action;
+ handle->rule[num_dest - 1]->ft = ft;
+ }
+unlock:
+ mutex_unlock(&root->chain_lock);
+ kfree(gen_dest);
return handle;
}
EXPORT_SYMBOL(mlx5_add_flow_rules);
@@ -1956,13 +2137,18 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
down_write_ref_node(&fte->node, false);
for (i = handle->num_rules - 1; i >= 0; i--)
tree_remove_node(&handle->rule[i]->node, true);
- if (fte->modify_mask && fte->dests_size) {
- modify_fte(fte);
+ if (list_empty(&fte->node.children)) {
+ fte->node.del_hw_func(&fte->node);
+ /* Avoid double call to del_hw_fte */
+ fte->node.del_hw_func = NULL;
up_write_ref_node(&fte->node, false);
- } else {
- del_hw_fte(&fte->node);
- up_write(&fte->node.lock);
tree_put_node(&fte->node, false);
+ } else if (fte->dests_size) {
+ if (fte->modify_mask)
+ modify_fte(fte);
+ up_write_ref_node(&fte->node, false);
+ } else {
+ up_write_ref_node(&fte->node, false);
}
kfree(handle);
}
@@ -2043,7 +2229,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft)
node.list) == ft))
return 0;
- next_ft = find_next_chained_ft(prio);
+ next_ft = find_next_ft(ft);
err = connect_fwd_rules(dev, next_ft, ft);
if (err)
return err;
@@ -2082,6 +2268,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
fg->id);
}
+EXPORT_SYMBOL(mlx5_destroy_flow_group);
struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
int n)
@@ -2095,6 +2282,23 @@ struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
+static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type)
+{
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC:
+ case MLX5_FLOW_NAMESPACE_LAG:
+ case MLX5_FLOW_NAMESPACE_OFFLOADS:
+ case MLX5_FLOW_NAMESPACE_ETHTOOL:
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+ case MLX5_FLOW_NAMESPACE_ANCHOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type)
{
@@ -2112,6 +2316,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
if (steering->fdb_root_ns)
return &steering->fdb_root_ns->ns;
return NULL;
+ case MLX5_FLOW_NAMESPACE_PORT_SEL:
+ if (steering->port_sel_root_ns)
+ return &steering->port_sel_root_ns->ns;
+ return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
if (steering->sniffer_rx_root_ns)
return &steering->sniffer_rx_root_ns->ns;
@@ -2120,21 +2328,40 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
if (steering->sniffer_tx_root_ns)
return &steering->sniffer_tx_root_ns->ns;
return NULL;
- default:
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ root_ns = steering->fdb_root_ns;
+ prio = FDB_BYPASS_PATH;
break;
- }
-
- if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC:
+ case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC:
root_ns = steering->egress_root_ns;
- } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+ prio = type - MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_BYPASS_PRIO;
- } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) {
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL:
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_KERNEL_PRIO;
- } else { /* Must be NIC RX */
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ root_ns = steering->rdma_tx_root_ns;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS:
+ root_ns = steering->rdma_rx_root_ns;
+ prio = RDMA_RX_COUNTERS_PRIO;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS:
+ root_ns = steering->rdma_tx_root_ns;
+ prio = RDMA_TX_COUNTERS_PRIO;
+ break;
+ default: /* Must be NIC RX */
+ WARN_ON(!is_nic_rx_ns(type));
root_ns = steering->root_ns;
prio = type;
+ break;
}
if (!root_ns)
@@ -2158,17 +2385,21 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
{
struct mlx5_flow_steering *steering = dev->priv.steering;
- if (!steering || vport >= mlx5_eswitch_get_total_vports(dev))
+ if (!steering)
return NULL;
switch (type) {
case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
+ if (vport >= steering->esw_egress_acl_vports)
+ return NULL;
if (steering->esw_egress_root_ns &&
steering->esw_egress_root_ns[vport])
return &steering->esw_egress_root_ns[vport]->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ if (vport >= steering->esw_ingress_acl_vports)
+ return NULL;
if (steering->esw_ingress_root_ns &&
steering->esw_ingress_root_ns[vport])
return &steering->esw_ingress_root_ns[vport]->ns;
@@ -2255,7 +2486,7 @@ static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
#define FLOW_TABLE_BIT_SZ 1
#define GET_FLOW_TABLE_CAP(dev, offset) \
- ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \
+ ((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) + \
offset / 32)) >> \
(32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
@@ -2324,14 +2555,12 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
struct init_tree_node *init_node,
struct fs_node *fs_parent_node)
{
- int i;
- struct mlx5_flow_namespace *fs_ns;
int err;
+ int i;
- fs_get_obj(fs_ns, fs_parent_node);
for (i = 0; i < init_node->ar_size; i++) {
err = init_root_tree_recursive(steering, &init_node->children[i],
- &fs_ns->node,
+ fs_parent_node,
init_node, i);
if (err)
return err;
@@ -2339,6 +2568,17 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
return 0;
}
+static void del_sw_root_ns(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_namespace *ns;
+
+ fs_get_obj(ns, node);
+ root_ns = container_of(ns, struct mlx5_flow_root_namespace, ns);
+ mutex_destroy(&root_ns->chain_lock);
+ kfree(node);
+}
+
static struct mlx5_flow_root_namespace
*create_root_ns(struct mlx5_flow_steering *steering,
enum fs_flow_table_type table_type)
@@ -2347,10 +2587,6 @@ static struct mlx5_flow_root_namespace
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_namespace *ns;
- if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
- (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
- cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
-
/* Create the root namespace */
root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
@@ -2365,7 +2601,7 @@ static struct mlx5_flow_root_namespace
ns = &root_ns->ns;
fs_init_namespace(ns);
mutex_init(&root_ns->chain_lock);
- tree_init_node(&ns->node, NULL, NULL);
+ tree_init_node(&ns->node, NULL, del_sw_root_ns);
tree_add_node(&ns->node, NULL);
return root_ns;
@@ -2396,7 +2632,7 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
/* If this a prio with chains, and we can jump from one chain
- * (namepsace) to another, so we accumulate the levels
+ * (namespace) to another, so we accumulate the levels
*/
if (prio->node.type == FS_TYPE_PRIO_CHAINS)
acc_level = acc_level_ns;
@@ -2491,57 +2727,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
clean_tree(&root_ns->ns.node);
}
-static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
-{
- struct mlx5_flow_steering *steering = dev->priv.steering;
- int i;
-
- if (!steering->esw_egress_root_ns)
- return;
-
- for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
- cleanup_root_ns(steering->esw_egress_root_ns[i]);
-
- kfree(steering->esw_egress_root_ns);
- steering->esw_egress_root_ns = NULL;
-}
-
-static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
-{
- struct mlx5_flow_steering *steering = dev->priv.steering;
- int i;
-
- if (!steering->esw_ingress_root_ns)
- return;
-
- for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
- cleanup_root_ns(steering->esw_ingress_root_ns[i]);
-
- kfree(steering->esw_ingress_root_ns);
- steering->esw_ingress_root_ns = NULL;
-}
-
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
-{
- struct mlx5_flow_steering *steering = dev->priv.steering;
-
- cleanup_root_ns(steering->root_ns);
- cleanup_egress_acls_root_ns(dev);
- cleanup_ingress_acls_root_ns(dev);
- cleanup_root_ns(steering->fdb_root_ns);
- steering->fdb_root_ns = NULL;
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
- cleanup_root_ns(steering->sniffer_rx_root_ns);
- cleanup_root_ns(steering->sniffer_tx_root_ns);
- cleanup_root_ns(steering->rdma_rx_root_ns);
- cleanup_root_ns(steering->egress_root_ns);
- mlx5_cleanup_fc_stats(dev);
- kmem_cache_destroy(steering->ftes_cache);
- kmem_cache_destroy(steering->fgs_cache);
- kfree(steering);
-}
-
static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *prio;
@@ -2568,6 +2753,21 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
return PTR_ERR_OR_ZERO(prio);
}
+#define PORT_SEL_NUM_LEVELS 3
+static int init_port_sel_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL);
+ if (!steering->port_sel_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->port_sel_root_ns->ns, 0,
+ PORT_SEL_NUM_LEVELS);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
{
int err;
@@ -2591,6 +2791,29 @@ out_err:
return err;
}
+static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->rdma_tx_root_ns = create_root_ns(steering, FS_FT_RDMA_TX);
+ if (!steering->rdma_tx_root_ns)
+ return -ENOMEM;
+
+ err = init_root_tree(steering, &rdma_tx_root_fs,
+ &steering->rdma_tx_root_ns->ns.node);
+ if (err)
+ goto out_err;
+
+ set_prio_attrs(steering->rdma_tx_root_ns);
+
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->rdma_tx_root_ns);
+ steering->rdma_tx_root_ns = NULL;
+ return err;
+}
+
/* FT and tc chains are stored in the same array so we can re-use the
* mlx5_get_fdb_sub_ns() and tc api for FT chains.
* When creating a new ns for each chain store it in the first available slot.
@@ -2675,6 +2898,36 @@ static int create_fdb_fast_path(struct mlx5_flow_steering *steering)
return 0;
}
+static int create_fdb_bypass(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_flow_namespace *ns;
+ struct fs_prio *prio;
+ int i;
+
+ prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+
+ for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) {
+ prio = fs_create_prio(ns, i, 1);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+ }
+ return 0;
+}
+
+static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
+}
+
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *maj_prio;
@@ -2684,15 +2937,25 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
if (!steering->fdb_root_ns)
return -ENOMEM;
- maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
- 1);
+ err = create_fdb_bypass(steering);
+ if (err)
+ goto out_err;
+
+ err = create_fdb_fast_path(steering);
+ if (err)
+ goto out_err;
+
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_TC_MISS, 1);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
- err = create_fdb_fast_path(steering);
- if (err)
+
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
goto out_err;
+ }
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
if (IS_ERR(maj_prio)) {
@@ -2700,14 +2963,22 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
goto out_err;
}
+ /* We put this priority last, knowing that nothing will get here
+ * unless explicitly forwarded to. This is possible because the
+ * slow path tables have catch all rules and nothing gets passed
+ * those tables.
+ */
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
+ goto out_err;
+ }
+
set_prio_attrs(steering->fdb_root_ns);
return 0;
out_err:
- cleanup_root_ns(steering->fdb_root_ns);
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
- steering->fdb_root_ns = NULL;
+ cleanup_fdb_root_ns(steering);
return err;
}
@@ -2737,10 +3008,9 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo
return PTR_ERR_OR_ZERO(prio);
}
-static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
+int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
- int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
@@ -2756,7 +3026,7 @@ static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
if (err)
goto cleanup_root_ns;
}
-
+ steering->esw_egress_acl_vports = total_vports;
return 0;
cleanup_root_ns:
@@ -2767,10 +3037,24 @@ cleanup_root_ns:
return err;
}
-static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_egress_root_ns)
+ return;
+
+ for (i = 0; i < steering->esw_egress_acl_vports; i++)
+ cleanup_root_ns(steering->esw_egress_root_ns[i]);
+
+ kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
+}
+
+int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
- int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
@@ -2786,7 +3070,7 @@ static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
if (err)
goto cleanup_root_ns;
}
-
+ steering->esw_ingress_acl_vports = total_vports;
return 0;
cleanup_root_ns:
@@ -2797,6 +3081,37 @@ cleanup_root_ns:
return err;
}
+void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_ingress_root_ns)
+ return;
+
+ for (i = 0; i < steering->esw_ingress_acl_vports; i++)
+ cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+
+ kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
+}
+
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ ns = mlx5_get_flow_namespace(dev, type);
+ if (!ns)
+ return 0;
+
+ root = find_root(&ns->node);
+ if (!root)
+ return 0;
+
+ return root->cmds->get_capabilities(root, root->table_type);
+}
+
static int init_egress_root_ns(struct mlx5_flow_steering *steering)
{
int err;
@@ -2818,30 +3133,24 @@ cleanup:
return err;
}
-int mlx5_init_fs(struct mlx5_core_dev *dev)
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
{
- struct mlx5_flow_steering *steering;
- int err = 0;
-
- err = mlx5_init_fc_stats(dev);
- if (err)
- return err;
+ struct mlx5_flow_steering *steering = dev->priv.steering;
- steering = kzalloc(sizeof(*steering), GFP_KERNEL);
- if (!steering)
- return -ENOMEM;
- steering->dev = dev;
- dev->priv.steering = steering;
+ cleanup_root_ns(steering->root_ns);
+ cleanup_fdb_root_ns(steering);
+ cleanup_root_ns(steering->port_sel_root_ns);
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->rdma_rx_root_ns);
+ cleanup_root_ns(steering->rdma_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
+}
- steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
- sizeof(struct mlx5_flow_group), 0,
- 0, NULL);
- steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
- 0, NULL);
- if (!steering->ftes_cache || !steering->fgs_cache) {
- err = -ENOMEM;
- goto err;
- }
+int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err = 0;
if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev, nic_flow_table))) ||
@@ -2859,16 +3168,6 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
if (err)
goto err;
}
- if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
- err = init_egress_acls_root_ns(dev);
- if (err)
- goto err;
- }
- if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
- err = init_ingress_acls_root_ns(dev);
- if (err)
- goto err;
- }
}
if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
@@ -2883,6 +3182,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
+ if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) {
+ err = init_port_sel_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
err = init_rdma_rx_root_ns(steering);
@@ -2890,15 +3195,77 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
- if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
+ if (MLX5_CAP_FLOWTABLE_RDMA_TX(dev, ft_support)) {
+ err = init_rdma_tx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
err = init_egress_root_ns(steering);
if (err)
goto err;
}
return 0;
+
+err:
+ mlx5_fs_core_cleanup(dev);
+ return err;
+}
+
+void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ kmem_cache_destroy(steering->ftes_cache);
+ kmem_cache_destroy(steering->fgs_cache);
+ kfree(steering);
+ mlx5_ft_pool_destroy(dev);
+ mlx5_cleanup_fc_stats(dev);
+}
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering;
+ int err = 0;
+
+ err = mlx5_init_fc_stats(dev);
+ if (err)
+ return err;
+
+ err = mlx5_ft_pool_init(dev);
+ if (err)
+ goto err;
+
+ steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+ if (!steering) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ steering->dev = dev;
+ dev->priv.steering = steering;
+
+ if (mlx5_fs_dr_is_supported(dev))
+ steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+ else
+ steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+
+ steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+ sizeof(struct mlx5_flow_group), 0,
+ 0, NULL);
+ steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+ 0, NULL);
+ if (!steering->ftes_cache || !steering->fgs_cache) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+
err:
- mlx5_cleanup_fs(dev);
+ mlx5_fs_core_free(dev);
return err;
}
@@ -3039,9 +3406,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
EXPORT_SYMBOL(mlx5_modify_header_dealloc);
struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
- int reformat_type,
- size_t size,
- void *reformat_data,
+ struct mlx5_pkt_reformat_params *params,
enum mlx5_flow_namespace_type ns_type)
{
struct mlx5_pkt_reformat *pkt_reformat;
@@ -3057,9 +3422,8 @@ struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
return ERR_PTR(-ENOMEM);
pkt_reformat->ns_type = ns_type;
- pkt_reformat->reformat_type = reformat_type;
- err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
- reformat_data, ns_type,
+ pkt_reformat->reformat_type = params->type;
+ err = root->cmds->packet_reformat_alloc(root, params, ns_type,
pkt_reformat);
if (err) {
kfree(pkt_reformat);
@@ -3083,6 +3447,52 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
+int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer)
+{
+ return definer->id;
+}
+
+struct mlx5_flow_definer *
+mlx5_create_match_definer(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type ns_type, u16 format_id,
+ u32 *match_mask)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_definer *definer;
+ int id;
+
+ root = get_root_namespace(dev, ns_type);
+ if (!root)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ definer = kzalloc(sizeof(*definer), GFP_KERNEL);
+ if (!definer)
+ return ERR_PTR(-ENOMEM);
+
+ definer->ns_type = ns_type;
+ id = root->cmds->create_match_definer(root, format_id, match_mask);
+ if (id < 0) {
+ mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id);
+ kfree(definer);
+ return ERR_PTR(id);
+ }
+ definer->id = id;
+ return definer;
+}
+
+void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
+ struct mlx5_flow_definer *definer)
+{
+ struct mlx5_flow_root_namespace *root;
+
+ root = get_root_namespace(dev, definer->ns_type);
+ if (WARN_ON(!root))
+ return;
+
+ root->cmds->destroy_match_definer(root, definer->id);
+ kfree(definer);
+}
+
int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index be5f5e32c1e8..3af50fd04d28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -39,6 +39,21 @@
#include <linux/llist.h>
#include <steering/fs_dr.h>
+#define FDB_TC_MAX_CHAIN 3
+#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1)
+#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1)
+
+/* The index of the last real chain (FT) + 1 as chain zero is valid as well */
+#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1)
+
+#define FDB_TC_MAX_PRIO 16
+#define FDB_TC_LEVELS_PER_PRIO 2
+
+struct mlx5_flow_definer {
+ enum mlx5_flow_namespace_type ns_type;
+ u32 id;
+};
+
struct mlx5_modify_hdr {
enum mlx5_flow_namespace_type ns_type;
union {
@@ -86,7 +101,9 @@ enum fs_flow_table_type {
FS_FT_SNIFFER_RX = 0X5,
FS_FT_SNIFFER_TX = 0X6,
FS_FT_RDMA_RX = 0X7,
- FS_FT_MAX_TYPE = FS_FT_RDMA_RX,
+ FS_FT_RDMA_TX = 0X8,
+ FS_FT_PORT_SEL = 0X9,
+ FS_FT_MAX_TYPE = FS_FT_PORT_SEL,
};
enum fs_flow_table_op_mod {
@@ -103,6 +120,11 @@ enum mlx5_flow_steering_mode {
MLX5_FLOW_STEERING_MODE_SMFS
};
+enum mlx5_flow_steering_capabilty {
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
+ MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
+};
+
struct mlx5_flow_steering {
struct mlx5_core_dev *dev;
enum mlx5_flow_steering_mode mode;
@@ -116,7 +138,11 @@ struct mlx5_flow_steering {
struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
struct mlx5_flow_root_namespace *rdma_rx_root_ns;
+ struct mlx5_flow_root_namespace *rdma_tx_root_ns;
struct mlx5_flow_root_namespace *egress_root_ns;
+ struct mlx5_flow_root_namespace *port_sel_root_ns;
+ int esw_egress_acl_vports;
+ int esw_ingress_acl_vports;
};
struct fs_node {
@@ -136,6 +162,7 @@ struct fs_node {
struct mlx5_flow_rule {
struct fs_node node;
+ struct mlx5_flow_table *ft;
struct mlx5_flow_destination dest_attr;
/* next_ft should be accessed under chain_lock and only of
* destination type is FWD_NEXT_fT.
@@ -173,6 +200,7 @@ struct mlx5_flow_table {
u32 flags;
struct rhltable fgs_hash;
enum mlx5_flow_table_miss_action def_miss_action;
+ struct mlx5_flow_namespace *ns;
};
struct mlx5_ft_underlay_qp {
@@ -180,7 +208,7 @@ struct mlx5_ft_underlay_qp {
u32 qpn;
};
-#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_a00
+#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_e00
/* Calculate the fte_match_param length and without the reserved length.
* Make sure the reserved field is the last.
*/
@@ -198,6 +226,7 @@ struct fs_fte {
struct mlx5_fs_dr_rule fs_dr_rule;
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
+ u32 fwd_dests;
u32 index;
struct mlx5_flow_context flow_context;
struct mlx5_flow_act action;
@@ -270,8 +299,19 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
enum mlx5_flow_steering_mode mode);
-int mlx5_init_fs(struct mlx5_core_dev *dev);
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
+void mlx5_fs_core_free(struct mlx5_core_dev *dev);
+int mlx5_fs_core_init(struct mlx5_core_dev *dev);
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
+void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
+int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
+void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type);
+
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
@@ -310,13 +350,16 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \
(type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \
+ (type == FS_FT_NIC_TX) ? MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) : \
(type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \
(type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \
(type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \
(type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \
(type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \
(type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) : \
- (BUILD_BUG_ON_ZERO(FS_FT_RDMA_RX != FS_FT_MAX_TYPE))\
+ (type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) : \
+ (type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \
+ (BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\
)
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index ab69effb056d..b406e0367af6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -38,8 +38,10 @@
#include "fs_cmd.h"
#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000)
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+#define MLX5_INIT_COUNTERS_BULK 8
#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
#define MLX5_FC_POOL_USED_BUFF_RATIO 10
@@ -144,10 +146,16 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
spin_unlock(&fc_stats->counters_idr_lock);
}
+static int get_init_bulk_query_len(struct mlx5_core_dev *dev)
+{
+ return min_t(int, MLX5_INIT_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
+
static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
{
return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
- (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
}
static void update_counter_cache(int index, u32 *bulk_raw_data,
@@ -172,7 +180,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
bool query_more_counters = (first->id <= last_id);
- int max_bulk_len = get_max_bulk_query_len(dev);
+ int cur_bulk_len = fc_stats->bulk_query_len;
u32 *data = fc_stats->bulk_query_out;
struct mlx5_fc *counter = first;
u32 bulk_base_id;
@@ -184,7 +192,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
bulk_base_id = counter->id & ~0x3;
/* number of counters to query inc. the last counter */
- bulk_len = min_t(int, max_bulk_len,
+ bulk_len = min_t(int, cur_bulk_len,
ALIGN(last_id - bulk_base_id + 1, 4));
err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
@@ -225,6 +233,41 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
mlx5_fc_free(dev, counter);
}
+static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ int max_bulk_len = get_max_bulk_query_len(dev);
+ unsigned long now = jiffies;
+ u32 *bulk_query_out_tmp;
+ int max_out_len;
+
+ if (fc_stats->bulk_query_alloc_failed &&
+ time_before(now, fc_stats->next_bulk_query_alloc))
+ return;
+
+ max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+ bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL);
+ if (!bulk_query_out_tmp) {
+ mlx5_core_warn_once(dev,
+ "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n",
+ max_bulk_len);
+ fc_stats->bulk_query_alloc_failed = true;
+ fc_stats->next_bulk_query_alloc =
+ now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD;
+ return;
+ }
+
+ kfree(fc_stats->bulk_query_out);
+ fc_stats->bulk_query_out = bulk_query_out_tmp;
+ fc_stats->bulk_query_len = max_bulk_len;
+ if (fc_stats->bulk_query_alloc_failed) {
+ mlx5_core_info(dev,
+ "Flow counters bulk query buffer size increased, bulk_size(%d)\n",
+ max_bulk_len);
+ fc_stats->bulk_query_alloc_failed = false;
+ }
+}
+
static void mlx5_fc_stats_work(struct work_struct *work)
{
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
@@ -242,15 +285,22 @@ static void mlx5_fc_stats_work(struct work_struct *work)
queue_delayed_work(fc_stats->wq, &fc_stats->work,
fc_stats->sampling_interval);
- llist_for_each_entry(counter, addlist, addlist)
+ llist_for_each_entry(counter, addlist, addlist) {
mlx5_fc_stats_insert(dev, counter);
+ fc_stats->num_counters++;
+ }
llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
mlx5_fc_stats_remove(dev, counter);
mlx5_fc_release(dev, counter);
+ fc_stats->num_counters--;
}
+ if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) &&
+ fc_stats->num_counters > get_init_bulk_query_len(dev))
+ mlx5_fc_stats_bulk_query_size_increase(dev);
+
if (time_before(now, fc_stats->next_query) ||
list_empty(&fc_stats->counters))
return;
@@ -296,7 +346,7 @@ static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
return mlx5_fc_single_alloc(dev);
}
-struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging)
{
struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
@@ -327,8 +377,6 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
goto err_out_alloc;
llist_add(&counter->addlist, &fc_stats->addlist);
-
- mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
}
return counter;
@@ -337,6 +385,16 @@ err_out_alloc:
mlx5_fc_release(dev, counter);
return ERR_PTR(err);
}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging);
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ if (aging)
+ mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+ return counter;
+}
EXPORT_SYMBOL(mlx5_fc_create);
u32 mlx5_fc_id(struct mlx5_fc *counter)
@@ -365,8 +423,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy);
int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
- int max_bulk_len;
- int max_out_len;
+ int init_bulk_len;
+ int init_out_len;
spin_lock_init(&fc_stats->counters_idr_lock);
idr_init(&fc_stats->counters_idr);
@@ -374,11 +432,12 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
init_llist_head(&fc_stats->addlist);
init_llist_head(&fc_stats->dellist);
- max_bulk_len = get_max_bulk_query_len(dev);
- max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
- fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
+ init_bulk_len = get_init_bulk_query_len(dev);
+ init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len);
+ fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL);
if (!fc_stats->bulk_query_out)
return -ENOMEM;
+ fc_stats->bulk_query_len = init_bulk_len;
fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
if (!fc_stats->wq)
@@ -470,7 +529,7 @@ struct mlx5_fc_bulk {
u32 base_id;
int bulk_len;
unsigned long *bitmask;
- struct mlx5_fc fcs[0];
+ struct mlx5_fc fcs[];
};
static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
@@ -497,13 +556,12 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc);
bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1;
- bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc),
- GFP_KERNEL);
+ bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL);
if (!bulk)
goto err_alloc_bulk;
- bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
- GFP_KERNEL);
+ bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
+ GFP_KERNEL);
if (!bulk->bitmask)
goto err_alloc_bitmask;
@@ -521,9 +579,9 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
return bulk;
err_mlx5_cmd_bulk_alloc:
- kfree(bulk->bitmask);
+ kvfree(bulk->bitmask);
err_alloc_bitmask:
- kfree(bulk);
+ kvfree(bulk);
err_alloc_bulk:
return ERR_PTR(err);
}
@@ -537,8 +595,8 @@ mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
}
mlx5_cmd_fc_free(dev, bulk->base_id);
- kfree(bulk->bitmask);
- kfree(bulk);
+ kvfree(bulk->bitmask);
+ kvfree(bulk);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
new file mode 100644
index 000000000000..c14590acc772
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include "fs_ft_pool.h"
+
+/* Firmware currently has 4 pool of 4 sizes that it supports (FT_POOLS),
+ * and a virtual memory region of 16M (MLX5_FT_SIZE), this region is duplicated
+ * for each flow table pool. We can allocate up to 16M of each pool,
+ * and we keep track of how much we used via mlx5_ft_pool_get_avail_sz.
+ * Firmware doesn't report any of this for now.
+ * ESW_POOL is expected to be sorted from large to small and match firmware
+ * pools.
+ */
+#define FT_SIZE (16 * 1024 * 1024)
+static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024,
+ 1 * 1024 * 1024,
+ 64 * 1024,
+ 128,
+ 1 /* size for termination tables */ };
+struct mlx5_ft_pool {
+ int ft_left[ARRAY_SIZE(FT_POOLS)];
+};
+
+int mlx5_ft_pool_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_ft_pool *ft_pool;
+ int i;
+
+ ft_pool = kzalloc(sizeof(*ft_pool), GFP_KERNEL);
+ if (!ft_pool)
+ return -ENOMEM;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--)
+ ft_pool->ft_left[i] = FT_SIZE / FT_POOLS[i];
+
+ dev->priv.ft_pool = ft_pool;
+ return 0;
+}
+
+void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev)
+{
+ kfree(dev->priv.ft_pool);
+}
+
+int
+mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type,
+ int desired_size)
+{
+ u32 max_ft_size = 1 << MLX5_CAP_FLOWTABLE_TYPE(dev, log_max_ft_size, table_type);
+ int i, found_i = -1;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+ if (dev->priv.ft_pool->ft_left[i] && FT_POOLS[i] >= desired_size &&
+ FT_POOLS[i] <= max_ft_size) {
+ found_i = i;
+ if (desired_size != POOL_NEXT_SIZE)
+ break;
+ }
+ }
+
+ if (found_i != -1) {
+ --dev->priv.ft_pool->ft_left[found_i];
+ return FT_POOLS[found_i];
+ }
+
+ return 0;
+}
+
+void
+mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz)
+{
+ int i;
+
+ if (!sz)
+ return;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+ if (sz == FT_POOLS[i]) {
+ ++dev->priv.ft_pool->ft_left[i];
+ return;
+ }
+ }
+
+ WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h
new file mode 100644
index 000000000000..25f4274b372b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_FS_FT_POOL_H__
+#define __MLX5_FS_FT_POOL_H__
+
+#include <linux/mlx5/driver.h>
+#include "fs_core.h"
+
+#define POOL_NEXT_SIZE 0
+
+int mlx5_ft_pool_init(struct mlx5_core_dev *dev);
+void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev);
+
+int
+mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type,
+ int desired_size);
+void
+mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz);
+
+#endif /* __MLX5_FS_FT_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 909a7f284614..f34e758a2f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -31,11 +31,10 @@
*/
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
#include <linux/mlx5/eswitch.h>
-#include <linux/module.h>
#include "mlx5_core.h"
#include "../../mlxfw/mlxfw.h"
+#include "lib/tout.h"
enum {
MCQS_IDENTIFIER_BOOT_IMG = 0x1,
@@ -68,26 +67,19 @@ enum {
MCQI_FW_STORED_VERSION = 1,
};
-static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
- int outlen)
-{
- u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0};
-
- MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
int mlx5_query_board_id(struct mlx5_core_dev *dev)
{
u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {};
int err;
out = kzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
- err = mlx5_cmd_query_adapter(dev, out, outlen);
+ MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+ err = mlx5_cmd_exec_inout(dev, query_adapter, in, out);
if (err)
goto out;
@@ -106,13 +98,15 @@ int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id)
{
u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {};
int err;
out = kzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
- err = mlx5_cmd_query_adapter(mdev, out, outlen);
+ MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+ err = mlx5_cmd_exec_inout(mdev, query_adapter, in, out);
if (err)
goto out;
@@ -153,6 +147,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
if (err)
return err;
+ if (MLX5_CAP_GEN(dev, port_selection_cap)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, hca_cap_2)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
+ if (err)
+ return err;
+ }
+
if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
if (err)
@@ -242,7 +248,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
- if (MLX5_CAP_GEN(dev, tls_tx)) {
+ if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
if (err)
return err;
@@ -255,13 +261,37 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, ipsec_offload)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_IPSEC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, shampo)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN_64(dev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_MACSEC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, adv_virtualization)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ADV_VIRTUALIZATION);
+ if (err)
+ return err;
+ }
+
return 0;
}
int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
{
- u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {};
int i;
MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
@@ -272,16 +302,19 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
sw_owner_id[i]);
}
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) &&
+ dev->priv.sw_vhca_id > 0)
+ MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id);
+
+ return mlx5_cmd_exec_in(dev, init_hca, in);
}
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
{
- u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, teardown_hca, in);
}
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
@@ -312,12 +345,11 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
return 0;
}
-#define MLX5_FAST_TEARDOWN_WAIT_MS 3000
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
{
- unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
- u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+ unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN);
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
int state;
int ret;
@@ -330,7 +362,7 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
MLX5_SET(teardown_hca_in, in, profile,
MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN);
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ ret = mlx5_cmd_exec_inout(dev, teardown_hca, in, out);
if (ret)
return ret;
@@ -613,6 +645,45 @@ static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
fwhandle, 0);
}
+static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u32 out[MLX5_ST_SZ_DW(mirc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mirc_reg)];
+ unsigned long exp_time;
+ int err;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE));
+
+ if (!MLX5_CAP_MCAM_REG2(dev, mirc))
+ return -EOPNOTSUPP;
+
+ memset(in, 0, sizeof(in));
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MIRC, 0, 1);
+ if (err)
+ return err;
+
+ do {
+ memset(out, 0, sizeof(out));
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MIRC, 0, 0);
+ if (err)
+ return err;
+
+ *status = MLX5_GET(mirc_reg, out, status_code);
+ if (*status != MLXFW_FSM_REACTIVATE_STATUS_BUSY)
+ return 0;
+
+ msleep(20);
+ } while (time_before(jiffies, exp_time));
+
+ return 0;
+}
+
static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = {
.component_query = mlx5_component_query,
.fsm_lock = mlx5_fsm_lock,
@@ -620,6 +691,7 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = {
.fsm_block_download = mlx5_fsm_block_download,
.fsm_component_verify = mlx5_fsm_component_verify,
.fsm_activate = mlx5_fsm_activate,
+ .fsm_reactivate = mlx5_fsm_reactivate,
.fsm_query_state = mlx5_fsm_query_state,
.fsm_cancel = mlx5_fsm_cancel,
.fsm_release = mlx5_fsm_release
@@ -634,6 +706,7 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev,
.ops = &mlx5_mlxfw_dev_ops,
.psid = dev->board_id,
.psid_size = strlen(dev->board_id),
+ .devlink = priv_to_devlink(dev),
},
.mlx5_core_dev = dev
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
new file mode 100644
index 000000000000..9d908a0ccfef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "fw_reset.h"
+#include "diag/fw_tracer.h"
+#include "lib/tout.h"
+
+enum {
+ MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
+ MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
+ MLX5_FW_RESET_FLAGS_PENDING_COMP,
+ MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
+};
+
+struct mlx5_fw_reset {
+ struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
+ struct workqueue_struct *wq;
+ struct work_struct fw_live_patch_work;
+ struct work_struct reset_request_work;
+ struct work_struct reset_reload_work;
+ struct work_struct reset_now_work;
+ struct work_struct reset_abort_work;
+ unsigned long reset_flags;
+ struct timer_list timer;
+ struct completion done;
+ int ret;
+};
+
+void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (enable)
+ clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+ else
+ set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+}
+
+bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ return !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+}
+
+static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
+ u8 reset_type_sel, u8 sync_resp, bool sync_start)
+{
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+
+ MLX5_SET(mfrl_reg, in, reset_level, reset_level);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_resp, sync_resp);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, sync_start);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1);
+}
+
+static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
+ u8 *reset_type, u8 *reset_state)
+{
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 0);
+ if (err)
+ return err;
+
+ if (reset_level)
+ *reset_level = MLX5_GET(mfrl_reg, out, reset_level);
+ if (reset_type)
+ *reset_type = MLX5_GET(mfrl_reg, out, reset_type);
+ if (reset_state)
+ *reset_state = MLX5_GET(mfrl_reg, out, reset_state);
+
+ return 0;
+}
+
+int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
+{
+ return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
+}
+
+static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
+ struct netlink_ext_ack *extack)
+{
+ u8 reset_state;
+
+ if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+ goto out;
+
+ switch (reset_state) {
+ case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
+ case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
+ return -EBUSY;
+ case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
+ return -ETIMEDOUT;
+ case MLX5_MFRL_REG_RESET_STATE_NACK:
+ NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
+ return -EPERM;
+ }
+
+out:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset failed");
+ return -EIO;
+}
+
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ int err;
+
+ set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+
+ MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1);
+ err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MFRL, 0, 1, false);
+ if (!err)
+ return 0;
+
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
+ return mlx5_fw_reset_get_reset_state_err(dev, extack);
+
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
+ return mlx5_cmd_check(dev, err, in, out);
+}
+
+int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
+}
+
+static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ /* if this is the driver that initiated the fw reset, devlink completed the reload */
+ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
+ complete(&fw_reset->done);
+ } else {
+ mlx5_unload_one(dev);
+ if (mlx5_health_wait_pci_up(dev))
+ mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
+ else
+ mlx5_load_one(dev, false);
+ devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
+ BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+ BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
+ }
+}
+
+static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ del_timer_sync(&fw_reset->timer);
+}
+
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
+ mlx5_stop_sync_reset_poll(dev);
+ if (poll_health)
+ mlx5_start_health_poll(dev);
+ return 0;
+}
+
+static void mlx5_sync_reset_reload_work(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_reload_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ mlx5_sync_reset_clear_reset_requested(dev, false);
+ mlx5_enter_error_state(dev, true);
+ mlx5_fw_reset_complete_reload(dev);
+}
+
+#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
+static void poll_sync_reset(struct timer_list *t)
+{
+ struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ u32 fatal_error;
+
+ if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ return;
+
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
+
+ if (fatal_error) {
+ mlx5_core_warn(dev, "Got Device Reset\n");
+ if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ else
+ mlx5_core_err(dev, "Device is being removed, Drop new reset work\n");
+ return;
+ }
+
+ mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL));
+}
+
+static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ timer_setup(&fw_reset->timer, poll_sync_reset, 0);
+ fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL);
+ add_timer(&fw_reset->timer);
+}
+
+static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false);
+}
+
+static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
+}
+
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
+ mlx5_stop_health_poll(dev, true);
+ mlx5_start_sync_reset_poll(dev);
+ return 0;
+}
+
+static void mlx5_fw_live_patch_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ fw_live_patch_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
+
+ if (mlx5_fw_tracer_reload(dev->tracer))
+ mlx5_core_err(dev, "Failed to reload FW tracer\n");
+}
+
+static void mlx5_sync_reset_request_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_request_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+ if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) {
+ err = mlx5_fw_reset_set_reset_sync_nack(dev);
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
+ err ? "Failed" : "Sent");
+ return;
+ }
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
+ err = mlx5_fw_reset_set_reset_sync_ack(dev);
+ if (err)
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
+ else
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
+}
+
+static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+{
+ struct pci_bus *bridge_bus = dev->pdev->bus;
+ struct pci_dev *bridge = bridge_bus->self;
+ u16 reg16, dev_id, sdev_id;
+ unsigned long timeout;
+ struct pci_dev *sdev;
+ int cap, err;
+ u32 reg32;
+
+ /* Check that all functions under the pci bridge are PFs of
+ * this device otherwise fail this function.
+ */
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+ if (err)
+ return err;
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id);
+ if (err)
+ return err;
+ if (sdev_id != dev_id)
+ return -EPERM;
+ }
+
+ cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
+ if (!cap)
+ return -EOPNOTSUPP;
+
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ pci_save_state(sdev);
+ pci_cfg_access_lock(sdev);
+ }
+ /* PCI link toggle */
+ err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, &reg16);
+ if (err)
+ return err;
+ reg16 |= PCI_EXP_LNKCTL_LD;
+ err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
+ if (err)
+ return err;
+ msleep(500);
+ reg16 &= ~PCI_EXP_LNKCTL_LD;
+ err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
+ if (err)
+ return err;
+
+ /* Check link */
+ err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32);
+ if (err)
+ return err;
+ if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) {
+ mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32);
+ msleep(1000);
+ goto restore;
+ }
+
+ timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE));
+ do {
+ err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
+ if (err)
+ return err;
+ if (reg16 & PCI_EXP_LNKSTA_DLLLA)
+ break;
+ msleep(20);
+ } while (!time_after(jiffies, timeout));
+
+ if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
+ mlx5_core_info(dev, "PCI Link up\n");
+ } else {
+ mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n",
+ reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
+ err = -ETIMEDOUT;
+ }
+
+ do {
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &reg16);
+ if (err)
+ return err;
+ if (reg16 == dev_id)
+ break;
+ msleep(20);
+ } while (!time_after(jiffies, timeout));
+
+ if (reg16 == dev_id) {
+ mlx5_core_info(dev, "Firmware responds to PCI config cycles again\n");
+ } else {
+ mlx5_core_err(dev, "Firmware is not responsive (0x%04x) after %llu ms\n",
+ reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
+ err = -ETIMEDOUT;
+ }
+
+restore:
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ pci_cfg_access_unlock(sdev);
+ pci_restore_state(sdev);
+ }
+
+ return err;
+}
+
+static void mlx5_sync_reset_now_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_now_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;
+
+ mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
+
+ err = mlx5_cmd_fast_teardown_hca(dev);
+ if (err) {
+ mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err);
+ goto done;
+ }
+
+ err = mlx5_pci_link_toggle(dev);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
+ goto done;
+ }
+
+ mlx5_enter_error_state(dev, true);
+done:
+ fw_reset->ret = err;
+ mlx5_fw_reset_complete_reload(dev);
+}
+
+static void mlx5_sync_reset_abort_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_abort_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
+ return;
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
+}
+
+static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe)
+{
+ struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe;
+ u8 sync_event_rst_type;
+
+ sync_fw_update_eqe = &eqe->data.sync_fw_update;
+ sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK;
+ switch (sync_event_rst_type) {
+ case MLX5_SYNC_RST_STATE_RESET_REQUEST:
+ queue_work(fw_reset->wq, &fw_reset->reset_request_work);
+ break;
+ case MLX5_SYNC_RST_STATE_RESET_NOW:
+ queue_work(fw_reset->wq, &fw_reset->reset_now_work);
+ break;
+ case MLX5_SYNC_RST_STATE_RESET_ABORT:
+ queue_work(fw_reset->wq, &fw_reset->reset_abort_work);
+ break;
+ }
+}
+
+static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
+ struct mlx5_eqe *eqe = data;
+
+ if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ return NOTIFY_DONE;
+
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
+ queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+ break;
+ case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
+ mlx5_sync_reset_events_handle(fw_reset, eqe);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
+{
+ unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE);
+ unsigned long timeout = msecs_to_jiffies(pci_sync_update_timeout);
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ int err;
+
+ if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
+ mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n",
+ pci_sync_update_timeout / 1000);
+ err = -ETIMEDOUT;
+ goto out;
+ }
+ err = fw_reset->ret;
+out:
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ return err;
+}
+
+void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT);
+ mlx5_eq_notifier_register(dev, &fw_reset->nb);
+}
+
+void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
+{
+ mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
+}
+
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+ cancel_work_sync(&fw_reset->fw_live_patch_work);
+ cancel_work_sync(&fw_reset->reset_request_work);
+ cancel_work_sync(&fw_reset->reset_reload_work);
+ cancel_work_sync(&fw_reset->reset_now_work);
+ cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
+int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
+
+ if (!fw_reset)
+ return -ENOMEM;
+ fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events");
+ if (!fw_reset->wq) {
+ kfree(fw_reset);
+ return -ENOMEM;
+ }
+
+ fw_reset->dev = dev;
+ dev->priv.fw_reset = fw_reset;
+
+ INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event);
+ INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event);
+ INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
+ INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
+ INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
+
+ init_completion(&fw_reset->done);
+ return 0;
+}
+
+void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ destroy_workqueue(fw_reset->wq);
+ kfree(dev->priv.fw_reset);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
new file mode 100644
index 000000000000..dc141c7e641a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_FW_RESET_H
+#define __MLX5_FW_RESET_H
+
+#include "mlx5_core.h"
+
+void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable);
+bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev);
+int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type);
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack);
+int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
+
+int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
+int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d9f4e8c59c1f..86ed87d704f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -31,20 +31,19 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
+#include <linux/kern_levels.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/mlx5.h"
#include "lib/pci_vsc.h"
+#include "lib/tout.h"
#include "diag/fw_tracer.h"
enum {
- MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
MAX_MISSES = 3,
};
@@ -75,6 +74,11 @@ enum {
MLX5_SENSOR_FW_SYND_RFR = 5,
};
+enum {
+ MLX5_SEVERITY_MASK = 0x7,
+ MLX5_SEVERITY_VALID_MASK = 0x8,
+};
+
u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
{
return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
@@ -99,19 +103,26 @@ static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
return (ioread32be(&h->fw_ver) == 0xffffffff);
}
+static int mlx5_health_get_rfr(u8 rfr_severity)
+{
+ return rfr_severity >> MLX5_RFR_BIT_OFFSET;
+}
+
static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
struct health_buffer __iomem *h = health->health;
- u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
u8 synd = ioread8(&h->synd);
+ u8 rfr;
+
+ rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));
if (rfr && synd)
mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
return rfr && synd;
}
-static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
+u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
{
if (sensor_pci_not_working(dev))
return MLX5_SENSOR_PCI_COMM_ERR;
@@ -171,10 +182,10 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
/* The reset only needs to be issued by one PF. The health buffer is
* shared between all functions, and will be cleared during a reset.
- * Check again to avoid a redundant 2nd reset. If the fatal erros was
+ * Check again to avoid a redundant 2nd reset. If the fatal errors was
* PCI related a reset won't help.
*/
- fatal_error = check_fatal_sensors(dev);
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
fatal_error == MLX5_SENSOR_NIC_DISABLED ||
fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
@@ -191,31 +202,38 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
return true;
}
-void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
+static void enter_error_state(struct mlx5_core_dev *dev, bool force)
{
- mutex_lock(&dev->intf_state_mutex);
- if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
- goto unlock;
- if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
+ if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- goto unlock;
+ mlx5_cmd_flush(dev);
}
- if (check_fatal_sensors(dev) || force) {
+ mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
+}
+
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
+{
+ bool err_detected = false;
+
+ /* Mark the device as fatal in order to abort FW commands */
+ if ((mlx5_health_check_fatal_sensors(dev) || force) &&
+ dev->state == MLX5_DEVICE_STATE_UP) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- mlx5_cmd_flush(dev);
+ err_detected = true;
}
+ mutex_lock(&dev->intf_state_mutex);
+ if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto unlock;/* a previous error is still being handled */
- mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
+ enter_error_state(dev, force);
unlock:
mutex_unlock(&dev->intf_state_mutex);
}
-#define MLX5_CRDUMP_WAIT_MS 60000
-#define MLX5_FW_RESET_WAIT_MS 1000
void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
{
- unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
+ unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
int lock = -EBUSY;
mutex_lock(&dev->intf_state_mutex);
@@ -224,12 +242,12 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "start\n");
- if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
+ if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
/* Get cr-dump and reset FW semaphore */
lock = lock_sem_sw_reset(dev, true);
if (lock == -EBUSY) {
- delay_ms = MLX5_CRDUMP_WAIT_MS;
+ delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
goto recover_from_sw_reset;
}
/* Execute SW reset */
@@ -243,7 +261,7 @@ recover_from_sw_reset:
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
break;
- cond_resched();
+ msleep(20);
} while (!time_after(jiffies, end));
if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
@@ -299,31 +317,34 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
mlx5_disable_device(dev);
}
-/* How much time to wait until health resetting the driver (in msecs) */
-#define MLX5_RECOVERY_WAIT_MSECS 60000
-static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
+int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
{
unsigned long end;
- mlx5_core_warn(dev, "handling bad device here\n");
- mlx5_handle_bad_state(dev);
- end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
+ end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
while (sensor_pci_not_working(dev)) {
- if (time_after(jiffies, end)) {
- mlx5_core_err(dev,
- "health recovery flow aborted, PCI reads still not working\n");
- return -EIO;
- }
+ if (time_after(jiffies, end))
+ return -ETIMEDOUT;
msleep(100);
}
+ return 0;
+}
+static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
+{
+ mlx5_core_warn(dev, "handling bad device here\n");
+ mlx5_handle_bad_state(dev);
+ if (mlx5_health_wait_pci_up(dev)) {
+ mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
+ return -EIO;
+ }
mlx5_core_err(dev, "starting health recovery flow\n");
- mlx5_recover_device(dev);
- if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) ||
- check_fatal_sensors(dev)) {
+ if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
mlx5_core_err(dev, "health recovery failed\n");
return -EIO;
}
+
+ mlx5_core_info(dev, "health recovery succeeded\n");
return 0;
}
@@ -357,35 +378,74 @@ static const char *hsynd_str(u8 synd)
}
}
+static const char *mlx5_loglevel_str(int level)
+{
+ switch (level) {
+ case LOGLEVEL_EMERG:
+ return "EMERGENCY";
+ case LOGLEVEL_ALERT:
+ return "ALERT";
+ case LOGLEVEL_CRIT:
+ return "CRITICAL";
+ case LOGLEVEL_ERR:
+ return "ERROR";
+ case LOGLEVEL_WARNING:
+ return "WARNING";
+ case LOGLEVEL_NOTICE:
+ return "NOTICE";
+ case LOGLEVEL_INFO:
+ return "INFO";
+ case LOGLEVEL_DEBUG:
+ return "DEBUG";
+ }
+ return "Unknown log level";
+}
+
+static int mlx5_health_get_severity(u8 rfr_severity)
+{
+ return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
+ rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
+}
+
static void print_health_info(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
struct health_buffer __iomem *h = health->health;
- char fw_str[18];
- u32 fw;
+ u8 rfr_severity;
+ int severity;
int i;
/* If the syndrome is 0, the device is OK and no need to print buffer */
if (!ioread8(&h->synd))
return;
+ if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
+ mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
+ return;
+ }
+
+ rfr_severity = ioread8(&h->rfr_severity);
+ severity = mlx5_health_get_severity(rfr_severity);
+ mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
+ hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));
+
for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
- mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
- ioread32be(h->assert_var + i));
-
- mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n",
- ioread32be(&h->assert_exit_ptr));
- mlx5_core_err(dev, "assert_callra 0x%08x\n",
- ioread32be(&h->assert_callra));
- sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
- mlx5_core_err(dev, "fw_ver %s\n", fw_str);
- mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
- mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
- mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
- hsynd_str(ioread8(&h->synd)));
- mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
- fw = ioread32be(&h->fw_ver);
- mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
+ mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i,
+ ioread32be(h->assert_var + i));
+
+ mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
+ mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+ mlx5_log(dev, severity, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev),
+ fw_rev_sub(dev));
+ mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time));
+ mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+ mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
+ mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
+ mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index));
+ mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd),
+ hsynd_str(ioread8(&h->synd)));
+ mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+ mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
}
static int
@@ -434,6 +494,7 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
{
struct mlx5_core_health *health = &dev->priv.health;
struct health_buffer __iomem *h = health->health;
+ u8 rfr_severity;
int err;
int i;
@@ -466,9 +527,19 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
ioread32be(&h->assert_callra));
if (err)
return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
+ if (err)
+ return err;
err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
if (err)
return err;
+ rfr_severity = ioread8(&h->rfr_severity);
+ err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
+ if (err)
+ return err;
err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
ioread8(&h->irisc_index));
if (err)
@@ -530,7 +601,7 @@ static void mlx5_fw_reporter_err_work(struct work_struct *work)
fw_reporter_ctx.miss_counter = health->miss_counter;
if (fw_reporter_ctx.err_synd) {
devlink_health_report(health->fw_reporter,
- "FW syndrom reported", &fw_reporter_ctx);
+ "FW syndrome reported", &fw_reporter_ctx);
return;
}
if (fw_reporter_ctx.miss_counter)
@@ -595,22 +666,34 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
struct mlx5_fw_reporter_ctx fw_reporter_ctx;
struct mlx5_core_health *health;
struct mlx5_core_dev *dev;
+ struct devlink *devlink;
struct mlx5_priv *priv;
health = container_of(work, struct mlx5_core_health, fatal_report_work);
priv = container_of(health, struct mlx5_priv, health);
dev = container_of(priv, struct mlx5_core_dev, priv);
+ devlink = priv_to_devlink(dev);
- mlx5_enter_error_state(dev, false);
+ enter_error_state(dev, false);
if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+ devl_lock(devlink);
if (mlx5_health_try_recover(dev))
mlx5_core_err(dev, "health recovery failed\n");
+ devl_unlock(devlink);
return;
}
fw_reporter_ctx.err_synd = health->synd;
fw_reporter_ctx.miss_counter = health->miss_counter;
- devlink_health_report(health->fw_fatal_reporter,
- "FW fatal error reported", &fw_reporter_ctx);
+ if (devlink_health_report(health->fw_fatal_reporter,
+ "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
+ /* If recovery wasn't performed, due to grace period,
+ * unload the driver. This ensures that the driver
+ * closes all its resources and it is not subjected to
+ * requests from the kernel.
+ */
+ mlx5_core_err(dev, "Driver is in error state. Unloading\n");
+ mlx5_unload_one(dev);
+ }
}
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
@@ -619,15 +702,29 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
.dump = mlx5_fw_fatal_reporter_dump,
};
-#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
+#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
+#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
+#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
+#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
+
static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
struct devlink *devlink = priv_to_devlink(dev);
+ u64 grace_period;
+
+ if (mlx5_core_is_ecpf(dev)) {
+ grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
+ } else if (mlx5_core_is_pf(dev)) {
+ grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
+ } else {
+ /* VF or SF */
+ grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
+ }
health->fw_reporter =
devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
- 0, false, dev);
+ 0, dev);
if (IS_ERR(health->fw_reporter))
mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
PTR_ERR(health->fw_reporter));
@@ -635,8 +732,8 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
health->fw_fatal_reporter =
devlink_health_reporter_create(devlink,
&mlx5_fw_fatal_reporter_ops,
- MLX5_REPORTER_FW_GRACEFUL_PERIOD,
- true, dev);
+ grace_period,
+ dev);
if (IS_ERR(health->fw_fatal_reporter))
mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
PTR_ERR(health->fw_fatal_reporter));
@@ -653,13 +750,13 @@ static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
devlink_health_reporter_destroy(health->fw_fatal_reporter);
}
-static unsigned long get_next_poll_jiffies(void)
+static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
{
unsigned long next;
get_random_bytes(&next, sizeof(next));
next %= HZ;
- next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+ next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
return next;
}
@@ -677,6 +774,31 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
spin_unlock_irqrestore(&health->wq_lock, flags);
}
+#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
+static void mlx5_health_log_ts_update(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ u64 now_us;
+
+ health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ now_us = ktime_to_us(ktime_get_real());
+
+ MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
+ MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
+ mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);
+
+ queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
+ msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
+}
+
static void poll_health(struct timer_list *t)
{
struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
@@ -689,14 +811,15 @@ static void poll_health(struct timer_list *t)
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
goto out;
- fatal_error = check_fatal_sensors(dev);
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
if (fatal_error && !health->fatal_error) {
mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
dev->priv.health.fatal_error = fatal_error;
print_health_info(dev);
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_trigger_health_work(dev);
- goto out;
+ return;
}
count = ioread32be(health->health_counter);
@@ -718,11 +841,12 @@ static void poll_health(struct timer_list *t)
queue_work(health->wq, &health->report_work);
out:
- mod_timer(&health->timer, get_next_poll_jiffies());
+ mod_timer(&health->timer, get_next_poll_jiffies(dev));
}
void mlx5_start_health_poll(struct mlx5_core_dev *dev)
{
+ u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
struct mlx5_core_health *health = &dev->priv.health;
timer_setup(&health->timer, poll_health, 0);
@@ -731,7 +855,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
health->health = &dev->iseg->health;
health->health_counter = &dev->iseg->health_counter;
- health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
+ health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
add_timer(&health->timer);
}
@@ -749,6 +873,14 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
del_timer_sync(&health->timer);
}
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
+ queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
+}
+
void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
@@ -757,21 +889,16 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
spin_lock_irqsave(&health->wq_lock, flags);
set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
spin_unlock_irqrestore(&health->wq_lock, flags);
+ cancel_delayed_work_sync(&health->update_fw_log_ts_work);
cancel_work_sync(&health->report_work);
cancel_work_sync(&health->fatal_report_work);
}
-void mlx5_health_flush(struct mlx5_core_dev *dev)
-{
- struct mlx5_core_health *health = &dev->priv.health;
-
- flush_workqueue(health->wq);
-}
-
void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
+ cancel_delayed_work_sync(&health->update_fw_log_ts_work);
destroy_workqueue(health->wq);
mlx5_fw_reporters_destroy(dev);
}
@@ -797,6 +924,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
spin_lock_init(&health->wq_lock);
INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
+ INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 90cb50fe17fd..c247cca154e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -32,6 +32,7 @@
#include "en.h"
#include "ipoib.h"
+#include "en/fs_ethtool.h"
static void mlx5i_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
@@ -39,7 +40,7 @@ static void mlx5i_get_drvinfo(struct net_device *dev,
struct mlx5e_priv *priv = mlx5i_epriv(dev);
mlx5e_ethtool_get_drvinfo(priv, drvinfo);
- strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]",
+ strscpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]",
sizeof(drvinfo->driver));
}
@@ -67,7 +68,9 @@ static void mlx5i_get_ethtool_stats(struct net_device *dev,
}
static int mlx5i_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
@@ -75,11 +78,13 @@ static int mlx5i_set_ringparam(struct net_device *dev,
}
static void mlx5i_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *param)
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
- mlx5e_ethtool_get_ringparam(priv, param);
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
}
static int mlx5i_set_channels(struct net_device *dev,
@@ -99,19 +104,23 @@ static void mlx5i_get_channels(struct net_device *dev,
}
static int mlx5i_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- return mlx5e_ethtool_set_coalesce(priv, coal);
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
}
static int mlx5i_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *coal)
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- return mlx5e_ethtool_get_coalesce(priv, coal);
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
}
static int mlx5i_get_ts_info(struct net_device *netdev,
@@ -130,14 +139,6 @@ static int mlx5i_flash_device(struct net_device *netdev,
return mlx5e_ethtool_flash_device(priv, flash);
}
-enum mlx5_ptys_width {
- MLX5_PTYS_WIDTH_1X = 1 << 0,
- MLX5_PTYS_WIDTH_2X = 1 << 1,
- MLX5_PTYS_WIDTH_4X = 1 << 2,
- MLX5_PTYS_WIDTH_8X = 1 << 3,
- MLX5_PTYS_WIDTH_12X = 1 << 4,
-};
-
static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
{
switch (width) {
@@ -158,6 +159,7 @@ enum mlx5_ptys_rate {
MLX5_PTYS_RATE_FDR = 1 << 4,
MLX5_PTYS_RATE_EDR = 1 << 5,
MLX5_PTYS_RATE_HDR = 1 << 6,
+ MLX5_PTYS_RATE_NDR = 1 << 7,
};
static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
@@ -170,28 +172,11 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
case MLX5_PTYS_RATE_FDR: return 14000;
case MLX5_PTYS_RATE_EDR: return 25000;
case MLX5_PTYS_RATE_HDR: return 50000;
+ case MLX5_PTYS_RATE_NDR: return 100000;
default: return -1;
}
}
-static int mlx5i_get_port_settings(struct net_device *netdev,
- u16 *ib_link_width_oper, u16 *ib_proto_oper)
-{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
- int ret;
-
- ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
- if (ret)
- return ret;
-
- *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
- *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
-
- return 0;
-}
-
static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
{
int rate, width;
@@ -209,11 +194,14 @@ static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
static int mlx5i_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *link_ksettings)
{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
u16 ib_link_width_oper;
u16 ib_proto_oper;
int speed, ret;
- ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+ ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper,
+ 1);
if (ret)
return ret;
@@ -234,7 +222,44 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev,
return 0;
}
+static u32 mlx5i_flow_type_mask(u32 flow_type)
+{
+ return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+}
+
+static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct ethtool_rx_flow_spec *fs = &cmd->fs;
+
+ if (mlx5i_flow_type_mask(fs->flow_type) == ETHER_FLOW)
+ return -EINVAL;
+
+ return mlx5e_ethtool_set_rxnfc(priv, cmd);
+}
+
+static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+ u32 *rule_locs)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part
+ * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
+ * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
+ * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
+ */
+ if (info->cmd == ETHTOOL_GRXRINGS) {
+ info->data = priv->channels.params.num_channels;
+ return 0;
+ }
+
+ return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs);
+}
+
const struct ethtool_ops mlx5i_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = mlx5i_get_drvinfo,
.get_strings = mlx5i_get_strings,
.get_sset_count = mlx5i_get_sset_count,
@@ -247,6 +272,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
.get_coalesce = mlx5i_get_coalesce,
.set_coalesce = mlx5i_set_coalesce,
.get_ts_info = mlx5i_get_ts_info,
+ .get_rxnfc = mlx5i_get_rxnfc,
+ .set_rxnfc = mlx5i_set_rxnfc,
.get_link_ksettings = mlx5i_get_link_ksettings,
.get_link = ethtool_op_get_link,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 56078b23f1a0..4e3a75496dd9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -33,7 +33,9 @@
#include <rdma/ib_verbs.h>
#include <linux/mlx5/fs.h>
#include "en.h"
+#include "en/params.h"
#include "ipoib.h"
+#include "en/fs_ethtool.h"
#define IB_DEFAULT_Q_KEY 0xb1b
#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
@@ -49,7 +51,7 @@ static const struct net_device_ops mlx5i_netdev_ops = {
.ndo_init = mlx5i_dev_init,
.ndo_uninit = mlx5i_dev_cleanup,
.ndo_change_mtu = mlx5i_change_mtu,
- .ndo_do_ioctl = mlx5i_ioctl,
+ .ndo_eth_ioctl = mlx5i_ioctl,
};
/* IPoIB mlx5 netdev profile */
@@ -66,29 +68,21 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
- params->lro_en = false;
+ params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
params->tunneled_offload_en = false;
}
/* Called directly after IPoIB netdevice was created to initialize SW structs */
-int mlx5i_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- int err;
-
- err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
- if (err)
- return err;
+ netif_carrier_off(netdev);
mlx5e_set_netdev_mtu_boundaries(priv);
netdev->mtu = netdev->max_mtu;
- mlx5e_build_nic_params(priv, NULL, &priv->rss_params, &priv->channels.params,
- netdev->mtu);
+ mlx5e_build_nic_params(priv, NULL, netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
mlx5e_timestamp_init(priv);
@@ -112,19 +106,19 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
void mlx5i_cleanup(struct mlx5e_priv *priv)
{
- mlx5e_netdev_cleanup(priv->netdev, priv);
+ mlx5e_priv_cleanup(priv);
}
static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
{
- struct mlx5e_sw_stats s = { 0 };
+ struct rtnl_link_stats64 s = {};
int i, j;
- for (i = 0; i < priv->max_nch; i++) {
+ for (i = 0; i < priv->stats_nch; i++) {
struct mlx5e_channel_stats *channel_stats;
struct mlx5e_rq_stats *rq_stats;
- channel_stats = &priv->channel_stats[i];
+ channel_stats = priv->channel_stats[i];
rq_stats = &channel_stats->rq;
s.rx_packets += rq_stats->packets;
@@ -135,11 +129,17 @@ static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
s.tx_packets += sq_stats->packets;
s.tx_bytes += sq_stats->bytes;
- s.tx_queue_dropped += sq_stats->dropped;
+ s.tx_dropped += sq_stats->dropped;
}
}
- memcpy(&priv->stats.sw, &s, sizeof(s));
+ memset(&priv->stats.sw, 0, sizeof(s));
+
+ priv->stats.sw.rx_packets = s.rx_packets;
+ priv->stats.sw.rx_bytes = s.rx_bytes;
+ priv->stats.sw.tx_packets = s.tx_packets;
+ priv->stats.sw.tx_bytes = s.tx_bytes;
+ priv->stats.sw.tx_queue_dropped = s.tx_dropped;
}
void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
@@ -160,45 +160,54 @@ int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5i_priv *ipriv = priv->ppriv;
- struct mlx5_core_qp *qp = &ipriv->qp;
- struct mlx5_qp_context *context;
int ret;
- /* QP states */
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
+ {
+ u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+ u32 *qpc;
- context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
- context->pri_path.port = 1;
- context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
- context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
+ qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
- ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
- if (ret) {
- mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
- goto err_qp_modify_to_err;
- }
- memset(context, 0, sizeof(*context));
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index,
+ ipriv->pkey_index);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
+ MLX5_SET(qpc, qpc, q_key, IB_DEFAULT_Q_KEY);
- ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
- if (ret) {
- mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
- goto err_qp_modify_to_err;
+ MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+ MLX5_SET(rst2init_qp_in, in, qpn, ipriv->qpn);
+ ret = mlx5_cmd_exec_in(mdev, rst2init_qp, in);
+ if (ret)
+ goto err_qp_modify_to_err;
}
-
- ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
- if (ret) {
- mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
- goto err_qp_modify_to_err;
+ {
+ u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+
+ MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+ MLX5_SET(init2rtr_qp_in, in, qpn, ipriv->qpn);
+ ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
+ if (ret)
+ goto err_qp_modify_to_err;
+ }
+ {
+ u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+
+ MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+ MLX5_SET(rtr2rts_qp_in, in, qpn, ipriv->qpn);
+ ret = mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
+ if (ret)
+ goto err_qp_modify_to_err;
}
-
- kfree(context);
return 0;
err_qp_modify_to_err:
- mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp);
- kfree(context);
+ {
+ u32 in[MLX5_ST_SZ_DW(qp_2err_in)] = {};
+
+ MLX5_SET(qp_2err_in, in, opcode, MLX5_CMD_OP_2ERR_QP);
+ MLX5_SET(qp_2err_in, in, qpn, ipriv->qpn);
+ mlx5_cmd_exec_in(mdev, qp_2err, in);
+ }
return ret;
}
@@ -206,31 +215,33 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
{
struct mlx5i_priv *ipriv = priv->ppriv;
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_qp_context context;
- int err;
+ u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
- err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context,
- &ipriv->qp);
- if (err)
- mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err);
+ MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
+ MLX5_SET(qp_2rst_in, in, qpn, ipriv->qpn);
+ mlx5_cmd_exec_in(mdev, qp_2rst, in);
}
#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
-int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+int mlx5i_create_underlay_qp(struct mlx5e_priv *priv)
{
- u32 *in = NULL;
+ const unsigned char *dev_addr = priv->netdev->dev_addr;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {};
+ struct mlx5i_priv *ipriv = priv->ppriv;
void *addr_path;
+ int qpn = 0;
int ret = 0;
- int inlen;
void *qpc;
- inlen = MLX5_ST_SZ_BYTES(create_qp_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) {
+ qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3];
+ MLX5_SET(create_qp_in, in, input_qpn, qpn);
+ }
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev));
MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
MLX5_SET(qpc, qpc, ulp_stateless_offload_mode,
@@ -240,20 +251,28 @@ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp
MLX5_SET(ads, addr_path, vhca_port_num, 1);
MLX5_SET(ads, addr_path, grh, 1);
- ret = mlx5_core_create_qp(mdev, qp, in, inlen);
- if (ret) {
- mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret);
- goto out;
- }
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+ ret = mlx5_cmd_exec_inout(priv->mdev, create_qp, in, out);
+ if (ret)
+ return ret;
-out:
- kvfree(in);
- return ret;
+ ipriv->qpn = MLX5_GET(create_qp_out, out, qpn);
+
+ return 0;
+}
+
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qpn);
+ mlx5_cmd_exec_in(mdev, destroy_qp, in);
}
-void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+int mlx5i_update_nic_rx(struct mlx5e_priv *priv)
{
- mlx5_core_destroy_qp(mdev, qp);
+ return mlx5e_refresh_tirs(priv, true, true);
}
int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
@@ -273,13 +292,13 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
struct mlx5i_priv *ipriv = priv->ppriv;
int err;
- err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+ err = mlx5i_create_underlay_qp(priv);
if (err) {
mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err);
return err;
}
- err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0][0]);
+ err = mlx5i_create_tis(priv->mdev, ipriv->qpn, &priv->tisn[0][0]);
if (err) {
mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
goto err_destroy_underlay_qp;
@@ -288,7 +307,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
return 0;
err_destroy_underlay_qp:
- mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+ mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
return err;
}
@@ -297,65 +316,52 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
struct mlx5i_priv *ipriv = priv->ppriv;
mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]);
- mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+ mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
}
static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
{
- struct ttc_params ttc_params = {};
- int tt, err;
+ struct mlx5_flow_namespace *ns =
+ mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
+ int err;
- priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
- if (!priv->fs.ns)
+ if (!ns)
return -EINVAL;
- err = mlx5e_arfs_create_tables(priv);
+ mlx5e_fs_set_ns(priv->fs, ns, false);
+ err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
if (err) {
netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
err);
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- mlx5e_set_ttc_basic_params(priv, &ttc_params);
- mlx5e_set_inner_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
-
- err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+ err = mlx5e_create_ttc_table(priv->fs, priv->rx_res);
if (err) {
- netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+ netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
goto err_destroy_arfs_tables;
}
- mlx5e_set_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
- if (err) {
- netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
- err);
- goto err_destroy_inner_ttc_table;
- }
+ mlx5e_ethtool_init_steering(priv->fs);
return 0;
-err_destroy_inner_ttc_table:
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
err_destroy_arfs_tables:
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
return err;
}
static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
{
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_destroy_ttc_table(priv->fs);
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
+ mlx5e_ethtool_cleanup_steering(priv->fs);
}
static int mlx5i_init_rx(struct mlx5e_priv *priv)
@@ -363,6 +369,19 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
int err;
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res) {
+ err = -ENOMEM;
+ goto err_free_fs;
+ }
+
mlx5e_create_q_counters(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -371,52 +390,41 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
goto err_destroy_q_counters;
}
- err = mlx5e_create_indirect_rqt(priv);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, true);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
- if (err)
- goto err_destroy_indirect_tirs;
-
err = mlx5i_create_flow_steering(priv);
if (err)
- goto err_destroy_direct_tirs;
+ goto err_destroy_rx_res;
return 0;
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv, true);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
-err_destroy_indirect_rqts:
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
err_destroy_q_counters:
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+err_free_fs:
+ mlx5e_fs_cleanup(priv->fs);
return err;
}
static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
{
mlx5i_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
- mlx5e_destroy_indirect_tirs(priv, true);
- mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
- mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+ mlx5e_fs_cleanup(priv->fs);
}
/* The stats groups order is opposite to the update_stats() order calls */
@@ -450,13 +458,11 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.cleanup_rx = mlx5i_cleanup_rx,
.enable = NULL, /* mlx5i_enable */
.disable = NULL, /* mlx5i_disable */
- .update_rx = mlx5e_update_nic_rx,
+ .update_rx = mlx5i_update_nic_rx,
.update_stats = NULL, /* mlx5i_update_stats */
.update_carrier = NULL, /* no HW update in IB link */
- .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
- .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+ .rx_handlers = &mlx5i_rx_handlers,
.max_tc = MLX5I_MAX_NUM_TC,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5i_stats_grps,
.stats_grps_num = mlx5i_stats_grps_num,
};
@@ -466,28 +472,19 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- struct mlx5e_channels new_channels = {};
- struct mlx5e_params *params;
+ struct mlx5e_params new_params;
int err = 0;
mutex_lock(&priv->state_lock);
- params = &priv->channels.params;
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- params->sw_mtu = new_mtu;
- netdev->mtu = params->sw_mtu;
- goto out;
- }
-
- new_channels.params = *params;
- new_channels.params.sw_mtu = new_mtu;
+ new_params = priv->channels.params;
+ new_params.sw_mtu = new_mtu;
- err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
if (err)
goto out;
- netdev->mtu = new_channels.params.sw_mtu;
+ netdev->mtu = new_params.sw_mtu;
out:
mutex_unlock(&priv->state_lock);
@@ -498,14 +495,16 @@ int mlx5i_dev_init(struct net_device *dev)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
struct mlx5i_priv *ipriv = priv->ppriv;
+ u8 addr_mod[3];
/* Set dev address using underlay QP */
- dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff;
- dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff;
- dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
+ addr_mod[0] = (ipriv->qpn >> 16) & 0xff;
+ addr_mod[1] = (ipriv->qpn >> 8) & 0xff;
+ addr_mod[2] = (ipriv->qpn) & 0xff;
+ dev_addr_mod(dev, 1, addr_mod, sizeof(addr_mod));
/* Add QPN to net-device mapping to HT */
- mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn);
+ mlx5i_pkey_add_qpn(dev, ipriv->qpn);
return 0;
}
@@ -532,7 +531,7 @@ void mlx5i_dev_cleanup(struct net_device *dev)
mlx5i_uninit_underlay_qp(priv);
/* Delete QPN to net-device mapping from HT */
- mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn);
+ mlx5i_pkey_del_qpn(dev, ipriv->qpn);
}
static int mlx5i_open(struct net_device *netdev)
@@ -552,7 +551,7 @@ static int mlx5i_open(struct net_device *netdev)
goto err_clear_state_opened_flag;
}
- err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn);
if (err) {
mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
goto err_reset_qp;
@@ -569,7 +568,7 @@ static int mlx5i_open(struct net_device *netdev)
return 0;
err_remove_fs_underlay_qp:
- mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
err_reset_qp:
mlx5i_uninit_underlay_qp(epriv);
err_clear_state_opened_flag:
@@ -595,7 +594,7 @@ static int mlx5i_close(struct net_device *netdev)
clear_bit(MLX5E_STATE_OPENED, &epriv->state);
netif_carrier_off(epriv->netdev);
- mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
mlx5e_deactivate_priv_channels(epriv);
mlx5e_close_channels(&epriv->channels);
mlx5i_uninit_underlay_qp(epriv);
@@ -614,11 +613,12 @@ static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
struct mlx5i_priv *ipriv = epriv->ppriv;
int err;
- mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
- err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn);
+ mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qpn,
+ gid->raw);
+ err = mlx5_core_attach_mcg(mdev, gid, ipriv->qpn);
if (err)
mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
- ipriv->qp.qpn, gid->raw);
+ ipriv->qpn, gid->raw);
if (set_qkey) {
mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
@@ -637,12 +637,13 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
struct mlx5i_priv *ipriv = epriv->ppriv;
int err;
- mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+ mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qpn,
+ gid->raw);
- err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
+ err = mlx5_core_detach_mcg(mdev, gid, ipriv->qpn);
if (err)
mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
- ipriv->qp.qpn, gid->raw);
+ ipriv->qpn, gid->raw);
return err;
}
@@ -655,7 +656,9 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
struct mlx5_ib_ah *mah = to_mah(address);
struct mlx5i_priv *ipriv = epriv->ppriv;
- return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more());
+ mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more());
+
+ return NETDEV_TX_OK;
}
static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
@@ -681,6 +684,7 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
static void mlx5_rdma_netdev_free(struct net_device *netdev)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5i_priv *ipriv = priv->ppriv;
const struct mlx5e_profile *profile = priv->profile;
@@ -689,13 +693,13 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev)
if (!ipriv->sub_interface) {
mlx5i_pkey_qpn_ht_cleanup(netdev);
- mlx5e_destroy_mdev_resources(priv->mdev);
+ mlx5e_destroy_mdev_resources(mdev);
}
}
static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev)
{
- return mdev->mlx5e_res.pdn != 0;
+ return mdev->mlx5e_res.hw_objs.pdn != 0;
}
static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
@@ -705,7 +709,7 @@ static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
return &mlx5i_nic_profile;
}
-static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num,
+static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num,
struct net_device *netdev, void *param)
{
struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param;
@@ -732,7 +736,14 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num,
goto destroy_ht;
}
- prof->init(mdev, netdev, prof, ipriv);
+ err = mlx5e_priv_init(epriv, prof, netdev, mdev);
+ if (err)
+ goto destroy_mdev_resources;
+
+ epriv->profile = prof;
+ epriv->ppriv = ipriv;
+
+ prof->init(mdev, netdev);
err = mlx5e_attach_netdev(epriv);
if (err)
@@ -756,6 +767,7 @@ detach:
prof->cleanup(epriv);
if (ipriv->sub_interface)
return err;
+destroy_mdev_resources:
mlx5e_destroy_mdev_resources(mdev);
destroy_ht:
mlx5i_pkey_qpn_ht_cleanup(netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index c87962cab921..99d46fda9f82 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -42,6 +42,7 @@
extern const struct ethtool_ops mlx5i_ethtool_ops;
extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
+extern const struct mlx5e_rx_handlers mlx5i_rx_handlers;
#define MLX5_IB_GRH_BYTES 40
#define MLX5_IPOIB_ENCAP_LEN 4
@@ -51,19 +52,19 @@ extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
/* ipoib rdma netdev's private data structure */
struct mlx5i_priv {
struct rdma_netdev rn; /* keep this first */
- struct mlx5_core_qp qp;
+ u32 qpn;
bool sub_interface;
u32 qkey;
u16 pkey_index;
struct mlx5i_pkey_qpn_ht *qpn_htbl;
- char *mlx5e_priv[0];
+ char *mlx5e_priv[];
};
int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn);
/* Underlay QP create/destroy functions */
-int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
-void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+int mlx5i_create_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn);
/* Underlay QP state modification init/uninit functions */
int mlx5i_init_underlay_qp(struct mlx5e_priv *priv);
@@ -86,12 +87,11 @@ void mlx5i_dev_cleanup(struct net_device *dev);
int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
/* Parent profile functions */
-int mlx5i_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv);
+int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev);
void mlx5i_cleanup(struct mlx5e_priv *priv);
+int mlx5i_update_nic_rx(struct mlx5e_priv *priv);
+
/* Get child interface nic profile */
const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
@@ -107,23 +107,14 @@ struct mlx5i_tx_wqe {
struct mlx5_wqe_datagram_seg datagram;
struct mlx5_wqe_eth_pad pad;
struct mlx5_wqe_eth_seg eth;
- struct mlx5_wqe_data_seg data[0];
+ struct mlx5_wqe_data_seg data[];
};
-static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
- struct mlx5i_tx_wqe **wqe,
- u16 pi)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
-
- *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- memset(*wqe, 0, sizeof(**wqe));
-}
+#define MLX5I_SQ_FETCH_WQE(sq, pi) \
+ ((struct mlx5i_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5i_tx_wqe)))
-netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5_av *av, u32 dqpn, u32 dqkey,
- bool xmit_more);
-void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more);
void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
#endif /* CONFIG_MLX5_CORE_IPOIB */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 96e64187c089..0227a521d301 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -149,7 +149,7 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = {
.ndo_get_stats64 = mlx5i_get_stats,
.ndo_uninit = mlx5i_pkey_dev_cleanup,
.ndo_change_mtu = mlx5i_pkey_change_mtu,
- .ndo_do_ioctl = mlx5i_pkey_ioctl,
+ .ndo_eth_ioctl = mlx5i_pkey_ioctl,
};
/* Child NDOs */
@@ -204,13 +204,13 @@ static int mlx5i_pkey_open(struct net_device *netdev)
goto err_release_lock;
}
- err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn);
if (err) {
mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err);
goto err_unint_underlay_qp;
}
- err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0][0]);
+ err = mlx5i_create_tis(mdev, ipriv->qpn, &epriv->tisn[0][0]);
if (err) {
mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
goto err_remove_rx_uderlay_qp;
@@ -230,7 +230,7 @@ static int mlx5i_pkey_open(struct net_device *netdev)
err_clear_state_opened_flag:
mlx5e_destroy_tis(mdev, epriv->tisn[0][0]);
err_remove_rx_uderlay_qp:
- mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
err_unint_underlay_qp:
mlx5i_uninit_underlay_qp(epriv);
err_release_lock:
@@ -253,7 +253,7 @@ static int mlx5i_pkey_close(struct net_device *netdev)
clear_bit(MLX5E_STATE_OPENED, &priv->state);
netif_carrier_off(priv->netdev);
- mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
mlx5i_uninit_underlay_qp(priv);
mlx5e_deactivate_priv_channels(priv);
mlx5e_close_channels(&priv->channels);
@@ -276,14 +276,12 @@ static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu)
/* Called directly after IPoIB netdevice was created to initialize SW structs */
static int mlx5i_pkey_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+ struct net_device *netdev)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
int err;
- err = mlx5i_init(mdev, netdev, profile, ppriv);
+ err = mlx5i_init(mdev, netdev);
if (err)
return err;
@@ -307,23 +305,20 @@ static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
{
- struct mlx5i_priv *ipriv = priv->ppriv;
int err;
- err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
- if (err) {
+ err = mlx5i_create_underlay_qp(priv);
+ if (err)
mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err);
- return err;
- }
- return 0;
+ return err;
}
static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv)
{
struct mlx5i_priv *ipriv = priv->ppriv;
- mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+ mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
}
static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv)
@@ -350,12 +345,10 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.cleanup_rx = mlx5i_pkey_cleanup_rx,
.enable = NULL,
.disable = NULL,
- .update_rx = mlx5e_update_nic_rx,
+ .update_rx = mlx5i_update_nic_rx,
.update_stats = NULL,
- .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
- .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+ .rx_handlers = &mlx5i_rx_handlers,
.max_tc = MLX5I_MAX_NUM_TC,
- .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
};
const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
new file mode 100644
index 000000000000..380a208ab137
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "pci_irq.h"
+
+static void cpu_put(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]--;
+}
+
+static void cpu_get(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]++;
+}
+
+/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */
+static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask)
+{
+ int best_cpu = -1;
+ int cpu;
+
+ for_each_cpu_and(cpu, req_mask, cpu_online_mask) {
+ /* CPU has zero IRQs on it. No need to search any more CPUs. */
+ if (!pool->irqs_per_cpu[cpu]) {
+ best_cpu = cpu;
+ break;
+ }
+ if (best_cpu < 0)
+ best_cpu = cpu;
+ if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu])
+ best_cpu = cpu;
+ }
+ if (best_cpu == -1) {
+ /* There isn't online CPUs in req_mask */
+ mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n",
+ cpumask_pr_args(req_mask));
+ best_cpu = cpumask_first(cpu_online_mask);
+ }
+ pool->irqs_per_cpu[best_cpu]++;
+ return best_cpu;
+}
+
+/* Creating an IRQ from irq_pool */
+static struct mlx5_irq *
+irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ cpumask_var_t auto_mask;
+ struct mlx5_irq *irq;
+ u32 irq_index;
+ int err;
+
+ if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
+ if (err)
+ return ERR_PTR(err);
+ if (pool->irqs_per_cpu) {
+ if (cpumask_weight(req_mask) > 1)
+ /* if req_mask contain more then one CPU, set the least loadad CPU
+ * of req_mask
+ */
+ cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+ else
+ cpu_get(pool, cpumask_first(req_mask));
+ }
+ irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
+ free_cpumask_var(auto_mask);
+ return irq;
+}
+
+/* Looking for the IRQ with the smallest refcount that fits req_mask.
+ * If pool is sf_comp_pool, then we are looking for an IRQ with any of the
+ * requested CPUs in req_mask.
+ * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask
+ * isn't subset of req_mask, so we will skip it. irq1_mask is subset of req_mask,
+ * we don't skip it.
+ * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will
+ * fit. And since mask is subset of itself, we will pass the first if bellow.
+ */
+static struct mlx5_irq *
+irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ int start = pool->xa_num_irqs.min;
+ int end = pool->xa_num_irqs.max;
+ struct mlx5_irq *irq = NULL;
+ struct mlx5_irq *iter;
+ int irq_refcount = 0;
+ unsigned long index;
+
+ lockdep_assert_held(&pool->lock);
+ xa_for_each_range(&pool->irqs, index, iter, start, end) {
+ struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter);
+ int iter_refcount = mlx5_irq_read_locked(iter);
+
+ if (!cpumask_subset(iter_mask, req_mask))
+ /* skip IRQs with a mask which is not subset of req_mask */
+ continue;
+ if (iter_refcount < pool->min_threshold)
+ /* If we found an IRQ with less than min_thres, return it */
+ return iter;
+ if (!irq || iter_refcount < irq_refcount) {
+ /* In case we won't find an IRQ with less than min_thres,
+ * keep a pointer to the least used IRQ
+ */
+ irq_refcount = iter_refcount;
+ irq = iter;
+ }
+ }
+ return irq;
+}
+
+/**
+ * mlx5_irq_affinity_request - request an IRQ according to the given mask.
+ * @pool: IRQ pool to request from.
+ * @req_mask: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ struct mlx5_irq *least_loaded_irq, *new_irq;
+
+ mutex_lock(&pool->lock);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+ if (least_loaded_irq &&
+ mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
+ goto out;
+ /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
+ new_irq = irq_pool_request_irq(pool, req_mask);
+ if (IS_ERR(new_irq)) {
+ if (!least_loaded_irq) {
+ /* We failed to create an IRQ and we didn't find an IRQ */
+ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
+ PTR_ERR(new_irq));
+ mutex_unlock(&pool->lock);
+ return new_irq;
+ }
+ /* We failed to create a new IRQ for the requested affinity,
+ * sharing existing IRQ.
+ */
+ goto out;
+ }
+ least_loaded_irq = new_irq;
+ goto unlock;
+out:
+ mlx5_irq_get_locked(least_loaded_irq);
+ if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold)
+ mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+ pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(least_loaded_irq)), pool->name,
+ mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
+unlock:
+ mutex_unlock(&pool->lock);
+ return least_loaded_irq;
+}
+
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ int i;
+
+ for (i = 0; i < num_irqs; i++) {
+ int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i]));
+
+ synchronize_irq(pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(irqs[i])));
+ if (mlx5_irq_put(irqs[i]))
+ if (pool->irqs_per_cpu)
+ cpu_put(pool, cpu);
+ }
+}
+
+/**
+ * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bounded to at most 1 CPU.
+ * This function is requesting IRQs according to the default assignment.
+ * The default assignment policy is:
+ * - in each iteration, request the least loaded IRQ which is not bound to any
+ * CPU of the previous IRQs requested.
+ *
+ * This function returns the number of IRQs requested, (which might be smaller than
+ * @nirqs), if successful, or a negative error code in case of an error.
+ */
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i = 0;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_copy(req_mask, cpu_online_mask);
+ for (i = 0; i < nirqs; i++) {
+ if (mlx5_irq_pool_is_sf_pool(pool))
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ else
+ /* In case SF pool doesn't exists, fallback to the PF IRQs.
+ * The PF IRQs are already allocated and binded to CPU
+ * at this point. Hence, only an index is needed.
+ */
+ irq = mlx5_irq_request(dev, i, NULL);
+ if (IS_ERR(irq))
+ break;
+ irqs[i] = irq;
+ cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+ mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+ cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+ mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
+ }
+ free_cpumask_var(req_mask);
+ if (!i)
+ return PTR_ERR(irq);
+ return i;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
deleted file mode 100644
index 8e19f6ab8393..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ /dev/null
@@ -1,772 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/netdevice.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/vport.h>
-#include "mlx5_core.h"
-#include "eswitch.h"
-#include "lag.h"
-#include "lag_mp.h"
-
-/* General purpose, use for short periods of time.
- * Beware of lock dependencies (preferably, no locks should be acquired
- * under it).
- */
-static DEFINE_MUTEX(lag_mutex);
-
-static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2)
-{
- u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
- void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
-
- MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
-
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2)
-{
- u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
- void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
-
- MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
- MLX5_SET(modify_lag_in, in, field_select, 0x1);
-
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
-
- MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
-
- MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
-
-int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
-
- MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
-
-static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
- bool reset, void *out, int out_size)
-{
- u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
- MLX5_SET(query_cong_statistics_in, in, opcode,
- MLX5_CMD_OP_QUERY_CONG_STATISTICS);
- MLX5_SET(query_cong_statistics_in, in, clear, reset);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
-int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
- struct net_device *ndev)
-{
- int i;
-
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].netdev == ndev)
- return i;
-
- return -1;
-}
-
-static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
-{
- return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
-}
-
-static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
-{
- return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
-}
-
-static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
- u8 *port1, u8 *port2)
-{
- *port1 = 1;
- *port2 = 2;
- if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled ||
- !tracker->netdev_state[MLX5_LAG_P1].link_up) {
- *port1 = 2;
- return;
- }
-
- if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled ||
- !tracker->netdev_state[MLX5_LAG_P2].link_up)
- *port2 = 1;
-}
-
-void mlx5_modify_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
-{
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- u8 v2p_port1, v2p_port2;
- int err;
-
- mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
- &v2p_port2);
-
- if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
- v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
- ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
- ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
-
- mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
- ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
-
- err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
- if (err)
- mlx5_core_err(dev0,
- "Failed to modify LAG (%d)\n",
- err);
- }
-}
-
-static int mlx5_create_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
-{
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- int err;
-
- mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
- &ldev->v2p_map[MLX5_LAG_P2]);
-
- mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
- ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
-
- err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
- if (err)
- mlx5_core_err(dev0,
- "Failed to create LAG (%d)\n",
- err);
- return err;
-}
-
-int mlx5_activate_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker,
- u8 flags)
-{
- bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- int err;
-
- err = mlx5_create_lag(ldev, tracker);
- if (err) {
- if (roce_lag) {
- mlx5_core_err(dev0,
- "Failed to activate RoCE LAG\n");
- } else {
- mlx5_core_err(dev0,
- "Failed to activate VF LAG\n"
- "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
- }
- return err;
- }
-
- ldev->flags |= flags;
- return 0;
-}
-
-static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
-{
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- bool roce_lag = __mlx5_lag_is_roce(ldev);
- int err;
-
- ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
-
- err = mlx5_cmd_destroy_lag(dev0);
- if (err) {
- if (roce_lag) {
- mlx5_core_err(dev0,
- "Failed to deactivate RoCE LAG; driver restart required\n");
- } else {
- mlx5_core_err(dev0,
- "Failed to deactivate VF LAG; driver restart required\n"
- "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
- }
- }
-
- return err;
-}
-
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
-{
- if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
- return false;
-
-#ifdef CONFIG_MLX5_ESWITCH
- return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
- ldev->pf[MLX5_LAG_P2].dev);
-#else
- return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
- !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
-#endif
-}
-
-static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
-{
- int i;
-
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- mlx5_add_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
-}
-
-static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
-{
- int i;
-
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
-}
-
-static void mlx5_do_bond(struct mlx5_lag *ldev)
-{
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
- struct lag_tracker tracker;
- bool do_bond, roce_lag;
- int err;
-
- if (!dev0 || !dev1)
- return;
-
- mutex_lock(&lag_mutex);
- tracker = ldev->tracker;
- mutex_unlock(&lag_mutex);
-
- do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
-
- if (do_bond && !__mlx5_lag_is_active(ldev)) {
- roce_lag = !mlx5_sriov_is_enabled(dev0) &&
- !mlx5_sriov_is_enabled(dev1);
-
-#ifdef CONFIG_MLX5_ESWITCH
- roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
-#endif
-
- if (roce_lag)
- mlx5_lag_remove_ib_devices(ldev);
-
- err = mlx5_activate_lag(ldev, &tracker,
- roce_lag ? MLX5_LAG_FLAG_ROCE :
- MLX5_LAG_FLAG_SRIOV);
- if (err) {
- if (roce_lag)
- mlx5_lag_add_ib_devices(ldev);
-
- return;
- }
-
- if (roce_lag) {
- mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_enable_roce(dev1);
- }
- } else if (do_bond && __mlx5_lag_is_active(ldev)) {
- mlx5_modify_lag(ldev, &tracker);
- } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
- roce_lag = __mlx5_lag_is_roce(ldev);
-
- if (roce_lag) {
- mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_disable_roce(dev1);
- }
-
- err = mlx5_deactivate_lag(ldev);
- if (err)
- return;
-
- if (roce_lag)
- mlx5_lag_add_ib_devices(ldev);
- }
-}
-
-static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
-{
- queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
-}
-
-static void mlx5_do_bond_work(struct work_struct *work)
-{
- struct delayed_work *delayed_work = to_delayed_work(work);
- struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
- bond_work);
- int status;
-
- status = mlx5_dev_list_trylock();
- if (!status) {
- /* 1 sec delay. */
- mlx5_queue_bond_work(ldev, HZ);
- return;
- }
-
- mlx5_do_bond(ldev);
- mlx5_dev_list_unlock();
-}
-
-static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
- struct lag_tracker *tracker,
- struct net_device *ndev,
- struct netdev_notifier_changeupper_info *info)
-{
- struct net_device *upper = info->upper_dev, *ndev_tmp;
- struct netdev_lag_upper_info *lag_upper_info = NULL;
- bool is_bonded;
- int bond_status = 0;
- int num_slaves = 0;
- int idx;
-
- if (!netif_is_lag_master(upper))
- return 0;
-
- if (info->linking)
- lag_upper_info = info->upper_info;
-
- /* The event may still be of interest if the slave does not belong to
- * us, but is enslaved to a master which has one or more of our netdevs
- * as slaves (e.g., if a new slave is added to a master that bonds two
- * of our netdevs, we should unbond).
- */
- rcu_read_lock();
- for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
- idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
- if (idx > -1)
- bond_status |= (1 << idx);
-
- num_slaves++;
- }
- rcu_read_unlock();
-
- /* None of this lagdev's netdevs are slaves of this master. */
- if (!(bond_status & 0x3))
- return 0;
-
- if (lag_upper_info)
- tracker->tx_type = lag_upper_info->tx_type;
-
- /* Determine bonding status:
- * A device is considered bonded if both its physical ports are slaves
- * of the same lag master, and only them.
- * Lag mode must be activebackup or hash.
- */
- is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
- (bond_status == 0x3) &&
- ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
- (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
-
- if (tracker->is_bonded != is_bonded) {
- tracker->is_bonded = is_bonded;
- return 1;
- }
-
- return 0;
-}
-
-static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
- struct lag_tracker *tracker,
- struct net_device *ndev,
- struct netdev_notifier_changelowerstate_info *info)
-{
- struct netdev_lag_lower_state_info *lag_lower_info;
- int idx;
-
- if (!netif_is_lag_port(ndev))
- return 0;
-
- idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
- if (idx == -1)
- return 0;
-
- /* This information is used to determine virtual to physical
- * port mapping.
- */
- lag_lower_info = info->lower_state_info;
- if (!lag_lower_info)
- return 0;
-
- tracker->netdev_state[idx] = *lag_lower_info;
-
- return 1;
-}
-
-static int mlx5_lag_netdev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- struct lag_tracker tracker;
- struct mlx5_lag *ldev;
- int changed = 0;
-
- if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
- return NOTIFY_DONE;
-
- ldev = container_of(this, struct mlx5_lag, nb);
- tracker = ldev->tracker;
-
- switch (event) {
- case NETDEV_CHANGEUPPER:
- changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
- ptr);
- break;
- case NETDEV_CHANGELOWERSTATE:
- changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
- ndev, ptr);
- break;
- }
-
- mutex_lock(&lag_mutex);
- ldev->tracker = tracker;
- mutex_unlock(&lag_mutex);
-
- if (changed)
- mlx5_queue_bond_work(ldev, 0);
-
- return NOTIFY_DONE;
-}
-
-static struct mlx5_lag *mlx5_lag_dev_alloc(void)
-{
- struct mlx5_lag *ldev;
-
- ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
- if (!ldev)
- return NULL;
-
- ldev->wq = create_singlethread_workqueue("mlx5_lag");
- if (!ldev->wq) {
- kfree(ldev);
- return NULL;
- }
-
- INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
-
- return ldev;
-}
-
-static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
-{
- destroy_workqueue(ldev->wq);
- kfree(ldev);
-}
-
-static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
- struct mlx5_core_dev *dev,
- struct net_device *netdev)
-{
- unsigned int fn = PCI_FUNC(dev->pdev->devfn);
-
- if (fn >= MLX5_MAX_PORTS)
- return;
-
- mutex_lock(&lag_mutex);
- ldev->pf[fn].dev = dev;
- ldev->pf[fn].netdev = netdev;
- ldev->tracker.netdev_state[fn].link_up = 0;
- ldev->tracker.netdev_state[fn].tx_enabled = 0;
-
- dev->priv.lag = ldev;
-
- mutex_unlock(&lag_mutex);
-}
-
-static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
- struct mlx5_core_dev *dev)
-{
- int i;
-
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev == dev)
- break;
-
- if (i == MLX5_MAX_PORTS)
- return;
-
- mutex_lock(&lag_mutex);
- memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
-
- dev->priv.lag = NULL;
- mutex_unlock(&lag_mutex);
-}
-
-/* Must be called with intf_mutex held */
-void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
-{
- struct mlx5_lag *ldev = NULL;
- struct mlx5_core_dev *tmp_dev;
- int err;
-
- if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
- !MLX5_CAP_GEN(dev, lag_master) ||
- (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
- return;
-
- tmp_dev = mlx5_get_next_phys_dev(dev);
- if (tmp_dev)
- ldev = tmp_dev->priv.lag;
-
- if (!ldev) {
- ldev = mlx5_lag_dev_alloc();
- if (!ldev) {
- mlx5_core_err(dev, "Failed to alloc lag dev\n");
- return;
- }
- }
-
- mlx5_lag_dev_add_pf(ldev, dev, netdev);
-
- if (!ldev->nb.notifier_call) {
- ldev->nb.notifier_call = mlx5_lag_netdev_event;
- if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
- ldev->nb.notifier_call = NULL;
- mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
- }
- }
-
- err = mlx5_lag_mp_init(ldev);
- if (err)
- mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
- err);
-}
-
-/* Must be called with intf_mutex held */
-void mlx5_lag_remove(struct mlx5_core_dev *dev)
-{
- struct mlx5_lag *ldev;
- int i;
-
- ldev = mlx5_lag_dev_get(dev);
- if (!ldev)
- return;
-
- if (__mlx5_lag_is_active(ldev))
- mlx5_deactivate_lag(ldev);
-
- mlx5_lag_dev_remove_pf(ldev, dev);
-
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- break;
-
- if (i == MLX5_MAX_PORTS) {
- if (ldev->nb.notifier_call)
- unregister_netdevice_notifier_net(&init_net, &ldev->nb);
- mlx5_lag_mp_cleanup(ldev);
- cancel_delayed_work_sync(&ldev->bond_work);
- mlx5_lag_dev_free(ldev);
- }
-}
-
-bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
-{
- struct mlx5_lag *ldev;
- bool res;
-
- mutex_lock(&lag_mutex);
- ldev = mlx5_lag_dev_get(dev);
- res = ldev && __mlx5_lag_is_roce(ldev);
- mutex_unlock(&lag_mutex);
-
- return res;
-}
-EXPORT_SYMBOL(mlx5_lag_is_roce);
-
-bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
-{
- struct mlx5_lag *ldev;
- bool res;
-
- mutex_lock(&lag_mutex);
- ldev = mlx5_lag_dev_get(dev);
- res = ldev && __mlx5_lag_is_active(ldev);
- mutex_unlock(&lag_mutex);
-
- return res;
-}
-EXPORT_SYMBOL(mlx5_lag_is_active);
-
-bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
-{
- struct mlx5_lag *ldev;
- bool res;
-
- mutex_lock(&lag_mutex);
- ldev = mlx5_lag_dev_get(dev);
- res = ldev && __mlx5_lag_is_sriov(ldev);
- mutex_unlock(&lag_mutex);
-
- return res;
-}
-EXPORT_SYMBOL(mlx5_lag_is_sriov);
-
-void mlx5_lag_update(struct mlx5_core_dev *dev)
-{
- struct mlx5_lag *ldev;
-
- mlx5_dev_list_lock();
- ldev = mlx5_lag_dev_get(dev);
- if (!ldev)
- goto unlock;
-
- mlx5_do_bond(ldev);
-
-unlock:
- mlx5_dev_list_unlock();
-}
-
-struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
-{
- struct net_device *ndev = NULL;
- struct mlx5_lag *ldev;
-
- mutex_lock(&lag_mutex);
- ldev = mlx5_lag_dev_get(dev);
-
- if (!(ldev && __mlx5_lag_is_roce(ldev)))
- goto unlock;
-
- if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
- ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
- ldev->pf[MLX5_LAG_P1].netdev :
- ldev->pf[MLX5_LAG_P2].netdev;
- } else {
- ndev = ldev->pf[MLX5_LAG_P1].netdev;
- }
- if (ndev)
- dev_hold(ndev);
-
-unlock:
- mutex_unlock(&lag_mutex);
-
- return ndev;
-}
-EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
-
-bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
-{
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
- priv);
- struct mlx5_lag *ldev;
-
- if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
- return true;
-
- ldev = mlx5_lag_dev_get(dev);
- if (!ldev || !__mlx5_lag_is_roce(ldev) ||
- ldev->pf[MLX5_LAG_P1].dev == dev)
- return true;
-
- /* If bonded, we do not add an IB device for PF1. */
- return false;
-}
-
-int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
- u64 *values,
- int num_counters,
- size_t *offsets)
-{
- int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
- struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
- struct mlx5_lag *ldev;
- int num_ports;
- int ret, i, j;
- void *out;
-
- out = kvzalloc(outlen, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- memset(values, 0, sizeof(*values) * num_counters);
-
- mutex_lock(&lag_mutex);
- ldev = mlx5_lag_dev_get(dev);
- if (ldev && __mlx5_lag_is_roce(ldev)) {
- num_ports = MLX5_MAX_PORTS;
- mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
- mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
- } else {
- num_ports = 1;
- mdev[MLX5_LAG_P1] = dev;
- }
-
- for (i = 0; i < num_ports; ++i) {
- ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
- if (ret)
- goto unlock;
-
- for (j = 0; j < num_counters; ++j)
- values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
- }
-
-unlock:
- mutex_unlock(&lag_mutex);
- kvfree(out);
- return ret;
-}
-EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
deleted file mode 100644
index f1068aac6406..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#ifndef __MLX5_LAG_H__
-#define __MLX5_LAG_H__
-
-#include "mlx5_core.h"
-#include "lag_mp.h"
-
-enum {
- MLX5_LAG_P1,
- MLX5_LAG_P2,
-};
-
-enum {
- MLX5_LAG_FLAG_ROCE = 1 << 0,
- MLX5_LAG_FLAG_SRIOV = 1 << 1,
- MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
-};
-
-#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
- MLX5_LAG_FLAG_MULTIPATH)
-
-struct lag_func {
- struct mlx5_core_dev *dev;
- struct net_device *netdev;
-};
-
-/* Used for collection of netdev event info. */
-struct lag_tracker {
- enum netdev_lag_tx_type tx_type;
- struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
- unsigned int is_bonded:1;
-};
-
-/* LAG data of a ConnectX card.
- * It serves both its phys functions.
- */
-struct mlx5_lag {
- u8 flags;
- u8 v2p_map[MLX5_MAX_PORTS];
- struct lag_func pf[MLX5_MAX_PORTS];
- struct lag_tracker tracker;
- struct workqueue_struct *wq;
- struct delayed_work bond_work;
- struct notifier_block nb;
- struct lag_mp lag_mp;
-};
-
-static inline struct mlx5_lag *
-mlx5_lag_dev_get(struct mlx5_core_dev *dev)
-{
- return dev->priv.lag;
-}
-
-static inline bool
-__mlx5_lag_is_active(struct mlx5_lag *ldev)
-{
- return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
-}
-
-void mlx5_modify_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker);
-int mlx5_activate_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker,
- u8 flags);
-int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
- struct net_device *ndev);
-
-#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
new file mode 100644
index 000000000000..b8feaf0f5c4c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lag.h"
+
+static char *get_str_mode_type(struct mlx5_lag *ldev)
+{
+ switch (ldev->mode) {
+ case MLX5_LAG_MODE_ROCE: return "roce";
+ case MLX5_LAG_MODE_SRIOV: return "switchdev";
+ case MLX5_LAG_MODE_MULTIPATH: return "multipath";
+ case MLX5_LAG_MODE_MPESW: return "multiport_eswitch";
+ default: return "invalid";
+ }
+
+ return NULL;
+}
+
+static int type_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ char *mode = NULL;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = get_str_mode_type(ldev);
+ mutex_unlock(&ldev->lock);
+ if (!mode)
+ return -EINVAL;
+ seq_printf(file, "%s\n", mode);
+
+ return 0;
+}
+
+static int port_sel_mode_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int ret = 0;
+ char *mode;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags);
+ else
+ ret = -EINVAL;
+ mutex_unlock(&ldev->lock);
+ if (ret)
+ return ret;
+
+ seq_printf(file, "%s\n", mode);
+ return 0;
+}
+
+static int state_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ bool active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+ seq_printf(file, "%s\n", active ? "active" : "disabled");
+ return 0;
+}
+
+static int flags_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ bool fdb_sel_mode_native;
+ struct mlx5_lag *ldev;
+ bool shared_fdb;
+ bool lag_active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (!lag_active)
+ goto unlock;
+
+ shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &ldev->mode_flags);
+
+unlock:
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off");
+ seq_printf(file, "%s:%s\n", "fdb_selection_mode",
+ fdb_sel_mode_native ? "native" : "affinity");
+ return 0;
+}
+
+static int mapping_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ u8 ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_lag *ldev;
+ bool hash = false;
+ bool lag_active;
+ int num_ports;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (lag_active) {
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
+ mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
+ &num_ports);
+ hash = true;
+ } else {
+ for (i = 0; i < ldev->ports; i++)
+ ports[i] = ldev->v2p_map[i];
+ num_ports = ldev->ports;
+ }
+ }
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ for (i = 0; i < num_ports; i++) {
+ if (hash)
+ seq_printf(file, "%d\n", ports[i] + 1);
+ else
+ seq_printf(file, "%d:%d\n", i + 1, ports[i]);
+ }
+
+ return 0;
+}
+
+static int members_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+ seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device));
+ }
+ mutex_unlock(&ldev->lock);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(type);
+DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
+DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(flags);
+DEFINE_SHOW_ATTRIBUTE(mapping);
+DEFINE_SHOW_ATTRIBUTE(members);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
+{
+ struct dentry *dbg;
+
+ dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
+ dev->priv.dbg.lag_debugfs = dbg;
+
+ debugfs_create_file("type", 0444, dbg, dev, &type_fops);
+ debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops);
+ debugfs_create_file("state", 0444, dbg, dev, &state_fops);
+ debugfs_create_file("flags", 0444, dbg, dev, &flags_fops);
+ debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops);
+ debugfs_create_file("members", 0444, dbg, dev, &members_fops);
+}
+
+void mlx5_ldev_remove_debugfs(struct dentry *dbg)
+{
+ debugfs_remove_recursive(dbg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
new file mode 100644
index 000000000000..a9f4ede4a9bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -0,0 +1,1577 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <net/bonding.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "lag.h"
+#include "mp.h"
+#include "mpesw.h"
+
+enum {
+ MLX5_LAG_EGRESS_PORT_1 = 1,
+ MLX5_LAG_EGRESS_PORT_2,
+};
+
+/* General purpose, use for short periods of time.
+ * Beware of lock dependencies (preferably, no locks should be acquired
+ * under it).
+ */
+static DEFINE_SPINLOCK(lag_lock);
+
+static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
+{
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
+
+ return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
+}
+
+static u8 lag_active_port_bits(struct mlx5_lag *ldev)
+{
+ u8 enabled_ports[MLX5_MAX_PORTS] = {};
+ u8 active_port = 0;
+ int num_enabled;
+ int idx;
+
+ mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
+ &num_enabled);
+ for (idx = 0; idx < num_enabled; idx++)
+ active_port |= BIT_MASK(enabled_ports[idx]);
+
+ return active_port;
+}
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
+ unsigned long flags)
+{
+ bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &flags);
+ int port_sel_mode = get_port_sel_mode(mode, flags);
+ u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
+ void *lag_ctx;
+
+ lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+ MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
+
+ switch (port_sel_mode) {
+ case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
+ break;
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
+ if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
+ break;
+
+ MLX5_SET(lagc, lag_ctx, active_port,
+ lag_active_port_bits(mlx5_lag_dev(dev)));
+ break;
+ default:
+ break;
+ }
+ MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
+
+ return mlx5_cmd_exec_in(dev, create_lag, in);
+}
+
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
+ u8 *ports)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
+ void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+ MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+ MLX5_SET(modify_lag_in, in, field_select, 0x1);
+
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
+
+ return mlx5_cmd_exec_in(dev, modify_lag, in);
+}
+
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
+
+ MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
+
+ return mlx5_cmd_exec_in(dev, create_vport_lag, in);
+}
+EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
+
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
+
+ MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
+
+ return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
+}
+EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+
+static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_disabled)
+{
+ int i;
+
+ *num_disabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (!tracker->netdev_state[i].tx_enabled ||
+ !tracker->netdev_state[i].link_up)
+ ports[(*num_disabled)++] = i;
+ }
+}
+
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled)
+{
+ int i;
+
+ *num_enabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ ports[(*num_enabled)++] = i;
+ }
+
+ if (*num_enabled == 0)
+ mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
+}
+
+static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
+ struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ unsigned long flags)
+{
+ char buf[MLX5_MAX_PORTS * 10 + 1] = {};
+ u8 enabled_ports[MLX5_MAX_PORTS] = {};
+ int written = 0;
+ int num_enabled;
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
+ mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
+ &num_enabled);
+ for (i = 0; i < num_enabled; i++) {
+ err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
+ if (err != 3)
+ return;
+ written += err;
+ }
+ buf[written - 2] = 0;
+ mlx5_core_info(dev, "lag map active ports: %s\n", buf);
+ } else {
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ err = scnprintf(buf + written, 10,
+ " port %d:%d", i + 1, ldev->v2p_map[idx]);
+ if (err != 9)
+ return;
+ written += err;
+ }
+ }
+ mlx5_core_info(dev, "lag map:%s\n", buf);
+ }
+}
+
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr);
+static void mlx5_do_bond_work(struct work_struct *work);
+
+static void mlx5_ldev_free(struct kref *ref)
+{
+ struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
+
+ if (ldev->nb.notifier_call)
+ unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+ mlx5_lag_mp_cleanup(ldev);
+ mlx5_lag_mpesw_cleanup(ldev);
+ cancel_work_sync(&ldev->mpesw_work);
+ destroy_workqueue(ldev->wq);
+ mutex_destroy(&ldev->lock);
+ kfree(ldev);
+}
+
+static void mlx5_ldev_put(struct mlx5_lag *ldev)
+{
+ kref_put(&ldev->ref, mlx5_ldev_free);
+}
+
+static void mlx5_ldev_get(struct mlx5_lag *ldev)
+{
+ kref_get(&ldev->ref);
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ int err;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev)
+ return NULL;
+
+ ldev->wq = create_singlethread_workqueue("mlx5_lag");
+ if (!ldev->wq) {
+ kfree(ldev);
+ return NULL;
+ }
+
+ kref_init(&ldev->ref);
+ mutex_init(&ldev->lock);
+ INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+
+ ldev->nb.notifier_call = mlx5_lag_netdev_event;
+ if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
+ ldev->nb.notifier_call = NULL;
+ mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+ }
+ ldev->mode = MLX5_LAG_MODE_NONE;
+
+ err = mlx5_lag_mp_init(ldev);
+ if (err)
+ mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+ err);
+
+ mlx5_lag_mpesw_init(ldev);
+ ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
+ ldev->buckets = 1;
+
+ return ldev;
+}
+
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].netdev == ndev)
+ return i;
+
+ return -ENOENT;
+}
+
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
+{
+ return ldev->mode == MLX5_LAG_MODE_ROCE;
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+ return ldev->mode == MLX5_LAG_MODE_SRIOV;
+}
+
+/* Create a mapping between steering slots and active ports.
+ * As we have ldev->buckets slots per port first assume the native
+ * mapping should be used.
+ * If there are ports that are disabled fill the relevant slots
+ * with mapping that points to active ports.
+ */
+static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+ u8 num_ports,
+ u8 buckets,
+ u8 *ports)
+{
+ int disabled[MLX5_MAX_PORTS] = {};
+ int enabled[MLX5_MAX_PORTS] = {};
+ int disabled_ports_num = 0;
+ int enabled_ports_num = 0;
+ int idx;
+ u32 rand;
+ int i;
+ int j;
+
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ enabled[enabled_ports_num++] = i;
+ else
+ disabled[disabled_ports_num++] = i;
+ }
+
+ /* Use native mapping by default where each port's buckets
+ * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+ */
+ for (i = 0; i < num_ports; i++)
+ for (j = 0; j < buckets; j++) {
+ idx = i * buckets + j;
+ ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
+ }
+
+ /* If all ports are disabled/enabled keep native mapping */
+ if (enabled_ports_num == num_ports ||
+ disabled_ports_num == num_ports)
+ return;
+
+ /* Go over the disabled ports and for each assign a random active port */
+ for (i = 0; i < disabled_ports_num; i++) {
+ for (j = 0; j < buckets; j++) {
+ get_random_bytes(&rand, 4);
+ ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+ }
+ }
+}
+
+static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].has_drop)
+ return true;
+ return false;
+}
+
+static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].has_drop)
+ continue;
+
+ mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
+ MLX5_VPORT_UPLINK);
+ ldev->pf[i].has_drop = false;
+ }
+}
+
+static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ u8 disabled_ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_core_dev *dev;
+ int disabled_index;
+ int num_disabled;
+ int err;
+ int i;
+
+ /* First delete the current drop rule so there won't be any dropped
+ * packets
+ */
+ mlx5_lag_drop_rule_cleanup(ldev);
+
+ if (!ldev->tracker.has_inactive)
+ return;
+
+ mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
+
+ for (i = 0; i < num_disabled; i++) {
+ disabled_index = disabled_ports[i];
+ dev = ldev->pf[disabled_index].dev;
+ err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
+ MLX5_VPORT_UPLINK);
+ if (!err)
+ ldev->pf[disabled_index].has_drop = true;
+ else
+ mlx5_core_err(dev,
+ "Failed to create lag drop rule, error: %d", err);
+ }
+}
+
+static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
+ void *lag_ctx;
+
+ lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+ MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+ MLX5_SET(modify_lag_in, in, field_select, 0x2);
+
+ MLX5_SET(lagc, lag_ctx, active_port, ports);
+
+ return mlx5_cmd_exec_in(dev, modify_lag, in);
+}
+
+static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ u8 active_ports;
+ int ret;
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
+ ret = mlx5_lag_port_sel_modify(ldev, ports);
+ if (ret ||
+ !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
+ return ret;
+
+ active_ports = lag_active_port_bits(ldev);
+
+ return mlx5_cmd_modify_active_port(dev0, active_ports);
+ }
+ return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
+}
+
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
+
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ports[idx] == ldev->v2p_map[idx])
+ continue;
+ err = _mlx5_modify_lag(ldev, ports);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ return;
+ }
+ memcpy(ldev->v2p_map, ports, sizeof(ports));
+
+ mlx5_lag_print_mapping(dev0, ldev, tracker,
+ ldev->mode_flags);
+ break;
+ }
+ }
+
+ if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ !(ldev->mode == MLX5_LAG_MODE_ROCE))
+ mlx5_lag_drop_rule_setup(ldev, tracker);
+}
+
+static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
+ unsigned long *flags)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+
+ if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
+ if (ldev->ports > 2)
+ return -EINVAL;
+ return 0;
+ }
+
+ if (ldev->ports > 2)
+ ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
+
+ set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
+
+ return 0;
+}
+
+static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ unsigned long *flags)
+{
+ struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ return;
+
+ if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
+ set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
+}
+
+static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
+ struct lag_tracker *tracker, bool shared_fdb,
+ unsigned long *flags)
+{
+ bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
+
+ *flags = 0;
+ if (shared_fdb) {
+ set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+ }
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+
+ if (roce_lag)
+ return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
+
+ mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
+ return 0;
+}
+
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
+{
+ int port_sel_mode = get_port_sel_mode(mode, flags);
+
+ switch (port_sel_mode) {
+ case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
+ default: return "invalid";
+ }
+}
+
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ unsigned long flags)
+{
+ bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
+ int err;
+
+ if (tracker)
+ mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
+ mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
+ shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
+
+ err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to create LAG (%d)\n",
+ err);
+ return err;
+ }
+
+ if (shared_fdb) {
+ err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ if (err)
+ mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+ else
+ mlx5_core_info(dev0, "Operation mode is single FDB\n");
+ }
+
+ if (err) {
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ }
+
+ return err;
+}
+
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ bool shared_fdb)
+{
+ bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ unsigned long flags = 0;
+ int err;
+
+ err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
+ if (err)
+ return err;
+
+ if (mode != MLX5_LAG_MODE_MPESW) {
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
+ err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
+ ldev->v2p_map);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to create LAG port selection(%d)\n",
+ err);
+ return err;
+ }
+ }
+ }
+
+ err = mlx5_create_lag(ldev, tracker, mode, flags);
+ if (err) {
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ mlx5_lag_port_sel_destroy(ldev);
+ if (roce_lag)
+ mlx5_core_err(dev0,
+ "Failed to activate RoCE LAG\n");
+ else
+ mlx5_core_err(dev0,
+ "Failed to activate VF LAG\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ return err;
+ }
+
+ if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ !roce_lag)
+ mlx5_lag_drop_rule_setup(ldev, tracker);
+
+ ldev->mode = mode;
+ ldev->mode_flags = flags;
+ return 0;
+}
+
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
+ bool roce_lag = __mlx5_lag_is_roce(ldev);
+ unsigned long flags = ldev->mode_flags;
+ int err;
+
+ ldev->mode = MLX5_LAG_MODE_NONE;
+ ldev->mode_flags = 0;
+ mlx5_lag_mp_reset(ldev);
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
+ mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
+ }
+
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to deactivate VF LAG; driver restart required\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ return err;
+ }
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ mlx5_lag_port_sel_destroy(ldev);
+ if (mlx5_lag_has_drop_rule(ldev))
+ mlx5_lag_drop_rule_cleanup(ldev);
+
+ return 0;
+}
+
+#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+#ifdef CONFIG_MLX5_ESWITCH
+ struct mlx5_core_dev *dev;
+ u8 mode;
+#endif
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (!ldev->pf[i].dev)
+ return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ dev = ldev->pf[MLX5_LAG_P1].dev;
+ if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev))
+ return false;
+
+ mode = mlx5_eswitch_mode(dev);
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
+ return false;
+
+ if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
+ return false;
+#else
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
+ return false;
+#endif
+ return true;
+}
+
+static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
+ ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(ldev->pf[i].dev);
+ }
+}
+
+static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
+ ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(ldev->pf[i].dev);
+ }
+}
+
+void mlx5_disable_lag(struct mlx5_lag *ldev)
+{
+ bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ bool roce_lag;
+ int err;
+ int i;
+
+ roce_lag = __mlx5_lag_is_roce(ldev);
+
+ if (shared_fdb) {
+ mlx5_lag_remove_devices(ldev);
+ } else if (roce_lag) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ }
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
+ }
+
+ err = mlx5_deactivate_lag(ldev);
+ if (err)
+ return;
+
+ if (shared_fdb || roce_lag)
+ mlx5_lag_add_devices(ldev);
+
+ if (shared_fdb) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ }
+}
+
+bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+ if (is_mdev_switchdev_mode(dev0) &&
+ is_mdev_switchdev_mode(dev1) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+ mlx5_devcom_is_paired(dev0->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS) &&
+ MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+ MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+ MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+ return true;
+
+ return false;
+}
+
+static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
+{
+ bool roce_lag = true;
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
+
+#ifdef CONFIG_MLX5_ESWITCH
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
+#endif
+
+ return roce_lag;
+}
+
+static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
+{
+ return do_bond && __mlx5_lag_is_active(ldev) &&
+ ldev->mode != MLX5_LAG_MODE_MPESW;
+}
+
+static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
+{
+ return !do_bond && __mlx5_lag_is_active(ldev) &&
+ ldev->mode != MLX5_LAG_MODE_MPESW;
+}
+
+static void mlx5_do_bond(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ struct lag_tracker tracker = { };
+ bool do_bond, roce_lag;
+ int err;
+ int i;
+
+ if (!mlx5_lag_is_ready(ldev)) {
+ do_bond = false;
+ } else {
+ /* VF LAG is in multipath mode, ignore bond change requests */
+ if (mlx5_lag_is_multipath(dev0))
+ return;
+
+ tracker = ldev->tracker;
+
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ }
+
+ if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
+ roce_lag = mlx5_lag_is_roce_lag(ldev);
+
+ if (shared_fdb || roce_lag)
+ mlx5_lag_remove_devices(ldev);
+
+ err = mlx5_activate_lag(ldev, &tracker,
+ roce_lag ? MLX5_LAG_MODE_ROCE :
+ MLX5_LAG_MODE_SRIOV,
+ shared_fdb);
+ if (err) {
+ if (shared_fdb || roce_lag)
+ mlx5_lag_add_devices(ldev);
+
+ return;
+ } else if (roce_lag) {
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
+ } else if (shared_fdb) {
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+
+ err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!err)
+ err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+ if (err) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ mlx5_deactivate_lag(ldev);
+ mlx5_lag_add_devices(ldev);
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ mlx5_core_err(dev0, "Failed to enable lag\n");
+ return;
+ }
+ }
+ } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
+ mlx5_modify_lag(ldev, &tracker);
+ } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
+ mlx5_disable_lag(ldev);
+ }
+}
+
+static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
+{
+ queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
+}
+
+static void mlx5_do_bond_work(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
+ bond_work);
+ int status;
+
+ status = mlx5_dev_list_trylock();
+ if (!status) {
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_do_bond(ldev);
+ mutex_unlock(&ldev->lock);
+ mlx5_dev_list_unlock();
+}
+
+static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *upper = info->upper_dev, *ndev_tmp;
+ struct netdev_lag_upper_info *lag_upper_info = NULL;
+ bool is_bonded, is_in_lag, mode_supported;
+ bool has_inactive = 0;
+ struct slave *slave;
+ u8 bond_status = 0;
+ int num_slaves = 0;
+ int changed = 0;
+ int idx;
+
+ if (!netif_is_lag_master(upper))
+ return 0;
+
+ if (info->linking)
+ lag_upper_info = info->upper_info;
+
+ /* The event may still be of interest if the slave does not belong to
+ * us, but is enslaved to a master which has one or more of our netdevs
+ * as slaves (e.g., if a new slave is added to a master that bonds two
+ * of our netdevs, we should unbond).
+ */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+ if (idx >= 0) {
+ slave = bond_slave_get_rcu(ndev_tmp);
+ if (slave)
+ has_inactive |= bond_is_slave_inactive(slave);
+ bond_status |= (1 << idx);
+ }
+
+ num_slaves++;
+ }
+ rcu_read_unlock();
+
+ /* None of this lagdev's netdevs are slaves of this master. */
+ if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
+ return 0;
+
+ if (lag_upper_info) {
+ tracker->tx_type = lag_upper_info->tx_type;
+ tracker->hash_type = lag_upper_info->hash_type;
+ }
+
+ tracker->has_inactive = has_inactive;
+ /* Determine bonding status:
+ * A device is considered bonded if both its physical ports are slaves
+ * of the same lag master, and only them.
+ */
+ is_in_lag = num_slaves == ldev->ports &&
+ bond_status == GENMASK(ldev->ports - 1, 0);
+
+ /* Lag mode must be activebackup or hash. */
+ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
+
+ is_bonded = is_in_lag && mode_supported;
+ if (tracker->is_bonded != is_bonded) {
+ tracker->is_bonded = is_bonded;
+ changed = 1;
+ }
+
+ if (!is_in_lag)
+ return changed;
+
+ if (!mlx5_lag_is_ready(ldev))
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, PF is configured with more than 64 VFs");
+ else if (!mode_supported)
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, TX type isn't supported");
+
+ return changed;
+}
+
+static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct netdev_lag_lower_state_info *lag_lower_info;
+ int idx;
+
+ if (!netif_is_lag_port(ndev))
+ return 0;
+
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
+ if (idx < 0)
+ return 0;
+
+ /* This information is used to determine virtual to physical
+ * port mapping.
+ */
+ lag_lower_info = info->lower_state_info;
+ if (!lag_lower_info)
+ return 0;
+
+ tracker->netdev_state[idx] = *lag_lower_info;
+
+ return 1;
+}
+
+static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev)
+{
+ struct net_device *ndev_tmp;
+ struct slave *slave;
+ bool has_inactive = 0;
+ int idx;
+
+ if (!netif_is_lag_master(ndev))
+ return 0;
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+ if (idx < 0)
+ continue;
+
+ slave = bond_slave_get_rcu(ndev_tmp);
+ if (slave)
+ has_inactive |= bond_is_slave_inactive(slave);
+ }
+ rcu_read_unlock();
+
+ if (tracker->has_inactive == has_inactive)
+ return 0;
+
+ tracker->has_inactive = has_inactive;
+
+ return 1;
+}
+
+/* this handler is always registered to netdev events */
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct lag_tracker tracker;
+ struct mlx5_lag *ldev;
+ int changed = 0;
+
+ if (event != NETDEV_CHANGEUPPER &&
+ event != NETDEV_CHANGELOWERSTATE &&
+ event != NETDEV_CHANGEINFODATA)
+ return NOTIFY_DONE;
+
+ ldev = container_of(this, struct mlx5_lag, nb);
+
+ tracker = ldev->tracker;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
+ ndev, ptr);
+ break;
+ case NETDEV_CHANGEINFODATA:
+ changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
+ break;
+ }
+
+ ldev->tracker = tracker;
+
+ if (changed)
+ mlx5_queue_bond_work(ldev, 0);
+
+ return NOTIFY_DONE;
+}
+
+static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ unsigned int fn = mlx5_get_dev_index(dev);
+ unsigned long flags;
+
+ if (fn >= ldev->ports)
+ return;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev->pf[fn].netdev = netdev;
+ ldev->tracker.netdev_state[fn].link_up = 0;
+ ldev->tracker.netdev_state[fn].tx_enabled = 0;
+ spin_unlock_irqrestore(&lag_lock, flags);
+}
+
+static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
+ struct net_device *netdev)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ for (i = 0; i < ldev->ports; i++) {
+ if (ldev->pf[i].netdev == netdev) {
+ ldev->pf[i].netdev = NULL;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&lag_lock, flags);
+}
+
+static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
+{
+ unsigned int fn = mlx5_get_dev_index(dev);
+
+ if (fn >= ldev->ports)
+ return;
+
+ ldev->pf[fn].dev = dev;
+ dev->priv.lag = ldev;
+}
+
+static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].dev == dev)
+ break;
+
+ if (i == ldev->ports)
+ return;
+
+ ldev->pf[i].dev = NULL;
+ dev->priv.lag = NULL;
+}
+
+/* Must be called with intf_mutex held */
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = NULL;
+ struct mlx5_core_dev *tmp_dev;
+
+ tmp_dev = mlx5_get_next_phys_dev_lag(dev);
+ if (tmp_dev)
+ ldev = tmp_dev->priv.lag;
+
+ if (!ldev) {
+ ldev = mlx5_lag_dev_alloc(dev);
+ if (!ldev) {
+ mlx5_core_err(dev, "Failed to alloc lag dev\n");
+ return 0;
+ }
+ mlx5_ldev_add_mdev(ldev, dev);
+ return 0;
+ }
+
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ return -EAGAIN;
+ }
+ mlx5_ldev_get(ldev);
+ mlx5_ldev_add_mdev(ldev, dev);
+ mutex_unlock(&ldev->lock);
+
+ return 0;
+}
+
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ /* mdev is being removed, might as well remove debugfs
+ * as early as possible.
+ */
+ mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
+recheck:
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ msleep(100);
+ goto recheck;
+ }
+ mlx5_ldev_remove_mdev(ldev, dev);
+ mutex_unlock(&ldev->lock);
+ mlx5_ldev_put(ldev);
+}
+
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
+ return;
+
+recheck:
+ mlx5_dev_list_lock();
+ err = __mlx5_lag_dev_add_mdev(dev);
+ mlx5_dev_list_unlock();
+
+ if (err) {
+ msleep(100);
+ goto recheck;
+ }
+ mlx5_ldev_add_debugfs(dev);
+}
+
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ struct mlx5_lag *ldev;
+ bool lag_is_active;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ mlx5_ldev_remove_netdev(ldev, netdev);
+ clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+
+ lag_is_active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+
+ if (lag_is_active)
+ mlx5_queue_bond_work(ldev, 0);
+}
+
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ mlx5_ldev_add_netdev(ldev, dev, netdev);
+
+ for (i = 0; i < ldev->ports; i++)
+ if (!ldev->pf[i].netdev)
+ break;
+
+ if (i >= ldev->ports)
+ set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+ mutex_unlock(&ldev->lock);
+ mlx5_queue_bond_work(ldev, 0);
+}
+
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_roce(ldev);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_roce);
+
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_active);
+
+bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res = 0;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev)
+ res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
+
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev) &&
+ dev == ldev->pf[MLX5_LAG_P1].dev;
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
+
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev) &&
+ test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mlx5_dev_list_lock();
+ mutex_lock(&ldev->lock);
+
+ ldev->mode_changes_in_progress++;
+ if (__mlx5_lag_is_active(ldev))
+ mlx5_disable_lag(ldev);
+
+ mutex_unlock(&ldev->lock);
+ mlx5_dev_list_unlock();
+}
+
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ ldev->mode_changes_in_progress--;
+ mutex_unlock(&ldev->lock);
+ mlx5_queue_bond_work(ldev, 0);
+}
+
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+{
+ struct net_device *ndev = NULL;
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
+ goto unlock;
+
+ if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->tracker.netdev_state[i].tx_enabled)
+ ndev = ldev->pf[i].netdev;
+ if (!ndev)
+ ndev = ldev->pf[ldev->ports - 1].netdev;
+ } else {
+ ndev = ldev->pf[MLX5_LAG_P1].netdev;
+ }
+ if (ndev)
+ dev_hold(ndev);
+
+unlock:
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return ndev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+ struct net_device *slave)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ u8 port = 0;
+ int i;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
+ goto unlock;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (ldev->pf[MLX5_LAG_P1].netdev == slave) {
+ port = i;
+ break;
+ }
+ }
+
+ port = ldev->v2p_map[port * ldev->buckets];
+
+unlock:
+ spin_unlock_irqrestore(&lag_lock, flags);
+ return port;
+}
+EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+
+u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return 0;
+
+ return ldev->ports;
+}
+EXPORT_SYMBOL(mlx5_lag_get_num_ports);
+
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev = NULL;
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ goto unlock;
+
+ peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
+ ldev->pf[MLX5_LAG_P2].dev :
+ ldev->pf[MLX5_LAG_P1].dev;
+
+unlock:
+ spin_unlock_irqrestore(&lag_lock, flags);
+ return peer_dev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ u64 *values,
+ int num_counters,
+ size_t *offsets)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ struct mlx5_core_dev **mdev;
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ int num_ports;
+ int ret, i, j;
+ void *out;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
+ if (!mdev) {
+ ret = -ENOMEM;
+ goto free_out;
+ }
+
+ memset(values, 0, sizeof(*values) * num_counters);
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev && __mlx5_lag_is_active(ldev)) {
+ num_ports = ldev->ports;
+ for (i = 0; i < ldev->ports; i++)
+ mdev[i] = ldev->pf[i].dev;
+ } else {
+ num_ports = 1;
+ mdev[MLX5_LAG_P1] = dev;
+ }
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ for (i = 0; i < num_ports; ++i) {
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
+ out);
+ if (ret)
+ goto free_mdev;
+
+ for (j = 0; j < num_counters; ++j)
+ values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+ }
+
+free_mdev:
+ kvfree(mdev);
+free_out:
+ kvfree(out);
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
new file mode 100644
index 000000000000..ce2ce8ccbd70
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_H__
+#define __MLX5_LAG_H__
+
+#include <linux/debugfs.h>
+
+#define MLX5_LAG_MAX_HASH_BUCKETS 16
+#include "mlx5_core.h"
+#include "mp.h"
+#include "port_sel.h"
+#include "mpesw.h"
+
+enum {
+ MLX5_LAG_P1,
+ MLX5_LAG_P2,
+};
+
+enum {
+ MLX5_LAG_FLAG_NDEVS_READY,
+};
+
+enum {
+ MLX5_LAG_MODE_FLAG_HASH_BASED,
+ MLX5_LAG_MODE_FLAG_SHARED_FDB,
+ MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+};
+
+enum mlx5_lag_mode {
+ MLX5_LAG_MODE_NONE,
+ MLX5_LAG_MODE_ROCE,
+ MLX5_LAG_MODE_SRIOV,
+ MLX5_LAG_MODE_MULTIPATH,
+ MLX5_LAG_MODE_MPESW,
+};
+
+struct lag_func {
+ struct mlx5_core_dev *dev;
+ struct net_device *netdev;
+ bool has_drop;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+ enum netdev_lag_tx_type tx_type;
+ struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
+ unsigned int is_bonded:1;
+ unsigned int has_inactive:1;
+ enum netdev_lag_hash hash_type;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves both its phys functions.
+ */
+struct mlx5_lag {
+ enum mlx5_lag_mode mode;
+ unsigned long mode_flags;
+ unsigned long state_flags;
+ u8 ports;
+ u8 buckets;
+ int mode_changes_in_progress;
+ u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
+ struct kref ref;
+ struct lag_func pf[MLX5_MAX_PORTS];
+ struct lag_tracker tracker;
+ struct workqueue_struct *wq;
+ struct delayed_work bond_work;
+ struct work_struct mpesw_work;
+ struct notifier_block nb;
+ struct lag_mp lag_mp;
+ struct mlx5_lag_port_sel port_sel;
+ /* Protect lag fields/state changes */
+ struct mutex lock;
+ struct lag_mpesw lag_mpesw;
+};
+
+static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ MLX5_CAP_GEN(dev, num_lag_ports) < 2 ||
+ MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS)
+ return false;
+ return true;
+}
+
+static inline struct mlx5_lag *
+mlx5_lag_dev(struct mlx5_core_dev *dev)
+{
+ return dev->priv.lag;
+}
+
+static inline bool
+__mlx5_lag_is_active(struct mlx5_lag *ldev)
+{
+ return ldev->mode != MLX5_LAG_MODE_NONE;
+}
+
+static inline bool
+mlx5_lag_is_ready(struct mlx5_lag *ldev)
+{
+ return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+}
+
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker);
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ bool shared_fdb);
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev);
+bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev);
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
+
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags);
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
+void mlx5_ldev_remove_debugfs(struct dentry *dbg);
+void mlx5_disable_lag(struct mlx5_lag *ldev);
+
+#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index 416676c35b1f..0259a149a64c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -3,39 +3,42 @@
#include <linux/netdevice.h>
#include <net/nexthop.h>
-#include "lag.h"
-#include "lag_mp.h"
+#include "lag/lag.h"
+#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"
+static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
+{
+ return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
+}
+
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
- if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
+ if (!mlx5_lag_is_ready(ldev))
+ return false;
+
+ if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
return false;
return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
ldev->pf[MLX5_LAG_P2].dev);
}
-static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
-{
- return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
-}
-
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
bool res;
- ldev = mlx5_lag_dev_get(dev);
+ ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_multipath(ldev);
return res;
}
/**
- * Set lag port affinity
+ * mlx5_lag_set_port_affinity
*
* @ldev: lag device
* @port:
@@ -47,7 +50,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
enum mlx5_lag_port_affinity port)
{
- struct lag_tracker tracker;
+ struct lag_tracker tracker = {};
if (!__mlx5_lag_is_multipath(ldev))
return;
@@ -93,9 +96,16 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
- struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
- flush_workqueue(ldev->wq);
+ flush_workqueue(mp->wq);
+}
+
+static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
+{
+ mp->fib.mfi = fi;
+ mp->fib.priority = fi->fib_priority;
+ mp->fib.dst = dst;
+ mp->fib.dst_len = dst_len;
}
struct mlx5_fib_event_work {
@@ -108,10 +118,10 @@ struct mlx5_fib_event_work {
};
};
-static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
- unsigned long event,
- struct fib_info *fi)
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
+ struct fib_entry_notifier_info *fen_info)
{
+ struct fib_info *fi = fen_info->fi;
struct lag_mp *mp = &ldev->lag_mp;
struct fib_nh *fib_nh0, *fib_nh1;
unsigned int nhs;
@@ -119,11 +129,17 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
/* stop track */
- if (mp->mfi == fi)
- mp->mfi = NULL;
+ if (mp->fib.mfi == fi)
+ mp->fib.mfi = NULL;
return;
}
+ /* Handle multipath entry with lower priority value */
+ if (mp->fib.mfi && mp->fib.mfi != fi &&
+ (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
+ fi->fib_priority >= mp->fib.priority)
+ return;
+
/* Handle add/replace event */
nhs = fib_info_num_path(fi);
if (nhs == 1) {
@@ -132,8 +148,14 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
struct net_device *nh_dev = nh->fib_nh_dev;
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
- mlx5_lag_set_port_affinity(ldev, ++i);
+ if (i < 0)
+ return;
+
+ i++;
+ mlx5_lag_set_port_affinity(ldev, i);
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
+
return;
}
@@ -153,15 +175,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
}
/* First time we see multipath route */
- if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+ if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
struct lag_tracker tracker;
tracker = ldev->tracker;
- mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+ mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
- mp->mfi = fi;
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
@@ -172,7 +194,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
struct lag_mp *mp = &ldev->lag_mp;
/* Check the nh event is related to the route */
- if (!mp->mfi || mp->mfi != fi)
+ if (!mp->fib.mfi || mp->fib.mfi != fi)
return;
/* nh added/removed */
@@ -199,13 +221,13 @@ static void mlx5_lag_fib_update(struct work_struct *work)
/* Protect internal structures from changes */
rtnl_lock();
switch (fib_work->event) {
- case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_DEL:
mlx5_lag_fib_route_event(ldev, fib_work->event,
- fib_work->fen_info.fi);
+ &fib_work->fen_info);
fib_info_put(fib_work->fen_info.fi);
break;
- case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_ADD:
case FIB_EVENT_NH_DEL:
fib_nh = fib_work->fnh_info.fib_nh;
mlx5_lag_fib_nexthop_event(ldev,
@@ -256,15 +278,13 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
return NOTIFY_DONE;
switch (event) {
- case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_DEL:
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
fi = fen_info->fi;
- if (fi->nh) {
- NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
- return notifier_from_errno(-EINVAL);
- }
+ if (fi->nh)
+ return NOTIFY_DONE;
fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
@@ -279,7 +299,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
*/
fib_info_hold(fib_work->fen_info.fi);
break;
- case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_ADD:
case FIB_EVENT_NH_DEL:
fnh_info = container_of(info, struct fib_nh_notifier_info,
info);
@@ -293,24 +313,43 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
return NOTIFY_DONE;
}
- queue_work(ldev->wq, &fib_work->work);
+ queue_work(mp->wq, &fib_work->work);
return NOTIFY_DONE;
}
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+{
+ /* Clear mfi, as it might become stale when a route delete event
+ * has been missed, see mlx5_lag_fib_route_event().
+ */
+ ldev->lag_mp.fib.mfi = NULL;
+}
+
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
struct lag_mp *mp = &ldev->lag_mp;
int err;
+ /* always clear mfi, as it might become stale when a route delete event
+ * has been missed
+ */
+ mp->fib.mfi = NULL;
+
if (mp->fib_nb.notifier_call)
return 0;
+ mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
+ if (!mp->wq)
+ return -ENOMEM;
+
mp->fib_nb.notifier_call = mlx5_lag_fib_event;
err = register_fib_notifier(&init_net, &mp->fib_nb,
mlx5_lag_fib_event_flush, NULL);
- if (err)
+ if (err) {
+ destroy_workqueue(mp->wq);
mp->fib_nb.notifier_call = NULL;
+ }
return err;
}
@@ -323,5 +362,7 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
return;
unregister_fib_notifier(&init_net, &mp->fib_nb);
+ destroy_workqueue(mp->wq);
mp->fib_nb.notifier_call = NULL;
+ mp->fib.mfi = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
index 79be89e9c7a4..056a066da604 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
@@ -15,18 +15,28 @@ enum mlx5_lag_port_affinity {
struct lag_mp {
struct notifier_block fib_nb;
- struct fib_info *mfi; /* used in tracking fib events */
+ struct {
+ const void *mfi; /* used in tracking fib events */
+ u32 priority;
+ u32 dst;
+ int dst_len;
+ } fib;
+ struct workqueue_struct *wq;
};
#ifdef CONFIG_MLX5_ESWITCH
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
int mlx5_lag_mp_init(struct mlx5_lag *ldev);
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
#else /* CONFIG_MLX5_ESWITCH */
+static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
+static inline bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; }
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_LAG_MP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
new file mode 100644
index 000000000000..f643202b29c6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <net/nexthop.h>
+#include "lag/lag.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+
+void mlx5_mpesw_work(struct work_struct *work)
+{
+ struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work);
+
+ mutex_lock(&ldev->lock);
+ mlx5_disable_lag(ldev);
+ mutex_unlock(&ldev->lock);
+}
+
+static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = dev->priv.lag;
+
+ if (!queue_work(ldev->wq, &ldev->mpesw_work))
+ mlx5_core_warn(dev, "failed to queue work\n");
+}
+
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = dev->priv.lag;
+
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
+ ldev->mode == MLX5_LAG_MODE_MPESW)
+ mlx5_lag_disable_mpesw(dev);
+ mutex_unlock(&ldev->lock);
+}
+
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = dev->priv.lag;
+ int err = 0;
+
+ if (!ldev)
+ return 0;
+
+ mutex_lock(&ldev->lock);
+ if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
+ goto out;
+
+ if (ldev->mode != MLX5_LAG_MODE_NONE) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
+ if (err)
+ mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
+
+out:
+ mutex_unlock(&ldev->lock);
+ return err;
+}
+
+int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
+{
+ struct mlx5_lag *ldev = mdev->priv.lag;
+
+ if (!netif_is_bond_master(out_dev) || !ldev)
+ return 0;
+
+ mutex_lock(&ldev->lock);
+ if (ldev->mode == MLX5_LAG_MODE_MPESW) {
+ mutex_unlock(&ldev->lock);
+ return -EOPNOTSUPP;
+ }
+ mutex_unlock(&ldev->lock);
+ return 0;
+}
+
+bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
+{
+ bool ret;
+
+ ret = dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW;
+ return ret;
+}
+
+void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
+{
+ INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work);
+ atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);
+}
+
+void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
+{
+ cancel_delayed_work_sync(&ldev->bond_work);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
new file mode 100644
index 000000000000..be4abcb8fcd5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LAG_MPESW_H__
+#define __MLX5_LAG_MPESW_H__
+
+#include "lag.h"
+#include "mlx5_core.h"
+
+struct lag_mpesw {
+ struct work_struct mpesw_work;
+ atomic_t mpesw_rule_count;
+};
+
+void mlx5_mpesw_work(struct work_struct *work);
+int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
+bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+void mlx5_lag_mpesw_init(struct mlx5_lag *ldev);
+void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev);
+#else
+static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {}
+static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {}
+#endif
+
+#endif /* __MLX5_LAG_MPESW_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
new file mode 100644
index 000000000000..d3a3fe4ce670
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/netdevice.h>
+#include "lag.h"
+
+enum {
+ MLX5_LAG_FT_LEVEL_TTC,
+ MLX5_LAG_FT_LEVEL_INNER_TTC,
+ MLX5_LAG_FT_LEVEL_DEFINER,
+};
+
+static struct mlx5_flow_group *
+mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_definer *definer,
+ u8 rules)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_definer_id,
+ mlx5_get_match_definer_id(definer));
+ MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1);
+ MLX5_SET(create_flow_group_in, in, group_type,
+ MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT);
+
+ fg = mlx5_create_flow_group(ft, in);
+ kvfree(in);
+ return fg;
+}
+
+static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *lag_definer,
+ u8 *ports)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_namespace *ns;
+ int err, i;
+ int idx;
+ int j;
+
+ ft_attr.max_fte = ldev->ports * ldev->buckets;
+ ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL);
+ if (!ns) {
+ mlx5_core_warn(dev, "Failed to get port selection namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ lag_definer->ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(lag_definer->ft)) {
+ mlx5_core_warn(dev, "Failed to create port selection table\n");
+ return PTR_ERR(lag_definer->ft);
+ }
+
+ lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft,
+ lag_definer->definer,
+ ft_attr.max_fte);
+ if (IS_ERR(lag_definer->fg)) {
+ err = PTR_ERR(lag_definer->fg);
+ goto destroy_ft;
+ }
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ flow_act.flags |= FLOW_ACT_NO_APPEND;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ u8 affinity;
+
+ idx = i * ldev->buckets + j;
+ affinity = ports[idx];
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
+ vhca_id);
+ lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
+ NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(lag_definer->rules[idx])) {
+ err = PTR_ERR(lag_definer->rules[idx]);
+ while (i--)
+ while (j--)
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ goto destroy_fg;
+ }
+ }
+ }
+
+ return 0;
+
+destroy_fg:
+ mlx5_destroy_flow_group(lag_definer->fg);
+destroy_ft:
+ mlx5_destroy_flow_table(lag_definer->ft);
+ return err;
+}
+
+static int mlx5_lag_set_definer_inner(u32 *match_definer_mask,
+ enum mlx5_traffic_types tt)
+{
+ int format_id;
+ u8 *ipv6;
+
+ switch (tt) {
+ case MLX5_TT_IPV4_UDP:
+ case MLX5_TT_IPV4_TCP:
+ format_id = 23;
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l4_dport);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV4:
+ format_id = 23;
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l3_type);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV6_TCP:
+ case MLX5_TT_IPV6_UDP:
+ format_id = 31;
+ MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask,
+ inner_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask,
+ inner_l4_dport);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask,
+ inner_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask,
+ inner_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ break;
+ case MLX5_TT_IPV6:
+ format_id = 32;
+ ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask,
+ inner_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask,
+ inner_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_smac_15_0);
+ break;
+ default:
+ format_id = 23;
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l3_type);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_15_0);
+ break;
+ }
+
+ return format_id;
+}
+
+static int mlx5_lag_set_definer(u32 *match_definer_mask,
+ enum mlx5_traffic_types tt, bool tunnel,
+ enum netdev_lag_hash hash)
+{
+ int format_id;
+ u8 *ipv6;
+
+ if (tunnel)
+ return mlx5_lag_set_definer_inner(match_definer_mask, tt);
+
+ switch (tt) {
+ case MLX5_TT_IPV4_UDP:
+ case MLX5_TT_IPV4_TCP:
+ format_id = 22;
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_l4_dport);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV4:
+ format_id = 22;
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_l3_type);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_smac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV6_TCP:
+ case MLX5_TT_IPV6_UDP:
+ format_id = 29;
+ MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask,
+ outer_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask,
+ outer_l4_dport);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask,
+ outer_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask,
+ outer_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ break;
+ case MLX5_TT_IPV6:
+ format_id = 30;
+ ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask,
+ outer_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask,
+ outer_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_smac_15_0);
+ break;
+ default:
+ format_id = 0;
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_smac_15_0);
+
+ if (hash == NETDEV_LAG_HASH_VLAN_SRCMAC) {
+ MLX5_SET_TO_ONES(match_definer_format_0,
+ match_definer_mask,
+ outer_first_vlan_vid);
+ break;
+ }
+
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_ethertype);
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_dmac_15_0);
+ break;
+ }
+
+ return format_id;
+}
+
+static struct mlx5_lag_definer *
+mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
+ enum mlx5_traffic_types tt, bool tunnel, u8 *ports)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_definer *lag_definer;
+ u32 *match_definer_mask;
+ int format_id, err;
+
+ lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL);
+ if (!lag_definer)
+ return ERR_PTR(-ENOMEM);
+
+ match_definer_mask = kvzalloc(MLX5_FLD_SZ_BYTES(match_definer,
+ match_mask),
+ GFP_KERNEL);
+ if (!match_definer_mask) {
+ err = -ENOMEM;
+ goto free_lag_definer;
+ }
+
+ format_id = mlx5_lag_set_definer(match_definer_mask, tt, tunnel, hash);
+ lag_definer->definer =
+ mlx5_create_match_definer(dev, MLX5_FLOW_NAMESPACE_PORT_SEL,
+ format_id, match_definer_mask);
+ if (IS_ERR(lag_definer->definer)) {
+ err = PTR_ERR(lag_definer->definer);
+ goto free_mask;
+ }
+
+ err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports);
+ if (err)
+ goto destroy_match_definer;
+
+ kvfree(match_definer_mask);
+
+ return lag_definer;
+
+destroy_match_definer:
+ mlx5_destroy_match_definer(dev, lag_definer->definer);
+free_mask:
+ kvfree(match_definer_mask);
+free_lag_definer:
+ kfree(lag_definer);
+ return ERR_PTR(err);
+}
+
+static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *lag_definer)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ int idx;
+ int i;
+ int j;
+
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ }
+ mlx5_destroy_flow_group(lag_definer->fg);
+ mlx5_destroy_flow_table(lag_definer->ft);
+ mlx5_destroy_match_definer(dev, lag_definer->definer);
+ kfree(lag_definer);
+}
+
+static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int tt;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ if (port_sel->outer.definers[tt])
+ mlx5_lag_destroy_definer(ldev,
+ port_sel->outer.definers[tt]);
+ if (port_sel->inner.definers[tt])
+ mlx5_lag_destroy_definer(ldev,
+ port_sel->inner.definers[tt]);
+ }
+}
+
+static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type,
+ u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct mlx5_lag_definer *lag_definer;
+ int tt, err;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt,
+ false, ports);
+ if (IS_ERR(lag_definer)) {
+ err = PTR_ERR(lag_definer);
+ goto destroy_definers;
+ }
+ port_sel->outer.definers[tt] = lag_definer;
+
+ if (!port_sel->tunnel)
+ continue;
+
+ lag_definer =
+ mlx5_lag_create_definer(ldev, hash_type, tt,
+ true, ports);
+ if (IS_ERR(lag_definer)) {
+ err = PTR_ERR(lag_definer);
+ goto destroy_definers;
+ }
+ port_sel->inner.definers[tt] = lag_definer;
+ }
+
+ return 0;
+
+destroy_definers:
+ mlx5_lag_destroy_definers(ldev);
+ return err;
+}
+
+static void set_tt_map(struct mlx5_lag_port_sel *port_sel,
+ enum netdev_lag_hash hash)
+{
+ port_sel->tunnel = false;
+
+ switch (hash) {
+ case NETDEV_LAG_HASH_E34:
+ port_sel->tunnel = true;
+ fallthrough;
+ case NETDEV_LAG_HASH_L34:
+ set_bit(MLX5_TT_IPV4_TCP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV4_UDP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6_TCP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6_UDP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV4, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6, port_sel->tt_map);
+ set_bit(MLX5_TT_ANY, port_sel->tt_map);
+ break;
+ case NETDEV_LAG_HASH_E23:
+ port_sel->tunnel = true;
+ fallthrough;
+ case NETDEV_LAG_HASH_L23:
+ set_bit(MLX5_TT_IPV4, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6, port_sel->tt_map);
+ set_bit(MLX5_TT_ANY, port_sel->tt_map);
+ break;
+ default:
+ set_bit(MLX5_TT_ANY, port_sel->tt_map);
+ break;
+ }
+}
+
+#define SET_IGNORE_DESTS_BITS(tt_map, dests) \
+ do { \
+ int idx; \
+ \
+ for_each_clear_bit(idx, tt_map, MLX5_NUM_TT) \
+ set_bit(idx, dests); \
+ } while (0)
+
+static void mlx5_lag_set_inner_ttc_params(struct mlx5_lag *ldev,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct mlx5_flow_table_attr *ft_attr;
+ int tt;
+
+ ttc_params->ns = mlx5_get_flow_namespace(dev,
+ MLX5_FLOW_NAMESPACE_PORT_SEL);
+ ft_attr = &ttc_params->ft_attr;
+ ft_attr->level = MLX5_LAG_FT_LEVEL_INNER_TTC;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ ttc_params->dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->dests[tt].ft = port_sel->inner.definers[tt]->ft;
+ }
+ SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests);
+}
+
+static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct mlx5_flow_table_attr *ft_attr;
+ int tt;
+
+ ttc_params->ns = mlx5_get_flow_namespace(dev,
+ MLX5_FLOW_NAMESPACE_PORT_SEL);
+ ft_attr = &ttc_params->ft_attr;
+ ft_attr->level = MLX5_LAG_FT_LEVEL_TTC;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ ttc_params->dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->dests[tt].ft = port_sel->outer.definers[tt]->ft;
+ }
+ SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests);
+
+ ttc_params->inner_ttc = port_sel->tunnel;
+ if (!port_sel->tunnel)
+ return;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ ttc_params->tunnel_dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->tunnel_dests[tt].ft =
+ mlx5_get_ttc_flow_table(port_sel->inner.ttc);
+ }
+}
+
+static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct ttc_params ttc_params = {};
+
+ mlx5_lag_set_outer_ttc_params(ldev, &ttc_params);
+ port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params);
+ if (IS_ERR(port_sel->outer.ttc))
+ return PTR_ERR(port_sel->outer.ttc);
+
+ return 0;
+}
+
+static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct ttc_params ttc_params = {};
+
+ mlx5_lag_set_inner_ttc_params(ldev, &ttc_params);
+ port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params);
+ if (IS_ERR(port_sel->inner.ttc))
+ return PTR_ERR(port_sel->inner.ttc);
+
+ return 0;
+}
+
+int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type, u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+
+ set_tt_map(port_sel, hash_type);
+ err = mlx5_lag_create_definers(ldev, hash_type, ports);
+ if (err)
+ return err;
+
+ if (port_sel->tunnel) {
+ err = mlx5_lag_create_inner_ttc_table(ldev);
+ if (err)
+ goto destroy_definers;
+ }
+
+ err = mlx5_lag_create_ttc_table(ldev);
+ if (err)
+ goto destroy_inner;
+
+ return 0;
+
+destroy_inner:
+ if (port_sel->tunnel)
+ mlx5_destroy_ttc_table(port_sel->inner.ttc);
+destroy_definers:
+ mlx5_lag_destroy_definers(ldev);
+ return err;
+}
+
+static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *def,
+ u8 *ports)
+{
+ struct mlx5_flow_destination dest = {};
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ldev->v2p_map[i] == ports[i])
+ continue;
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
+ vhca_id);
+ err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer **definers,
+ u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+ int tt;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+
+ err = mlx5_lag_modify_definers_destinations(ldev,
+ port_sel->outer.definers,
+ ports);
+ if (err)
+ return err;
+
+ if (!port_sel->tunnel)
+ return 0;
+
+ return mlx5_lag_modify_definers_destinations(ldev,
+ port_sel->inner.definers,
+ ports);
+}
+
+void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+
+ mlx5_destroy_ttc_table(port_sel->outer.ttc);
+ if (port_sel->tunnel)
+ mlx5_destroy_ttc_table(port_sel->inner.ttc);
+ mlx5_lag_destroy_definers(ldev);
+ memset(port_sel, 0, sizeof(*port_sel));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
new file mode 100644
index 000000000000..5ec3af2a3ecd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_LAG_FS_H__
+#define __MLX5_LAG_FS_H__
+
+#include "lib/fs_ttc.h"
+
+struct mlx5_lag_definer {
+ struct mlx5_flow_definer *definer;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ /* Each port has ldev->buckets number of rules and they are arrange in
+ * [port * buckets .. port * buckets + buckets) locations
+ */
+ struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
+};
+
+struct mlx5_lag_ttc {
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_lag_definer *definers[MLX5_NUM_TT];
+};
+
+struct mlx5_lag_port_sel {
+ DECLARE_BITMAP(tt_map, MLX5_NUM_TT);
+ bool tunnel;
+ struct mlx5_lag_ttc outer;
+ struct mlx5_lag_ttc inner;
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports);
+void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev);
+int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type, u8 *ports);
+
+#else /* CONFIG_MLX5_ESWITCH */
+static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type,
+ u8 *ports)
+{
+ return 0;
+}
+
+static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
+{
+ return 0;
+}
+
+static inline void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) {}
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_LAG_FS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
new file mode 100644
index 000000000000..c971ff04dd04
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/transobj.h>
+#include "clock.h"
+#include "aso.h"
+#include "wq.h"
+
+struct mlx5_aso_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per napi poll */
+ struct mlx5_core_cq mcq;
+
+ /* control */
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_ctrl wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5_aso {
+ /* data path */
+ u16 cc;
+ u16 pc;
+
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5_aso_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ void __iomem *uar_map;
+ u32 sqn;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+
+} ____cacheline_aligned_in_smp;
+
+static void mlx5_aso_free_cq(struct mlx5_aso_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_alloc_cq(struct mlx5_core_dev *mdev, int numa_node,
+ void *cqc_data, struct mlx5_aso_cq *cq)
+{
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ struct mlx5_wq_param param;
+ int err;
+ u32 i;
+
+ param.buf_numa_node = numa_node;
+ param.db_numa_node = numa_node;
+
+ err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
+ if (err)
+ return err;
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->mdev = mdev;
+
+ return 0;
+}
+
+static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data)
+{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_core_dev *mdev = cq->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ void *in, *cqc;
+ int inlen, eqn;
+ int err;
+
+ err = mlx5_vector2eqn(mdev, 0, &eqn);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
+
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5_aso_destroy_cq(struct mlx5_aso_cq *cq)
+{
+ mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node,
+ struct mlx5_aso_cq *cq)
+{
+ void *cqc_data;
+ int err;
+
+ cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
+ if (!cqc_data)
+ return -ENOMEM;
+
+ MLX5_SET(cqc, cqc_data, log_cq_size, 1);
+ MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD);
+
+ err = mlx5_aso_alloc_cq(mdev, numa_node, cqc_data, cq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc aso wq cq, err=%d\n", err);
+ goto err_out;
+ }
+
+ err = create_aso_cq(cq, cqc_data);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create aso wq cq, err=%d\n", err);
+ goto err_free_cq;
+ }
+
+ kvfree(cqc_data);
+ return 0;
+
+err_free_cq:
+ mlx5_aso_free_cq(cq);
+err_out:
+ kvfree(cqc_data);
+ return err;
+}
+
+static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wq_param param;
+ int err;
+
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+
+ param.db_numa_node = numa_node;
+ param.buf_numa_node = numa_node;
+ err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ return 0;
+}
+
+static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ void *in, *sqc, *wq;
+ int inlen, err;
+ u8 ts_format;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * sq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
+ MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
+
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ ts_format = mlx5_is_real_time_sq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_aso_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ void *in, *sqc;
+ int inlen, err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+
+ err = mlx5_core_modify_sq(mdev, sqn, in);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_aso_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ int err;
+
+ err = create_aso_sq(mdev, pdn, sqc_data, sq);
+ if (err)
+ return err;
+
+ err = mlx5_aso_set_sq_rdy(mdev, sq->sqn);
+ if (err)
+ mlx5_core_destroy_sq(mdev, sq->sqn);
+
+ return err;
+}
+
+static void mlx5_aso_free_sq(struct mlx5_aso *sq)
+{
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5_aso_destroy_sq(struct mlx5_aso *sq)
+{
+ mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn);
+ mlx5_aso_free_sq(sq);
+}
+
+static int mlx5_aso_create_sq(struct mlx5_core_dev *mdev, int numa_node,
+ u32 pdn, struct mlx5_aso *sq)
+{
+ void *sqc_data, *wq;
+ int err;
+
+ sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
+ if (!sqc_data)
+ return -ENOMEM;
+
+ wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, pdn);
+ MLX5_SET(wq, wq, log_wq_sz, 1);
+
+ err = mlx5_aso_alloc_sq(mdev, numa_node, sqc_data, sq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc aso wq sq, err=%d\n", err);
+ goto err_out;
+ }
+
+ err = mlx5_aso_create_sq_rdy(mdev, pdn, sqc_data, sq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to open aso wq sq, err=%d\n", err);
+ goto err_free_asosq;
+ }
+
+ mlx5_core_dbg(mdev, "aso sq->sqn = 0x%x\n", sq->sqn);
+
+ kvfree(sqc_data);
+ return 0;
+
+err_free_asosq:
+ mlx5_aso_free_sq(sq);
+err_out:
+ kvfree(sqc_data);
+ return err;
+}
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn)
+{
+ int numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ struct mlx5_aso *aso;
+ int err;
+
+ aso = kzalloc(sizeof(*aso), GFP_KERNEL);
+ if (!aso)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_aso_create_cq(mdev, numa_node, &aso->cq);
+ if (err)
+ goto err_cq;
+
+ err = mlx5_aso_create_sq(mdev, numa_node, pdn, aso);
+ if (err)
+ goto err_sq;
+
+ return aso;
+
+err_sq:
+ mlx5_aso_destroy_cq(&aso->cq);
+err_cq:
+ kfree(aso);
+ return ERR_PTR(err);
+}
+
+void mlx5_aso_destroy(struct mlx5_aso *aso)
+{
+ if (IS_ERR_OR_NULL(aso))
+ return;
+
+ mlx5_aso_destroy_sq(aso);
+ mlx5_aso_destroy_cq(&aso->cq);
+ kfree(aso);
+}
+
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+ struct mlx5_aso_wqe *aso_wqe,
+ u32 obj_id, u32 opc_mode)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &aso_wqe->ctrl;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((opc_mode << MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT) |
+ (aso->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_ACCESS_ASO);
+ cseg->qpn_ds = cpu_to_be32((aso->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt);
+ cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ cseg->general_id = cpu_to_be32(obj_id);
+}
+
+void *mlx5_aso_get_wqe(struct mlx5_aso *aso)
+{
+ u16 pi;
+
+ pi = mlx5_wq_cyc_ctr2ix(&aso->wq, aso->pc);
+ return mlx5_wq_cyc_get_wqe(&aso->wq, pi);
+}
+
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg)
+{
+ doorbell_cseg->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ if (with_data)
+ aso->pc += MLX5_ASO_WQEBBS_DATA;
+ else
+ aso->pc += MLX5_ASO_WQEBBS;
+ *aso->wq.db = cpu_to_be32(aso->pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)doorbell_cseg, aso->uar_map);
+
+ /* Ensure doorbell is written on uar_page before poll_cq */
+ WRITE_ONCE(doorbell_cseg, NULL);
+}
+
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data)
+{
+ struct mlx5_aso_cq *cq = &aso->cq;
+ struct mlx5_cqe64 *cqe;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return -ETIMEDOUT;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ mlx5_cqwq_pop(&cq->wq);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ struct mlx5_err_cqe *err_cqe;
+
+ mlx5_core_err(cq->mdev, "Bad OP in ASOSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+
+ err_cqe = (struct mlx5_err_cqe *)cqe;
+ mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n",
+ err_cqe->vendor_err_synd);
+ mlx5_core_err(cq->mdev, "syndrome=%x\n",
+ err_cqe->syndrome);
+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
+ 16, 1, err_cqe,
+ sizeof(*err_cqe), false);
+ }
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ if (with_data)
+ aso->cc += MLX5_ASO_WQEBBS_DATA;
+ else
+ aso->cc += MLX5_ASO_WQEBBS;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
new file mode 100644
index 000000000000..2d40dcf9d42e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LIB_ASO_H__
+#define __MLX5_LIB_ASO_H__
+
+#include <linux/mlx5/qp.h>
+#include "mlx5_core.h"
+
+#define MLX5_ASO_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_BB))
+#define MLX5_ASO_WQEBBS_DATA \
+ (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_BB))
+#define ASO_CTRL_READ_EN BIT(0)
+#define MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT 24
+#define MLX5_MACSEC_ASO_DS_CNT (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_DS))
+
+struct mlx5_wqe_aso_ctrl_seg {
+ __be32 va_h;
+ __be32 va_l; /* include read_enable */
+ __be32 l_key;
+ u8 data_mask_mode;
+ u8 condition_1_0_operand;
+ u8 condition_1_0_offset;
+ u8 data_offset_condition_operand;
+ __be32 condition_0_data;
+ __be32 condition_0_mask;
+ __be32 condition_1_data;
+ __be32 condition_1_mask;
+ __be64 bitwise_data;
+ __be64 data_mask;
+};
+
+struct mlx5_wqe_aso_data_seg {
+ __be32 bytewise_data[16];
+};
+
+struct mlx5_aso_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_aso_ctrl_seg aso_ctrl;
+};
+
+struct mlx5_aso_wqe_data {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_aso_ctrl_seg aso_ctrl;
+ struct mlx5_wqe_aso_data_seg aso_data;
+};
+
+enum {
+ MLX5_ASO_LOGICAL_AND,
+ MLX5_ASO_LOGICAL_OR,
+};
+
+enum {
+ MLX5_ASO_ALWAYS_FALSE,
+ MLX5_ASO_ALWAYS_TRUE,
+ MLX5_ASO_EQUAL,
+ MLX5_ASO_NOT_EQUAL,
+ MLX5_ASO_GREATER_OR_EQUAL,
+ MLX5_ASO_LESSER_OR_EQUAL,
+ MLX5_ASO_LESSER,
+ MLX5_ASO_GREATER,
+ MLX5_ASO_CYCLIC_GREATER,
+ MLX5_ASO_CYCLIC_LESSER,
+};
+
+enum {
+ MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT,
+ MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE,
+ MLX5_ASO_DATA_MASK_MODE_CALCULATED_64BYTE,
+};
+
+enum {
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER = 0x2,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC = 0x5,
+};
+
+struct mlx5_aso;
+
+void *mlx5_aso_get_wqe(struct mlx5_aso *aso);
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+ struct mlx5_aso_wqe *aso_wqe,
+ u32 obj_id, u32 opc_mode);
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg);
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data);
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn);
+void mlx5_aso_destroy(struct mlx5_aso *aso);
+#endif /* __MLX5_LIB_ASO_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 43f97601b500..d3a9ae80fd30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -32,6 +32,7 @@
#include <linux/clocksource.h>
#include <linux/highmem.h>
+#include <linux/ptp_clock_kernel.h>
#include <rdma/mlx5-abi.h>
#include "lib/eq.h"
#include "en.h"
@@ -64,21 +65,79 @@ enum {
MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4),
MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
+ MLX5_MTPPS_FS_NPPS_PERIOD = BIT(0x9),
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS = BIT(0xa),
};
+static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev));
+}
+
+static bool mlx5_npps_real_time_supported(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_real_time_mode(mdev) &&
+ MLX5_CAP_MCAM_FEATURE(mdev, npps_period) &&
+ MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns));
+}
+
+static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
+}
+
+static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size)
+{
+ u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!MLX5_CAP_MCAM_REG(dev, mtutc))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(dev, mtutc, size, out, sizeof(out),
+ MLX5_REG_MTUTC, 0, 1);
+}
+
+static u64 mlx5_read_time(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts,
+ bool real_time)
+{
+ u32 timer_h, timer_h1, timer_l;
+
+ timer_h = ioread32be(real_time ? &dev->iseg->real_time_h :
+ &dev->iseg->internal_timer_h);
+ ptp_read_system_prets(sts);
+ timer_l = ioread32be(real_time ? &dev->iseg->real_time_l :
+ &dev->iseg->internal_timer_l);
+ ptp_read_system_postts(sts);
+ timer_h1 = ioread32be(real_time ? &dev->iseg->real_time_h :
+ &dev->iseg->internal_timer_h);
+ if (timer_h != timer_h1) {
+ /* wrap around */
+ ptp_read_system_prets(sts);
+ timer_l = ioread32be(real_time ? &dev->iseg->real_time_l :
+ &dev->iseg->internal_timer_l);
+ ptp_read_system_postts(sts);
+ }
+
+ return real_time ? REAL_TIME_TO_NS(timer_h1, timer_l) :
+ (u64)timer_l | (u64)timer_h1 << 32;
+}
+
static u64 read_internal_timer(const struct cyclecounter *cc)
{
- struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles);
+ struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles);
+ struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer);
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
- return mlx5_read_internal_timer(mdev, NULL) & cc->mask;
+ return mlx5_read_time(mdev, NULL, false) & cc->mask;
}
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_timer *timer;
u32 sign;
if (!clock_info)
@@ -88,10 +147,11 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
smp_store_mb(clock_info->sign,
sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
- clock_info->cycles = clock->tc.cycle_last;
- clock_info->mult = clock->cycles.mult;
- clock_info->nsec = clock->tc.nsec;
- clock_info->frac = clock->tc.frac;
+ timer = &clock->timer;
+ clock_info->cycles = timer->tc.cycle_last;
+ clock_info->mult = timer->cycles.mult;
+ clock_info->nsec = timer->tc.nsec;
+ clock_info->frac = timer->tc.frac;
smp_store_release(&clock_info->sign,
sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
@@ -129,90 +189,186 @@ static void mlx5_pps_out(struct work_struct *work)
static void mlx5_timestamp_overflow(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
- struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock,
- overflow_work);
+ struct mlx5_core_dev *mdev;
+ struct mlx5_timer *timer;
+ struct mlx5_clock *clock;
unsigned long flags;
+ timer = container_of(dwork, struct mlx5_timer, overflow_work);
+ clock = container_of(timer, struct mlx5_clock, timer);
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
write_seqlock_irqsave(&clock->lock, flags);
- timecounter_read(&clock->tc);
- mlx5_update_clock_info_page(clock->mdev);
+ timecounter_read(&timer->tc);
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
- schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
+ schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
}
-static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
- const struct timespec64 *ts)
+static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
+ const struct timespec64 *ts)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
- u64 ns = timespec64_to_ns(ts);
+ u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!mlx5_modify_mtutc_allowed(mdev))
+ return 0;
+
+ if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX ||
+ ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC)
+ return -EINVAL;
+
+ MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE);
+ MLX5_SET(mtutc_reg, in, utc_sec, ts->tv_sec);
+ MLX5_SET(mtutc_reg, in, utc_nsec, ts->tv_nsec);
+
+ return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
unsigned long flags;
+ int err;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+ err = mlx5_ptp_settime_real_time(mdev, ts);
+ if (err)
+ return err;
write_seqlock_irqsave(&clock->lock, flags);
- timecounter_init(&clock->tc, &clock->cycles, ns);
- mlx5_update_clock_info_page(clock->mdev);
+ timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts));
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
+static
+struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev,
+ struct ptp_system_timestamp *sts)
+{
+ struct timespec64 ts;
+ u64 time;
+
+ time = mlx5_read_time(mdev, sts, true);
+ ts = ns_to_timespec64(time);
+ return ts;
+}
+
static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
struct ptp_system_timestamp *sts)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
- clock);
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
unsigned long flags;
u64 cycles, ns;
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+ if (mlx5_real_time_mode(mdev)) {
+ *ts = mlx5_ptp_gettimex_real_time(mdev, sts);
+ goto out;
+ }
+
write_seqlock_irqsave(&clock->lock, flags);
- cycles = mlx5_read_internal_timer(mdev, sts);
- ns = timecounter_cyc2time(&clock->tc, cycles);
+ cycles = mlx5_read_time(mdev, sts, false);
+ ns = timecounter_cyc2time(&timer->tc, cycles);
write_sequnlock_irqrestore(&clock->lock, flags);
-
*ts = ns_to_timespec64(ns);
-
+out:
return 0;
}
+static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta)
+{
+ u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!mlx5_modify_mtutc_allowed(mdev))
+ return 0;
+
+ /* HW time adjustment range is s16. If out of range, settime instead */
+ if (delta < S16_MIN || delta > S16_MAX) {
+ struct timespec64 ts;
+ s64 ns;
+
+ ts = mlx5_ptp_gettimex_real_time(mdev, NULL);
+ ns = timespec64_to_ns(&ts) + delta;
+ ts = ns_to_timespec64(ns);
+ return mlx5_ptp_settime_real_time(mdev, &ts);
+ }
+
+ MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_TIME);
+ MLX5_SET(mtutc_reg, in, time_adjustment, delta);
+
+ return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
unsigned long flags;
+ int err;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+ err = mlx5_ptp_adjtime_real_time(mdev, delta);
+ if (err)
+ return err;
write_seqlock_irqsave(&clock->lock, flags);
- timecounter_adjtime(&clock->tc, delta);
- mlx5_update_clock_info_page(clock->mdev);
+ timecounter_adjtime(&timer->tc, delta);
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
+static int mlx5_ptp_adjfreq_real_time(struct mlx5_core_dev *mdev, s32 freq)
+{
+ u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!mlx5_modify_mtutc_allowed(mdev))
+ return 0;
+
+ MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC);
+ MLX5_SET(mtutc_reg, in, freq_adjustment, freq);
+
+ return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
{
- u64 adj;
- u32 diff;
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
unsigned long flags;
int neg_adj = 0;
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
+ u32 diff;
+ u64 adj;
+ int err;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+ err = mlx5_ptp_adjfreq_real_time(mdev, delta);
+ if (err)
+ return err;
if (delta < 0) {
neg_adj = 1;
delta = -delta;
}
- adj = clock->nominal_c_mult;
+ adj = timer->nominal_c_mult;
adj *= delta;
diff = div_u64(adj, 1000000000ULL);
write_seqlock_irqsave(&clock->lock, flags);
- timecounter_read(&clock->tc);
- clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
- clock->nominal_c_mult + diff;
- mlx5_update_clock_info_page(clock->mdev);
+ timecounter_read(&timer->tc);
+ timer->cycles.mult = neg_adj ? timer->nominal_c_mult - diff :
+ timer->nominal_c_mult + diff;
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
@@ -252,17 +408,17 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
if (rq->extts.index >= clock->ptp_info.n_pins)
return -EINVAL;
+ pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+ if (pin < 0)
+ return -EBUSY;
+
if (on) {
- pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
- if (pin < 0)
- return -EBUSY;
pin_mode = MLX5_PIN_MODE_IN;
pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
field_select = MLX5_MTPPS_FS_PIN_MODE |
MLX5_MTPPS_FS_PATTERN |
MLX5_MTPPS_FS_ENABLE;
} else {
- pin = rq->extts.index;
field_select = MLX5_MTPPS_FS_ENABLE;
}
@@ -280,6 +436,129 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
MLX5_EVENT_MODE_REPETETIVE & on);
}
+static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u64 cycles_now, cycles_delta;
+ u64 nsec_now, nsec_delta;
+ struct mlx5_timer *timer;
+ unsigned long flags;
+
+ timer = &clock->timer;
+
+ cycles_now = mlx5_read_time(mdev, NULL, false);
+ write_seqlock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&timer->tc, cycles_now);
+ nsec_delta = target_ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << timer->cycles.shift,
+ timer->cycles.mult);
+ write_sequnlock_irqrestore(&clock->lock, flags);
+
+ return cycles_now + cycles_delta;
+}
+
+static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec)
+{
+ struct timespec64 ts = {};
+ s64 target_ns;
+
+ ts.tv_sec = sec;
+ target_ns = timespec64_to_ns(&ts);
+
+ return find_target_cycles(mdev, target_ns);
+}
+
+static u64 perout_conf_real_time(s64 sec, u32 nsec)
+{
+ return (u64)nsec | (u64)sec << 32;
+}
+
+static int perout_conf_1pps(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+ u64 *time_stamp, bool real_time)
+{
+ struct timespec64 ts;
+ s64 ns;
+
+ ts.tv_nsec = rq->perout.period.nsec;
+ ts.tv_sec = rq->perout.period.sec;
+ ns = timespec64_to_ns(&ts);
+
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+
+ *time_stamp = real_time ? perout_conf_real_time(rq->perout.start.sec, 0) :
+ perout_conf_internal_timer(mdev, rq->perout.start.sec);
+
+ return 0;
+}
+
+#define MLX5_MAX_PULSE_DURATION (BIT(__mlx5_bit_sz(mtpps_reg, out_pulse_duration_ns)) - 1)
+static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev,
+ struct ptp_clock_request *rq,
+ u32 *out_pulse_duration_ns)
+{
+ struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ u32 out_pulse_duration;
+ struct timespec64 ts;
+
+ if (rq->perout.flags & PTP_PEROUT_DUTY_CYCLE) {
+ ts.tv_sec = rq->perout.on.sec;
+ ts.tv_nsec = rq->perout.on.nsec;
+ out_pulse_duration = (u32)timespec64_to_ns(&ts);
+ } else {
+ /* out_pulse_duration_ns should be up to 50% of the
+ * pulse period as default
+ */
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ out_pulse_duration = (u32)timespec64_to_ns(&ts) >> 1;
+ }
+
+ if (out_pulse_duration < pps_info->min_out_pulse_duration_ns ||
+ out_pulse_duration > MLX5_MAX_PULSE_DURATION) {
+ mlx5_core_err(mdev, "NPPS pulse duration %u is not in [%llu, %lu]\n",
+ out_pulse_duration, pps_info->min_out_pulse_duration_ns,
+ MLX5_MAX_PULSE_DURATION);
+ return -EINVAL;
+ }
+ *out_pulse_duration_ns = out_pulse_duration;
+
+ return 0;
+}
+
+static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+ u32 *field_select, u32 *out_pulse_duration_ns,
+ u64 *period, u64 *time_stamp)
+{
+ struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ struct ptp_clock_time *time = &rq->perout.start;
+ struct timespec64 ts;
+
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ if (timespec64_to_ns(&ts) < pps_info->min_npps_period) {
+ mlx5_core_err(mdev, "NPPS period is lower than minimal npps period %llu\n",
+ pps_info->min_npps_period);
+ return -EINVAL;
+ }
+ *period = perout_conf_real_time(rq->perout.period.sec, rq->perout.period.nsec);
+
+ if (mlx5_perout_conf_out_pulse_duration(mdev, rq, out_pulse_duration_ns))
+ return -EINVAL;
+
+ *time_stamp = perout_conf_real_time(time->sec, time->nsec);
+ *field_select |= MLX5_MTPPS_FS_NPPS_PERIOD |
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS;
+
+ return 0;
+}
+
+static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags)
+{
+ return ((!mlx5_npps_real_time_supported(mdev) && flags) ||
+ (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE));
+}
+
static int mlx5_perout_configure(struct ptp_clock_info *ptp,
struct ptp_clock_request *rq,
int on)
@@ -288,61 +567,53 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev =
container_of(clock, struct mlx5_core_dev, clock);
+ bool rt_mode = mlx5_real_time_mode(mdev);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
- u64 nsec_now, nsec_delta, time_stamp = 0;
- u64 cycles_now, cycles_delta;
- struct timespec64 ts;
- unsigned long flags;
+ u32 out_pulse_duration_ns = 0;
u32 field_select = 0;
+ u64 npps_period = 0;
+ u64 time_stamp = 0;
u8 pin_mode = 0;
u8 pattern = 0;
int pin = -1;
int err = 0;
- s64 ns;
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
/* Reject requests with unsupported flags */
- if (rq->perout.flags)
+ if (mlx5_perout_verify_flags(mdev, rq->perout.flags))
return -EOPNOTSUPP;
if (rq->perout.index >= clock->ptp_info.n_pins)
return -EINVAL;
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index);
+ if (pin < 0)
+ return -EBUSY;
+
if (on) {
- pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT,
- rq->perout.index);
- if (pin < 0)
- return -EBUSY;
+ bool rt_mode = mlx5_real_time_mode(mdev);
pin_mode = MLX5_PIN_MODE_OUT;
pattern = MLX5_OUT_PATTERN_PERIODIC;
- ts.tv_sec = rq->perout.period.sec;
- ts.tv_nsec = rq->perout.period.nsec;
- ns = timespec64_to_ns(&ts);
- if ((ns >> 1) != 500000000LL)
+ if (rt_mode && rq->perout.start.sec > U32_MAX)
return -EINVAL;
- ts.tv_sec = rq->perout.start.sec;
- ts.tv_nsec = rq->perout.start.nsec;
- ns = timespec64_to_ns(&ts);
- cycles_now = mlx5_read_internal_timer(mdev, NULL);
- write_seqlock_irqsave(&clock->lock, flags);
- nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
- nsec_delta = ns - nsec_now;
- cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
- clock->cycles.mult);
- write_sequnlock_irqrestore(&clock->lock, flags);
- time_stamp = cycles_now + cycles_delta;
- field_select = MLX5_MTPPS_FS_PIN_MODE |
- MLX5_MTPPS_FS_PATTERN |
- MLX5_MTPPS_FS_ENABLE |
- MLX5_MTPPS_FS_TIME_STAMP;
- } else {
- pin = rq->perout.index;
- field_select = MLX5_MTPPS_FS_ENABLE;
+ field_select |= MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_TIME_STAMP;
+
+ if (mlx5_npps_real_time_supported(mdev))
+ err = perout_conf_npps_real_time(mdev, rq, &field_select,
+ &out_pulse_duration_ns, &npps_period,
+ &time_stamp);
+ else
+ err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode);
+ if (err)
+ return err;
}
MLX5_SET(mtpps_reg, in, pin, pin);
@@ -351,11 +622,15 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
MLX5_SET(mtpps_reg, in, enable, on);
MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
MLX5_SET(mtpps_reg, in, field_select, field_select);
-
+ MLX5_SET64(mtpps_reg, in, npps_period, npps_period);
+ MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns);
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
return err;
+ if (rt_mode)
+ return 0;
+
return mlx5_set_mtppse(mdev, pin, 0,
MLX5_EVENT_MODE_REPETETIVE & on);
}
@@ -388,15 +663,34 @@ static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
return 0;
}
+enum {
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0),
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1),
+};
+
static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
enum ptp_pin_function func, unsigned int chan)
{
- return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0;
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+
+ switch (func) {
+ case PTP_PF_NONE:
+ return 0;
+ case PTP_PF_EXTTS:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN);
+ case PTP_PF_PEROUT:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT);
+ default:
+ return -EOPNOTSUPP;
+ }
}
static const struct ptp_clock_info mlx5_ptp_clock_info = {
.owner = THIS_MODULE,
- .name = "mlx5_p2p",
+ .name = "mlx5_ptp",
.max_adj = 100000000,
.n_alarm = 0,
.n_ext_ts = 0,
@@ -411,16 +705,52 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
.verify = NULL,
};
-static int mlx5_init_pin_config(struct mlx5_clock *clock)
+static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
+ u32 *mtpps, u32 mtpps_size)
+{
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps,
+ mtpps_size, MLX5_REG_MTPPS, 0, 0);
+}
+
+static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
+{
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+ u8 mode;
+ int err;
+
+ err = mlx5_query_mtpps_pin_mode(mdev, pin, out, sizeof(out));
+ if (err || !MLX5_GET(mtpps_reg, out, enable))
+ return PTP_PF_NONE;
+
+ mode = MLX5_GET(mtpps_reg, out, pin_mode);
+
+ if (mode == MLX5_PIN_MODE_IN)
+ return PTP_PF_EXTTS;
+ else if (mode == MLX5_PIN_MODE_OUT)
+ return PTP_PF_PEROUT;
+
+ return PTP_PF_NONE;
+}
+
+static void mlx5_init_pin_config(struct mlx5_clock *clock)
{
int i;
+ if (!clock->ptp_info.n_pins)
+ return;
+
clock->ptp_info.pin_config =
kcalloc(clock->ptp_info.n_pins,
sizeof(*clock->ptp_info.pin_config),
GFP_KERNEL);
if (!clock->ptp_info.pin_config)
- return -ENOMEM;
+ return;
clock->ptp_info.enable = mlx5_ptp_enable;
clock->ptp_info.verify = mlx5_ptp_verify;
clock->ptp_info.pps = 1;
@@ -430,11 +760,9 @@ static int mlx5_init_pin_config(struct mlx5_clock *clock)
sizeof(clock->ptp_info.pin_config[i].name),
"mlx5_pps%d", i);
clock->ptp_info.pin_config[i].index = i;
- clock->ptp_info.pin_config[i].func = PTP_PF_NONE;
- clock->ptp_info.pin_config[i].chan = i;
+ clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
+ clock->ptp_info.pin_config[i].chan = 0;
}
-
- return 0;
}
static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
@@ -451,6 +779,13 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
cap_max_num_of_pps_out_pins);
+ if (MLX5_CAP_MCAM_FEATURE(mdev, npps_period))
+ clock->pps_info.min_npps_period = 1 << MLX5_GET(mtpps_reg, out,
+ cap_log_min_npps_period);
+ if (MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns))
+ clock->pps_info.min_out_pulse_duration_ns = 1 << MLX5_GET(mtpps_reg, out,
+ cap_log_min_out_pulse_duration_ns);
+
clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
@@ -461,24 +796,46 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
}
+static void ts_next_sec(struct timespec64 *ts)
+{
+ ts->tv_sec += 1;
+ ts->tv_nsec = 0;
+}
+
+static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev,
+ struct mlx5_clock *clock)
+{
+ struct timespec64 ts;
+ s64 target_ns;
+
+ mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
+ ts_next_sec(&ts);
+ target_ns = timespec64_to_ns(&ts);
+
+ return find_target_cycles(mdev, target_ns);
+}
+
static int mlx5_pps_event(struct notifier_block *nb,
unsigned long type, void *data)
{
struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
- struct mlx5_core_dev *mdev = clock->mdev;
struct ptp_clock_event ptp_event;
- u64 cycles_now, cycles_delta;
- u64 nsec_now, nsec_delta, ns;
struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
- struct timespec64 ts;
+ struct mlx5_core_dev *mdev;
unsigned long flags;
+ u64 ns;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
switch (clock->ptp_info.pin_config[pin].func) {
case PTP_PF_EXTTS:
ptp_event.index = pin;
- ptp_event.timestamp = timecounter_cyc2time(&clock->tc,
- be64_to_cpu(eqe->data.pps.time_stamp));
+ ptp_event.timestamp = mlx5_real_time_mode(mdev) ?
+ mlx5_real_time_cyc2time(clock,
+ be64_to_cpu(eqe->data.pps.time_stamp)) :
+ mlx5_timecounter_cyc2time(clock,
+ be64_to_cpu(eqe->data.pps.time_stamp));
if (clock->pps_info.enabled) {
ptp_event.type = PTP_CLOCK_PPSUSR;
ptp_event.pps_times.ts_real =
@@ -486,23 +843,15 @@ static int mlx5_pps_event(struct notifier_block *nb,
} else {
ptp_event.type = PTP_CLOCK_EXTTS;
}
- /* TODOL clock->ptp can be NULL if ptp_clock_register failes */
+ /* TODOL clock->ptp can be NULL if ptp_clock_register fails */
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
- mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
- cycles_now = mlx5_read_internal_timer(mdev, NULL);
- ts.tv_sec += 1;
- ts.tv_nsec = 0;
- ns = timespec64_to_ns(&ts);
+ ns = perout_conf_next_event_timer(mdev, clock);
write_seqlock_irqsave(&clock->lock, flags);
- nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
- nsec_delta = ns - nsec_now;
- cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
- clock->cycles.mult);
- clock->pps_info.start[pin] = cycles_now + cycles_delta;
- schedule_work(&clock->pps_info.out_work);
+ clock->pps_info.start[pin] = ns;
write_sequnlock_irqrestore(&clock->lock, flags);
+ schedule_work(&clock->pps_info.out_work);
break;
default:
mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
@@ -512,30 +861,32 @@ static int mlx5_pps_event(struct notifier_block *nb,
return NOTIFY_OK;
}
-void mlx5_init_clock(struct mlx5_core_dev *mdev)
+static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
- u64 overflow_cycles;
- u64 ns;
- u64 frac = 0;
+ struct mlx5_timer *timer = &clock->timer;
u32 dev_freq;
dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
- if (!dev_freq) {
- mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
- return;
- }
- seqlock_init(&clock->lock);
- clock->cycles.read = read_internal_timer;
- clock->cycles.shift = MLX5_CYCLES_SHIFT;
- clock->cycles.mult = clocksource_khz2mult(dev_freq,
- clock->cycles.shift);
- clock->nominal_c_mult = clock->cycles.mult;
- clock->cycles.mask = CLOCKSOURCE_MASK(41);
- clock->mdev = mdev;
-
- timecounter_init(&clock->tc, &clock->cycles,
+ timer->cycles.read = read_internal_timer;
+ timer->cycles.shift = MLX5_CYCLES_SHIFT;
+ timer->cycles.mult = clocksource_khz2mult(dev_freq,
+ timer->cycles.shift);
+ timer->nominal_c_mult = timer->cycles.mult;
+ timer->cycles.mask = CLOCKSOURCE_MASK(41);
+
+ timecounter_init(&timer->tc, &timer->cycles,
ktime_to_ns(ktime_get_real()));
+}
+
+static void mlx5_init_overflow_period(struct mlx5_clock *clock)
+{
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
+ struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_timer *timer = &clock->timer;
+ u64 overflow_cycles;
+ u64 frac = 0;
+ u64 ns;
/* Calculate period in seconds to call the overflow watchdog - to make
* sure counter is checked at least twice every wrap around.
@@ -544,41 +895,94 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
* multiplied by clock multiplier where the result doesn't exceed
* 64bits.
*/
- overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
- overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
+ overflow_cycles = div64_u64(~0ULL >> 1, timer->cycles.mult);
+ overflow_cycles = min(overflow_cycles, div_u64(timer->cycles.mask, 3));
- ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
+ ns = cyclecounter_cyc2ns(&timer->cycles, overflow_cycles,
frac, &frac);
do_div(ns, NSEC_PER_SEC / HZ);
- clock->overflow_period = ns;
+ timer->overflow_period = ns;
- mdev->clock_info =
- (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL);
- if (mdev->clock_info) {
- mdev->clock_info->nsec = clock->tc.nsec;
- mdev->clock_info->cycles = clock->tc.cycle_last;
- mdev->clock_info->mask = clock->cycles.mask;
- mdev->clock_info->mult = clock->nominal_c_mult;
- mdev->clock_info->shift = clock->cycles.shift;
- mdev->clock_info->frac = clock->tc.frac;
- mdev->clock_info->overflow_period = clock->overflow_period;
+ INIT_DELAYED_WORK(&timer->overflow_work, mlx5_timestamp_overflow);
+ if (timer->overflow_period)
+ schedule_delayed_work(&timer->overflow_work, 0);
+ else
+ mlx5_core_warn(mdev,
+ "invalid overflow period, overflow_work is not scheduled\n");
+
+ if (clock_info)
+ clock_info->overflow_period = timer->overflow_period;
+}
+
+static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_ib_clock_info *info;
+ struct mlx5_timer *timer;
+
+ mdev->clock_info = (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL);
+ if (!mdev->clock_info) {
+ mlx5_core_warn(mdev, "Failed to allocate IB clock info page\n");
+ return;
+ }
+
+ info = mdev->clock_info;
+ timer = &clock->timer;
+
+ info->nsec = timer->tc.nsec;
+ info->cycles = timer->tc.cycle_last;
+ info->mask = timer->cycles.mask;
+ info->mult = timer->nominal_c_mult;
+ info->shift = timer->cycles.shift;
+ info->frac = timer->tc.frac;
+}
+
+static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ mlx5_timecounter_init(mdev);
+ mlx5_init_clock_info(mdev);
+ mlx5_init_overflow_period(clock);
+ clock->ptp_info = mlx5_ptp_clock_info;
+
+ if (mlx5_real_time_mode(mdev)) {
+ struct timespec64 ts;
+
+ ktime_get_real_ts64(&ts);
+ mlx5_ptp_settime(&clock->ptp_info, &ts);
}
+}
+
+static void mlx5_init_pps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ if (!MLX5_PPS_CAP(mdev))
+ return;
+
+ mlx5_get_pps_caps(mdev);
+ mlx5_init_pin_config(clock);
+}
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
+ mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+ return;
+ }
+
+ seqlock_init(&clock->lock);
+ mlx5_init_timer_clock(mdev);
INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
- INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
- if (clock->overflow_period)
- schedule_delayed_work(&clock->overflow_work, 0);
- else
- mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n");
/* Configure the PHC */
clock->ptp_info = mlx5_ptp_clock_info;
/* Initialize 1PPS data structures */
- if (MLX5_PPS_CAP(mdev))
- mlx5_get_pps_caps(mdev);
- if (clock->ptp_info.n_pins)
- mlx5_init_pin_config(clock);
+ mlx5_init_pps(mdev);
clock->ptp = ptp_clock_register(&clock->ptp_info,
&mdev->pdev->dev);
@@ -606,7 +1010,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
}
cancel_work_sync(&clock->pps_info.out_work);
- cancel_delayed_work_sync(&clock->overflow_work);
+ cancel_delayed_work_sync(&clock->timer.overflow_work);
if (mdev->clock_info) {
free_page((unsigned long)mdev->clock_info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 31600924bdc3..bd95b9f8d143 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -33,6 +33,26 @@
#ifndef __LIB_CLOCK_H__
#define __LIB_CLOCK_H__
+static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev)
+{
+ u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format);
+
+ return (rq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+ rq_ts_format_cap ==
+ MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME);
+}
+
+static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev)
+{
+ u8 sq_ts_format_cap = MLX5_CAP_GEN(mdev, sq_ts_format);
+
+ return (sq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+ sq_ts_format_cap ==
+ MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME);
+}
+
+typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64);
+
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
@@ -45,17 +65,27 @@ static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
u64 timestamp)
{
+ struct mlx5_timer *timer = &clock->timer;
unsigned int seq;
u64 nsec;
do {
seq = read_seqbegin(&clock->lock);
- nsec = timecounter_cyc2time(&clock->tc, timestamp);
+ nsec = timecounter_cyc2time(&timer->tc, timestamp);
} while (read_seqretry(&clock->lock, seq));
return ns_to_ktime(nsec);
}
+#define REAL_TIME_TO_NS(hi, low) (((u64)hi) * NSEC_PER_SEC + ((u64)low))
+
+static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ u64 time = REAL_TIME_TO_NS(timestamp >> 32, timestamp & 0xFFFFFFFF);
+
+ return ns_to_ktime(time);
+}
#else
static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
@@ -69,6 +99,23 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
{
return 0;
}
+
+static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ return 0;
+}
#endif
+static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev)
+{
+ return mlx5_is_real_time_rq(mdev) ? mlx5_real_time_cyc2time :
+ mlx5_timecounter_cyc2time;
+}
+
+static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev)
+{
+ return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time :
+ mlx5_timecounter_cyc2time;
+}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
index 3fc575d1c3ec..e995f8378df7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -6,7 +6,7 @@
int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
void *key, u32 sz_bytes,
- u32 *p_key_id)
+ u32 key_type, u32 *p_key_id)
{
u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
@@ -41,13 +41,12 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
memcpy(key_p, key, sz_bytes);
MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size);
- MLX5_SET(encryption_key_obj, obj, key_type,
- MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK);
+ MLX5_SET(encryption_key_obj, obj, key_type, key_type);
MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
- MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn);
+ MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn);
err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
if (!err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
index bced2efe9bef..adefde3ea941 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -14,7 +14,7 @@ static LIST_HEAD(devcom_list);
struct mlx5_devcom_component {
struct {
void *data;
- } device[MLX5_MAX_PORTS];
+ } device[MLX5_DEVCOM_PORTS_SUPPORTED];
mlx5_devcom_event_handler_t handler;
struct rw_semaphore sem;
@@ -25,7 +25,7 @@ struct mlx5_devcom_list {
struct list_head list;
struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
- struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+ struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED];
};
struct mlx5_devcom {
@@ -74,13 +74,15 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (!mlx5_core_is_pf(dev))
return NULL;
+ if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
+ return NULL;
sguid0 = mlx5_query_nic_system_image_guid(dev);
list_for_each_entry(iter, &devcom_list, list) {
struct mlx5_core_dev *tmp_dev = NULL;
idx = -1;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
if (iter->devs[i])
tmp_dev = iter->devs[i];
else
@@ -134,11 +136,11 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
kfree(devcom);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (priv->devs[i])
break;
- if (i != MLX5_MAX_PORTS)
+ if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
return;
list_del(&priv->list);
@@ -191,7 +193,7 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx && comp->device[i].data) {
err = comp->handler(event, comp->device[i].data,
event_data);
@@ -239,7 +241,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
return NULL;
}
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx)
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index 939d5bf1581b..94313c18bb64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -6,6 +6,8 @@
#include <linux/mlx5/driver.h>
+#define MLX5_DEVCOM_PORTS_SUPPORTED 2
+
enum mlx5_devcom_components {
MLX5_DEVCOM_ESW_OFFLOADS,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
index e065c2f68f5a..9482e51ac82a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -12,16 +12,19 @@ struct mlx5_dm {
spinlock_t lock;
unsigned long *steering_sw_icm_alloc_blocks;
unsigned long *header_modify_sw_icm_alloc_blocks;
+ unsigned long *header_modify_pattern_sw_icm_alloc_blocks;
};
struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
{
+ u64 header_modify_pattern_icm_blocks = 0;
u64 header_modify_icm_blocks = 0;
u64 steering_icm_blocks = 0;
struct mlx5_dm *dm;
+ bool support_v2;
if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
- return 0;
+ return NULL;
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
@@ -35,8 +38,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
dm->steering_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(steering_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
+ bitmap_zalloc(steering_icm_blocks, GFP_KERNEL);
if (!dm->steering_sw_icm_alloc_blocks)
goto err_steering;
}
@@ -47,16 +49,33 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
dm->header_modify_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
+ bitmap_zalloc(header_modify_icm_blocks, GFP_KERNEL);
if (!dm->header_modify_sw_icm_alloc_blocks)
goto err_modify_hdr;
}
+ support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) &&
+ MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address);
+
+ if (support_v2) {
+ header_modify_pattern_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->header_modify_pattern_sw_icm_alloc_blocks =
+ bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL);
+ if (!dm->header_modify_pattern_sw_icm_alloc_blocks)
+ goto err_pattern;
+ }
+
return dm;
+err_pattern:
+ bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+
err_modify_hdr:
- kfree(dm->steering_sw_icm_alloc_blocks);
+ bitmap_free(dm->steering_sw_icm_alloc_blocks);
err_steering:
kfree(dm);
@@ -75,7 +94,7 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks,
BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
- kfree(dm->steering_sw_icm_alloc_blocks);
+ bitmap_free(dm->steering_sw_icm_alloc_blocks);
}
if (dm->header_modify_sw_icm_alloc_blocks) {
@@ -83,14 +102,23 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
BIT(MLX5_CAP_DEV_MEM(dev,
log_header_modify_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
- kfree(dm->header_modify_sw_icm_alloc_blocks);
+ bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+ }
+
+ if (dm->header_modify_pattern_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ bitmap_free(dm->header_modify_pattern_sw_icm_alloc_blocks);
}
kfree(dm);
}
int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
- u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id)
+ u64 length, u32 log_alignment, u16 uid,
+ phys_addr_t *addr, u32 *obj_id)
{
u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
@@ -99,6 +127,7 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
unsigned long *block_map;
u64 icm_start_addr;
u32 log_icm_size;
+ u64 align_mask;
u32 max_blocks;
u64 block_idx;
void *sw_icm;
@@ -128,6 +157,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
log_header_modify_sw_icm_size);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
@@ -136,11 +172,14 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
return -EOPNOTSUPP;
max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ if (log_alignment < MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+ log_alignment = MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+ align_mask = BIT(log_alignment - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) - 1;
+
spin_lock(&dm->lock);
- block_idx = bitmap_find_next_zero_area(block_map,
- max_blocks,
- 0,
- num_blocks, 0);
+ block_idx = bitmap_find_next_zero_area(block_map, max_blocks, 0,
+ num_blocks, align_mask);
if (block_idx < max_blocks)
bitmap_set(block_map,
@@ -198,6 +237,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type
icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 4be4d2d36218..d3d628b862f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies */
+/* Copyright (c) 2018-2021, Mellanox Technologies inc. All rights reserved. */
#ifndef __LIB_MLX5_EQ_H__
#define __LIB_MLX5_EQ_H__
@@ -22,22 +22,23 @@ struct mlx5_cq_table {
};
struct mlx5_eq {
+ struct mlx5_frag_buf_ctrl fbc;
+ struct mlx5_frag_buf frag_buf;
struct mlx5_core_dev *dev;
struct mlx5_cq_table cq_table;
__be32 __iomem *doorbell;
u32 cons_index;
- struct mlx5_frag_buf buf;
- int size;
unsigned int vecidx;
unsigned int irqn;
u8 eqn;
- int nent;
struct mlx5_rsc_debug *dbg;
+ struct mlx5_irq *irq;
};
struct mlx5_eq_async {
struct mlx5_eq core;
struct notifier_block irq_nb;
+ spinlock_t lock; /* To avoid irq EQ handle races with resiliency flows */
};
struct mlx5_eq_comp {
@@ -47,16 +48,21 @@ struct mlx5_eq_comp {
struct list_head list;
};
+static inline u32 eq_get_size(struct mlx5_eq *eq)
+{
+ return eq->fbc.sz_m1 + 1;
+}
+
static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
{
- return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+ return mlx5_frag_buf_get_wqe(&eq->fbc, entry);
}
static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
{
- struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1);
- return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+ return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe;
}
static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
@@ -78,10 +84,11 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
-void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t);
struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev);
void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
@@ -97,4 +104,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
#endif
+int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
new file mode 100644
index 000000000000..df58cba37930
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -0,0 +1,810 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies.
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/fs.h>
+
+#include "lib/fs_chains.h"
+#include "fs_ft_pool.h"
+#include "en/mapping.h"
+#include "fs_core.h"
+#include "en_tc.h"
+
+#define chains_lock(chains) ((chains)->lock)
+#define chains_ht(chains) ((chains)->chains_ht)
+#define prios_ht(chains) ((chains)->prios_ht)
+#define tc_default_ft(chains) ((chains)->tc_default_ft)
+#define tc_end_ft(chains) ((chains)->tc_end_ft)
+#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO)
+#define FT_TBL_SZ (64 * 1024)
+
+struct mlx5_fs_chains {
+ struct mlx5_core_dev *dev;
+
+ struct rhashtable chains_ht;
+ struct rhashtable prios_ht;
+ /* Protects above chains_ht and prios_ht */
+ struct mutex lock;
+
+ struct mlx5_flow_table *tc_default_ft;
+ struct mlx5_flow_table *tc_end_ft;
+ struct mapping_ctx *chains_mapping;
+
+ enum mlx5_flow_namespace_type ns;
+ u32 group_num;
+ u32 flags;
+};
+
+struct fs_chain {
+ struct rhash_head node;
+
+ u32 chain;
+
+ int ref;
+ int id;
+
+ struct mlx5_fs_chains *chains;
+ struct list_head prios_list;
+ struct mlx5_flow_handle *restore_rule;
+ struct mlx5_modify_hdr *miss_modify_hdr;
+};
+
+struct prio_key {
+ u32 chain;
+ u32 prio;
+ u32 level;
+};
+
+struct prio {
+ struct rhash_head node;
+ struct list_head list;
+
+ struct prio_key key;
+
+ int ref;
+
+ struct fs_chain *chain;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_handle *miss_rule;
+};
+
+static const struct rhashtable_params chain_params = {
+ .head_offset = offsetof(struct fs_chain, node),
+ .key_offset = offsetof(struct fs_chain, chain),
+ .key_len = sizeof_field(struct fs_chain, chain),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params prio_params = {
+ .head_offset = offsetof(struct prio, node),
+ .key_offset = offsetof(struct prio, key),
+ .key_len = sizeof_field(struct prio, key),
+ .automatic_shrinking = true,
+};
+
+bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains)
+{
+ return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+}
+
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{
+ return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+}
+
+bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains)
+{
+ return mlx5_chains_prios_supported(chains) &&
+ mlx5_chains_ignore_flow_level_supported(chains);
+}
+
+u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains)
+{
+ if (!mlx5_chains_prios_supported(chains))
+ return 1;
+
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX - 1;
+
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_CHAIN;
+}
+
+u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
+{
+ return mlx5_chains_get_chain_range(chains) + 1;
+}
+
+u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+{
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
+ if (!chains->dev->priv.eswitch ||
+ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
+ return 1;
+
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_PRIO;
+}
+
+static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains)
+{
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
+ /* Same value for FDB and NIC RX tables */
+ return FDB_TC_LEVELS_PER_PRIO;
+}
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft)
+{
+ tc_end_ft(chains) = ft;
+}
+
+static struct mlx5_flow_table *
+mlx5_chains_create_table(struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ int sz;
+
+ if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED)
+ ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE;
+ ft_attr.max_fte = sz;
+
+ /* We use tc_default_ft(chains) as the table's next_ft till
+ * ignore_flow_level is allowed on FT creation and not just for FTEs.
+ * Instead caller should add an explicit miss rule if needed.
+ */
+ ft_attr.next_ft = tc_default_ft(chains);
+
+ /* The root table(chain 0, prio 1, level 0) is required to be
+ * connected to the previous fs_core managed prio.
+ * We always create it, as a managed table, in order to align with
+ * fs_core logic.
+ */
+ if (!mlx5_chains_ignore_flow_level_supported(chains) ||
+ (chain == 0 && prio == 1 && level == 0)) {
+ ft_attr.level = level;
+ ft_attr.prio = prio - 1;
+ ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
+ mlx5_get_fdb_sub_ns(chains->dev, chain) :
+ mlx5_get_flow_namespace(chains->dev, chains->ns);
+ } else {
+ ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = ns_to_chains_fs_prio(chains->ns);
+ /* Firmware doesn't allow us to create another level 0 table,
+ * so we create all unmanaged tables as level 1.
+ *
+ * To connect them, we use explicit miss rules with
+ * ignore_flow_level. Caller is responsible to create
+ * these rules (if needed).
+ */
+ ft_attr.level = 1;
+ ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
+ }
+
+ ft_attr.autogroup.num_reserved_entries = 2;
+ ft_attr.autogroup.max_num_groups = chains->group_num;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
+ (int)PTR_ERR(ft), chain, prio, level, sz);
+ return ft;
+ }
+
+ return ft;
+}
+
+static int
+create_chain_restore(struct fs_chain *chain)
+{
+ struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_fs_chains *chains = chain->chains;
+ enum mlx5e_tc_attr_to_reg chain_to_reg;
+ struct mlx5_modify_hdr *mod_hdr;
+ u32 index;
+ int err;
+
+ if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) ||
+ !mlx5_chains_prios_supported(chains))
+ return 0;
+
+ err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
+ if (err)
+ return err;
+ if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
+ /* we got the special default flow tag id, so we won't know
+ * if we actually marked the packet with the restore rule
+ * we create.
+ *
+ * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
+ */
+ err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
+ mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG);
+ if (err)
+ return err;
+ }
+
+ chain->id = index;
+
+ if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) {
+ chain_to_reg = CHAIN_TO_REG;
+ chain->restore_rule = esw_add_restore_rule(esw, chain->id);
+ if (IS_ERR(chain->restore_rule)) {
+ err = PTR_ERR(chain->restore_rule);
+ goto err_rule;
+ }
+ } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) {
+ /* For NIC RX we don't need a restore rule
+ * since we write the metadata to reg_b
+ * that is passed to SW directly.
+ */
+ chain_to_reg = NIC_CHAIN_TO_REG;
+ } else {
+ err = -EINVAL;
+ goto err_rule;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield);
+ MLX5_SET(set_action_in, modact, offset,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset);
+ MLX5_SET(set_action_in, modact, length,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ?
+ 0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen);
+ MLX5_SET(set_action_in, modact, data, chain->id);
+ mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns,
+ 1, modact);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ goto err_mod_hdr;
+ }
+ chain->miss_modify_hdr = mod_hdr;
+
+ return 0;
+
+err_mod_hdr:
+ if (!IS_ERR_OR_NULL(chain->restore_rule))
+ mlx5_del_flow_rules(chain->restore_rule);
+err_rule:
+ /* Datapath can't find this mapping, so we can safely remove it */
+ mapping_remove(chains->chains_mapping, chain->id);
+ return err;
+}
+
+static void destroy_chain_restore(struct fs_chain *chain)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+
+ if (!chain->miss_modify_hdr)
+ return;
+
+ if (chain->restore_rule)
+ mlx5_del_flow_rules(chain->restore_rule);
+
+ mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr);
+ mapping_remove(chains->chains_mapping, chain->id);
+}
+
+static struct fs_chain *
+mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+ struct fs_chain *chain_s = NULL;
+ int err;
+
+ chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL);
+ if (!chain_s)
+ return ERR_PTR(-ENOMEM);
+
+ chain_s->chains = chains;
+ chain_s->chain = chain;
+ INIT_LIST_HEAD(&chain_s->prios_list);
+
+ err = create_chain_restore(chain_s);
+ if (err)
+ goto err_restore;
+
+ err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node,
+ chain_params);
+ if (err)
+ goto err_insert;
+
+ return chain_s;
+
+err_insert:
+ destroy_chain_restore(chain_s);
+err_restore:
+ kvfree(chain_s);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_chain(struct fs_chain *chain)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+
+ rhashtable_remove_fast(&chains_ht(chains), &chain->node,
+ chain_params);
+
+ destroy_chain_restore(chain);
+ kvfree(chain);
+}
+
+static struct fs_chain *
+mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+ struct fs_chain *chain_s;
+
+ chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain,
+ chain_params);
+ if (!chain_s) {
+ chain_s = mlx5_chains_create_chain(chains, chain);
+ if (IS_ERR(chain_s))
+ return chain_s;
+ }
+
+ chain_s->ref++;
+
+ return chain_s;
+}
+
+static struct mlx5_flow_handle *
+mlx5_chains_add_miss_rule(struct fs_chain *chain,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act act = {};
+
+ act.flags = FLOW_ACT_NO_APPEND;
+ if (mlx5_chains_ignore_flow_level_supported(chain->chains))
+ act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+
+ act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = next_ft;
+
+ if (next_ft == tc_end_ft(chains) &&
+ chain->chain != mlx5_chains_get_nf_ft_chain(chains) &&
+ mlx5_chains_prios_supported(chains)) {
+ act.modify_hdr = chain->miss_modify_hdr;
+ act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+
+ return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
+}
+
+static int
+mlx5_chains_update_prio_prevs(struct prio *prio,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
+ struct fs_chain *chain = prio->chain;
+ struct prio *pos;
+ int n = 0, err;
+
+ if (prio->key.level)
+ return 0;
+
+ /* Iterate in reverse order until reaching the level 0 rule of
+ * the previous priority, adding all the miss rules first, so we can
+ * revert them if any of them fails.
+ */
+ pos = prio;
+ list_for_each_entry_continue_reverse(pos,
+ &chain->prios_list,
+ list) {
+ miss_rules[n] = mlx5_chains_add_miss_rule(chain,
+ pos->ft,
+ next_ft);
+ if (IS_ERR(miss_rules[n])) {
+ err = PTR_ERR(miss_rules[n]);
+ goto err_prev_rule;
+ }
+
+ n++;
+ if (!pos->key.level)
+ break;
+ }
+
+ /* Success, delete old miss rules, and update the pointers. */
+ n = 0;
+ pos = prio;
+ list_for_each_entry_continue_reverse(pos,
+ &chain->prios_list,
+ list) {
+ mlx5_del_flow_rules(pos->miss_rule);
+
+ pos->miss_rule = miss_rules[n];
+ pos->next_ft = next_ft;
+
+ n++;
+ if (!pos->key.level)
+ break;
+ }
+
+ return 0;
+
+err_prev_rule:
+ while (--n >= 0)
+ mlx5_del_flow_rules(miss_rules[n]);
+
+ return err;
+}
+
+static void
+mlx5_chains_put_chain(struct fs_chain *chain)
+{
+ if (--chain->ref == 0)
+ mlx5_chains_destroy_chain(chain);
+}
+
+static struct prio *
+mlx5_chains_create_prio(struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_table *ft;
+ struct fs_chain *chain_s;
+ struct list_head *pos;
+ struct prio *prio_s;
+ u32 *flow_group_in;
+ int err;
+
+ chain_s = mlx5_chains_get_chain(chains, chain);
+ if (IS_ERR(chain_s))
+ return ERR_CAST(chain_s);
+
+ prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL);
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!prio_s || !flow_group_in) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ /* Chain's prio list is sorted by prio and level.
+ * And all levels of some prio point to the next prio's level 0.
+ * Example list (prio, level):
+ * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
+ * In hardware, we will we have the following pointers:
+ * (3,0) -> (5,0) -> (7,0) -> Slow path
+ * (3,1) -> (5,0)
+ * (5,1) -> (7,0)
+ * (6,1) -> (7,0)
+ */
+
+ /* Default miss for each chain: */
+ next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
+ tc_default_ft(chains) :
+ tc_end_ft(chains);
+ list_for_each(pos, &chain_s->prios_list) {
+ struct prio *p = list_entry(pos, struct prio, list);
+
+ /* exit on first pos that is larger */
+ if (prio < p->key.prio || (prio == p->key.prio &&
+ level < p->key.level)) {
+ /* Get next level 0 table */
+ next_ft = p->key.level == 0 ? p->ft : p->next_ft;
+ break;
+ }
+ }
+
+ ft = mlx5_chains_create_table(chains, chain, prio, level);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_create;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
+ ft->max_fte - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ft->max_fte - 1);
+ miss_group = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(miss_group)) {
+ err = PTR_ERR(miss_group);
+ goto err_group;
+ }
+
+ /* Add miss rule to next_ft */
+ miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft);
+ if (IS_ERR(miss_rule)) {
+ err = PTR_ERR(miss_rule);
+ goto err_miss_rule;
+ }
+
+ prio_s->miss_group = miss_group;
+ prio_s->miss_rule = miss_rule;
+ prio_s->next_ft = next_ft;
+ prio_s->chain = chain_s;
+ prio_s->key.chain = chain;
+ prio_s->key.prio = prio;
+ prio_s->key.level = level;
+ prio_s->ft = ft;
+
+ err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node,
+ prio_params);
+ if (err)
+ goto err_insert;
+
+ list_add(&prio_s->list, pos->prev);
+
+ /* Table is ready, connect it */
+ err = mlx5_chains_update_prio_prevs(prio_s, ft);
+ if (err)
+ goto err_update;
+
+ kvfree(flow_group_in);
+ return prio_s;
+
+err_update:
+ list_del(&prio_s->list);
+ rhashtable_remove_fast(&prios_ht(chains), &prio_s->node,
+ prio_params);
+err_insert:
+ mlx5_del_flow_rules(miss_rule);
+err_miss_rule:
+ mlx5_destroy_flow_group(miss_group);
+err_group:
+ mlx5_destroy_flow_table(ft);
+err_create:
+err_alloc:
+ kvfree(prio_s);
+ kvfree(flow_group_in);
+ mlx5_chains_put_chain(chain_s);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains,
+ struct prio *prio)
+{
+ struct fs_chain *chain = prio->chain;
+
+ WARN_ON(mlx5_chains_update_prio_prevs(prio,
+ prio->next_ft));
+
+ list_del(&prio->list);
+ rhashtable_remove_fast(&prios_ht(chains), &prio->node,
+ prio_params);
+ mlx5_del_flow_rules(prio->miss_rule);
+ mlx5_destroy_flow_group(prio->miss_group);
+ mlx5_destroy_flow_table(prio->ft);
+ mlx5_chains_put_chain(chain);
+ kvfree(prio);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level)
+{
+ struct mlx5_flow_table *prev_fts;
+ struct prio *prio_s;
+ struct prio_key key;
+ int l = 0;
+
+ if ((chain > mlx5_chains_get_chain_range(chains) &&
+ chain != mlx5_chains_get_nf_ft_chain(chains)) ||
+ prio > mlx5_chains_get_prio_range(chains) ||
+ level > mlx5_chains_get_level_range(chains))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /* create earlier levels for correct fs_core lookup when
+ * connecting tables.
+ */
+ for (l = 0; l < level; l++) {
+ prev_fts = mlx5_chains_get_table(chains, chain, prio, l);
+ if (IS_ERR(prev_fts)) {
+ prio_s = ERR_CAST(prev_fts);
+ goto err_get_prevs;
+ }
+ }
+
+ key.chain = chain;
+ key.prio = prio;
+ key.level = level;
+
+ mutex_lock(&chains_lock(chains));
+ prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+ prio_params);
+ if (!prio_s) {
+ prio_s = mlx5_chains_create_prio(chains, chain,
+ prio, level);
+ if (IS_ERR(prio_s))
+ goto err_create_prio;
+ }
+
+ ++prio_s->ref;
+ mutex_unlock(&chains_lock(chains));
+
+ return prio_s->ft;
+
+err_create_prio:
+ mutex_unlock(&chains_lock(chains));
+err_get_prevs:
+ while (--l >= 0)
+ mlx5_chains_put_table(chains, chain, prio, l);
+ return ERR_CAST(prio_s);
+}
+
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level)
+{
+ struct prio *prio_s;
+ struct prio_key key;
+
+ key.chain = chain;
+ key.prio = prio;
+ key.level = level;
+
+ mutex_lock(&chains_lock(chains));
+ prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+ prio_params);
+ if (!prio_s)
+ goto err_get_prio;
+
+ if (--prio_s->ref == 0)
+ mlx5_chains_destroy_prio(chains, prio_s);
+ mutex_unlock(&chains_lock(chains));
+
+ while (level-- > 0)
+ mlx5_chains_put_table(chains, chain, prio, level);
+
+ return;
+
+err_get_prio:
+ mutex_unlock(&chains_lock(chains));
+ WARN_ONCE(1,
+ "Couldn't find table: (chain: %d prio: %d level: %d)",
+ chain, prio, level);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains)
+{
+ return tc_end_ft(chains);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains)
+{
+ u32 chain, prio, level;
+ int err;
+
+ if (!mlx5_chains_ignore_flow_level_supported(chains)) {
+ err = -EOPNOTSUPP;
+
+ mlx5_core_warn(chains->dev,
+ "Couldn't create global flow table, ignore_flow_level not supported.");
+ goto err_ignore;
+ }
+
+ chain = mlx5_chains_get_chain_range(chains),
+ prio = mlx5_chains_get_prio_range(chains);
+ level = mlx5_chains_get_level_range(chains);
+
+ return mlx5_chains_create_table(chains, chain, prio, level);
+
+err_ignore:
+ return ERR_PTR(err);
+}
+
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft)
+{
+ mlx5_destroy_flow_table(ft);
+}
+
+static struct mlx5_fs_chains *
+mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+ struct mlx5_fs_chains *chains_priv;
+ u32 max_flow_counter;
+ int err;
+
+ chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
+ if (!chains_priv)
+ return ERR_PTR(-ENOMEM);
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ mlx5_core_dbg(dev,
+ "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n",
+ max_flow_counter, attr->max_grp_num, attr->max_ft_sz);
+
+ chains_priv->dev = dev;
+ chains_priv->flags = attr->flags;
+ chains_priv->ns = attr->ns;
+ chains_priv->group_num = attr->max_grp_num;
+ chains_priv->chains_mapping = attr->mapping;
+ tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft;
+
+ mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
+ mlx5_chains_get_chain_range(chains_priv),
+ mlx5_chains_get_prio_range(chains_priv));
+
+ err = rhashtable_init(&chains_ht(chains_priv), &chain_params);
+ if (err)
+ goto init_chains_ht_err;
+
+ err = rhashtable_init(&prios_ht(chains_priv), &prio_params);
+ if (err)
+ goto init_prios_ht_err;
+
+ mutex_init(&chains_lock(chains_priv));
+
+ return chains_priv;
+
+init_prios_ht_err:
+ rhashtable_destroy(&chains_ht(chains_priv));
+init_chains_ht_err:
+ kfree(chains_priv);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_cleanup(struct mlx5_fs_chains *chains)
+{
+ mutex_destroy(&chains_lock(chains));
+ rhashtable_destroy(&prios_ht(chains));
+ rhashtable_destroy(&chains_ht(chains));
+
+ kfree(chains);
+}
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+ struct mlx5_fs_chains *chains;
+
+ chains = mlx5_chains_init(dev, attr);
+
+ return chains;
+}
+
+void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains)
+{
+ mlx5_chains_cleanup(chains);
+}
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+ u32 *chain_mapping)
+{
+ struct mapping_ctx *ctx = chains->chains_mapping;
+ struct mlx5_mapped_obj mapped_obj = {};
+
+ mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN;
+ mapped_obj.chain = chain;
+ return mapping_add(ctx, &mapped_obj, chain_mapping);
+}
+
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
+{
+ struct mapping_ctx *ctx = chains->chains_mapping;
+
+ return mapping_remove(ctx, chain_mapping);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
new file mode 100644
index 000000000000..d50bdb226cef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_ESW_CHAINS_H__
+#define __ML5_ESW_CHAINS_H__
+
+#include <linux/mlx5/fs.h>
+
+struct mlx5_fs_chains;
+struct mlx5_mapped_obj;
+
+enum mlx5_chains_flags {
+ MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1),
+ MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2),
+};
+
+struct mlx5_chains_attr {
+ enum mlx5_flow_namespace_type ns;
+ u32 flags;
+ u32 max_ft_sz;
+ u32 max_grp_num;
+ struct mlx5_flow_table *default_ft;
+ struct mapping_ctx *mapping;
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+bool
+mlx5_chains_prios_supported(struct mlx5_fs_chains *chains);
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains);
+bool
+mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level);
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level);
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains);
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft);
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+ u32 *chain_mapping);
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains,
+ u32 chain_mapping);
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr);
+void mlx5_chains_destroy(struct mlx5_fs_chains *chains);
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline bool
+mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{ return false; }
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level) {};
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); }
+
+static inline struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{ return NULL; }
+static inline void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains) {};
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __ML5_ESW_CHAINS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
new file mode 100644
index 000000000000..b78f2ba25c19
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS 3
+#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\
+ MLX5_TTC_GROUP2_SIZE +\
+ MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS 3
+#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\
+ MLX5_INNER_TTC_GROUP2_SIZE +\
+ MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+ struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+ struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+ return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ for (i = 0; i < MLX5_NUM_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+ mlx5_del_flow_rules(ttc->rules[i].rule);
+ ttc->rules[i].rule = NULL;
+ }
+ }
+
+ for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+ mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+ ttc->tunnel_rules[i] = NULL;
+ }
+ }
+}
+
+struct mlx5_etype_proto {
+ u16 etype;
+ u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV4] = {
+ .etype = ETH_P_IP,
+ .proto = 0,
+ },
+ [MLX5_TT_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = 0,
+ },
+ [MLX5_TT_ANY] = {
+ .etype = 0,
+ .proto = 0,
+ },
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+ [MLX5_TT_IPV4_GRE] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV6_GRE] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV4_IPIP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV6_IPIP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV4_IPV6] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPV6,
+ },
+ [MLX5_TT_IPV6_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPV6,
+ },
+
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+ return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+ u8 proto_type)
+{
+ switch (proto_type) {
+ case IPPROTO_GRE:
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+ default:
+ return false;
+ }
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5_tunnel_proto_supported_rx(mdev,
+ ttc_tunnel_rules[tt].proto))
+ return true;
+ }
+ return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+ if (ethertype == ETH_P_IP)
+ return 4;
+
+ if (ethertype == ETH_P_IPV6)
+ return 6;
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+ int match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+ }
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (match_ipv_outer && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+ } else if (etype) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_flow_handle **trules;
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int tt;
+ int err;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ if (test_bit(tt, params->ignore_dests))
+ continue;
+ rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+ return 0;
+
+ trules = ttc->tunnel_rules;
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (!mlx5_tunnel_proto_supported_rx(dev,
+ ttc_tunnel_rules[tt].proto))
+ continue;
+ if (test_bit(tt, params->ignore_tunnel_dests))
+ continue;
+ trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+ &params->tunnel_dests[tt],
+ ttc_tunnel_rules[tt].etype,
+ ttc_tunnel_rules[tt].proto);
+ if (IS_ERR(trules[tt])) {
+ err = PTR_ERR(trules[tt]);
+ trules[tt] = NULL;
+ goto del_rules;
+ }
+ }
+
+ return 0;
+
+del_rules:
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+ bool use_ipv)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ if (use_ipv)
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype, u8 proto)
+{
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (etype && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+ }
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int err;
+ int tt;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ if (test_bit(tt, params->ignore_dests))
+ continue;
+ rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+ &params->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ return 0;
+
+del_rules:
+
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+ GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_inner_ttc_table_groups(ttc);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ mlx5_cleanup_ttc_rules(ttc);
+ for (i = ttc->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ttc->g[i]))
+ mlx5_destroy_flow_group(ttc->g[i]);
+ ttc->g[i] = NULL;
+ }
+
+ kfree(ttc->g);
+ mlx5_destroy_flow_table(ttc->t);
+ kvfree(ttc);
+}
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ bool match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest)
+{
+ return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+ NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+ WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+ "TTC[%d] default dest is not setup yet", type);
+
+ return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+ return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
new file mode 100644
index 000000000000..85fef0cd1c07
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_FS_TTC_H__
+#define __ML5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+ MLX5_TT_IPV4_TCP,
+ MLX5_TT_IPV6_TCP,
+ MLX5_TT_IPV4_UDP,
+ MLX5_TT_IPV6_UDP,
+ MLX5_TT_IPV4_IPSEC_AH,
+ MLX5_TT_IPV6_IPSEC_AH,
+ MLX5_TT_IPV4_IPSEC_ESP,
+ MLX5_TT_IPV6_IPSEC_ESP,
+ MLX5_TT_IPV4,
+ MLX5_TT_IPV6,
+ MLX5_TT_ANY,
+ MLX5_NUM_TT,
+ MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+ MLX5_TT_IPV4_GRE,
+ MLX5_TT_IPV6_GRE,
+ MLX5_TT_IPV4_IPIP,
+ MLX5_TT_IPV6_IPIP,
+ MLX5_TT_IPV4_IPV6,
+ MLX5_TT_IPV6_IPV6,
+ MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table_attr ft_attr;
+ struct mlx5_flow_destination dests[MLX5_NUM_TT];
+ DECLARE_BITMAP(ignore_dests, MLX5_NUM_TT);
+ bool inner_ttc;
+ DECLARE_BITMAP(ignore_tunnel_dests, MLX5_NUM_TUNNEL_TT);
+ struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index 7722a3f9bb68..96ffc0a0e670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -55,10 +55,6 @@ void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
{
- if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
- mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
- return -EPERM;
- }
if (dev->roce.reserved_gids.start < count) {
mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
count);
@@ -79,7 +75,6 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
{
- WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
count, dev->roce.reserved_gids.count);
@@ -93,12 +88,12 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
{
int end = dev->roce.reserved_gids.start +
- dev->roce.reserved_gids.count;
+ dev->roce.reserved_gids.count - 1;
int index = 0;
- index = ida_simple_get(&dev->roce.reserved_gids.ida,
- dev->roce.reserved_gids.start, end,
- GFP_KERNEL);
+ index = ida_alloc_range(&dev->roce.reserved_gids.ida,
+ dev->roce.reserved_gids.start, end,
+ GFP_KERNEL);
if (index < 0)
return index;
@@ -110,7 +105,7 @@ int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
{
mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
- ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index);
+ ida_free(&dev->roce.reserved_gids.ida, gid_index);
}
unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
@@ -124,8 +119,7 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
{
#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
- u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {};
void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
source_l3_address);
@@ -143,16 +137,16 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
}
ether_addr_copy(addr_mac, mac);
- MLX5_SET_RA(in_addr, roce_version, roce_version);
- MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
memcpy(addr_l3_addr, gid, gidsz);
}
+ MLX5_SET_RA(in_addr, roce_version, roce_version);
+ MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0)
MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num);
MLX5_SET(set_roce_address_in, in, roce_address_index, index);
MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, set_roce_address, in);
}
EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index 4bad6a5fde56..f240ffe5116c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -92,13 +92,6 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
{
}
-
-static inline int
-mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
- void *buf, int len)
-{
- return 0;
-}
#endif
#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 249539247e2e..032adb21ad4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -80,8 +80,15 @@ void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
/* Crypto */
+enum {
+ MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS,
+ MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC,
+ MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC,
+};
+
int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
- void *key, u32 sz_bytes, u32 *p_key_id);
+ void *key, u32 sz_bytes,
+ u32 key_type, u32 *p_key_id);
void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id);
static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
@@ -89,4 +96,13 @@ static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
return devlink_net(priv_to_devlink(dev));
}
+static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+ mdev->mlx5e_res.uplink_netdev = netdev;
+}
+
+static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev)
+{
+ return mdev->mlx5e_res.uplink_netdev;
+}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index 3118e8d66407..8ff16318e32d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -33,6 +33,7 @@
#include <linux/etherdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/mpfs.h>
#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h"
#include "lib/mpfs.h"
@@ -40,8 +41,7 @@
/* HW L2 Table (MPFS) management */
static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
{
- u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {};
u8 *in_mac_addr;
MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
@@ -50,17 +50,16 @@ static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
ether_addr_copy(&in_mac_addr[2], mac);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, set_l2_table_entry, in);
}
static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
{
- u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {};
MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, delete_l2_table_entry, in);
}
/* UC L2 table hash node */
@@ -123,7 +122,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_mpfs *mpfs = dev->priv.mpfs;
- if (!MLX5_ESWITCH_MANAGER(dev))
+ if (!mpfs)
return;
WARN_ON(!hlist_empty(mpfs->hash));
@@ -138,7 +137,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
int err = 0;
u32 index;
- if (!MLX5_ESWITCH_MANAGER(dev))
+ if (!mpfs)
return 0;
mutex_lock(&mpfs->lock);
@@ -177,6 +176,7 @@ out:
mutex_unlock(&mpfs->lock);
return err;
}
+EXPORT_SYMBOL(mlx5_mpfs_add_mac);
int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
{
@@ -185,7 +185,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
int err = 0;
u32 index;
- if (!MLX5_ESWITCH_MANAGER(dev))
+ if (!mpfs)
return 0;
mutex_lock(&mpfs->lock);
@@ -208,3 +208,4 @@ unlock:
mutex_unlock(&mpfs->lock);
return err;
}
+EXPORT_SYMBOL(mlx5_mpfs_del_mac);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
index 4a7b2c3203a7..4a293542a7aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -84,12 +84,9 @@ struct l2addr_node {
#ifdef CONFIG_MLX5_MPFS
int mlx5_mpfs_init(struct mlx5_core_dev *dev);
void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
-int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
#else /* #ifndef CONFIG_MLX5_MPFS */
static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
-static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
-static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
#endif
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
index 48b5c847b642..4571c56ec3c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -1,10 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/port.h>
-#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
#include "lib/port_tun.h"
@@ -145,11 +143,11 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
int reformat_type)
{
- /* the default is error for unknown (non VXLAN/GRE tunnel types) */
int err = -EOPNOTSUPP;
mutex_lock(&tun_entropy->lock);
- if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN &&
+ if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN ||
+ reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) &&
tun_entropy->enabled) {
/* in case entropy calculation is enabled for all tunneling
* types, it is ok for VXLAN, so approve.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
new file mode 100644
index 000000000000..84e5683861be
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __LIB_MLX5_SF_H__
+#define __LIB_MLX5_SF_H__
+
+#include <linux/mlx5/driver.h>
+
+static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, sf_base_id);
+}
+
+#ifdef CONFIG_MLX5_SF
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, sf);
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+ if (!mlx5_sf_supported(dev))
+ return 0;
+ if (MLX5_CAP_GEN(dev, max_num_sf))
+ return MLX5_CAP_GEN(dev, max_num_sf);
+ else
+ return 1 << MLX5_CAP_GEN(dev, log_max_sf);
+}
+
+#else
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+ return false;
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
new file mode 100644
index 000000000000..9b8c051ccf65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+#include "smfs.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec)
+{
+ struct mlx5dr_match_parameters matcher_mask = {};
+
+ matcher_mask.match_buf = (u64 *)&spec->match_criteria;
+ matcher_mask.match_sz = DR_SZ_MATCH_PARAM;
+
+ return mlx5dr_matcher_create(table, priority, spec->match_criteria_enable, &matcher_mask);
+}
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher)
+{
+ mlx5dr_matcher_destroy(matcher);
+}
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft)
+{
+ return mlx5dr_table_get_from_fs_ft(ft);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table)
+{
+ return mlx5dr_action_create_dest_table(table);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id)
+{
+ return mlx5dr_action_create_flow_counter(counter_id);
+}
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action)
+{
+ mlx5dr_action_destroy(action);
+}
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+ size_t num_actions, struct mlx5dr_action *actions[],
+ u32 flow_source)
+{
+ struct mlx5dr_match_parameters value = {};
+
+ value.match_buf = (u64 *)spec->match_value;
+ value.match_sz = DR_SZ_MATCH_PARAM;
+
+ return mlx5dr_rule_create(matcher, &value, num_actions, actions, flow_source);
+}
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule)
+{
+ mlx5dr_rule_destroy(rule);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
new file mode 100644
index 000000000000..452d0df339ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_LIB_SMFS_H__
+#define __MLX5_LIB_SMFS_H__
+
+#include "steering/mlx5dr.h"
+#include "steering/dr_types.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec);
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher);
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id);
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action);
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+ size_t num_actions, struct mlx5dr_action *actions[],
+ u32 flow_source);
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule);
+
+#endif /* __MLX5_LIB_SMFS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
new file mode 100644
index 000000000000..696e45e2bd06
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/driver.h>
+#include "lib/tout.h"
+
+struct mlx5_timeouts {
+ u64 to[MAX_TIMEOUT_TYPES];
+};
+
+static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
+ [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000,
+ [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000,
+ [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
+ [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
+ [MLX5_TO_FW_INIT_MS] = 2000,
+ [MLX5_TO_CMD_MS] = 60000,
+ [MLX5_TO_PCI_TOGGLE_MS] = 2000,
+ [MLX5_TO_HEALTH_POLL_INTERVAL_MS] = 2000,
+ [MLX5_TO_FULL_CRDUMP_MS] = 60000,
+ [MLX5_TO_FW_RESET_MS] = 60000,
+ [MLX5_TO_FLUSH_ON_ERROR_MS] = 2000,
+ [MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000,
+ [MLX5_TO_TEARDOWN_MS] = 3000,
+ [MLX5_TO_FSM_REACTIVATE_MS] = 5000,
+ [MLX5_TO_RECLAIM_PAGES_MS] = 5000,
+ [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000
+};
+
+static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type)
+{
+ dev->timeouts->to[type] = val;
+}
+
+int mlx5_tout_init(struct mlx5_core_dev *dev)
+{
+ int i;
+
+ dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL);
+ if (!dev->timeouts)
+ return -ENOMEM;
+
+ for (i = 0; i < MAX_TIMEOUT_TYPES; i++)
+ tout_set(dev, tout_def_sw_val[i], i);
+
+ return 0;
+}
+
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev)
+{
+ kfree(dev->timeouts);
+}
+
+/* Time register consists of two fields to_multiplier(time out multiplier)
+ * and to_value(time out value). to_value is the quantity of the time units and
+ * to_multiplier is the type and should be one off these four values.
+ * 0x0: millisecond
+ * 0x1: seconds
+ * 0x2: minutes
+ * 0x3: hours
+ * this function converts the time stored in the two register fields into
+ * millisecond.
+ */
+static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val)
+{
+ u64 msec = to_val;
+
+ to_mul &= 0x3;
+ /* convert hours/minutes/seconds to miliseconds */
+ if (to_mul)
+ msec *= 1000 * int_pow(60, to_mul - 1);
+
+ return msec;
+}
+
+static u64 tout_convert_iseg_to_ms(u32 iseg_to)
+{
+ return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff);
+}
+
+static bool tout_is_supported(struct mlx5_core_dev *dev)
+{
+ return !!ioread32be(&dev->iseg->cmd_q_init_to);
+}
+
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev)
+{
+ u32 to;
+
+ if (!tout_is_supported(dev))
+ return;
+
+ to = ioread32be(&dev->iseg->cmd_q_init_to);
+ tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS);
+
+ to = ioread32be(&dev->iseg->cmd_exec_to);
+ tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS);
+}
+
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type)
+{
+ return dev->timeouts->to[type];
+}
+
+#define MLX5_TIMEOUT_QUERY(fld, reg_out) \
+ ({ \
+ struct mlx5_ifc_default_timeout_bits *time_field; \
+ u32 to_multi, to_value; \
+ u64 to_val_ms; \
+ \
+ time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \
+ to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \
+ to_value = MLX5_GET(default_timeout, time_field, to_value); \
+ to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \
+ to_val_ms; \
+ })
+
+#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \
+ ({ \
+ u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \
+ tout_set(dev, fw_to + (to_extra), to_type); \
+ fw_to; \
+ })
+
+static int tout_query_dtor(struct mlx5_core_dev *dev)
+{
+ u64 pcie_toggle_to_val, tear_down_to_val;
+ u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0);
+ if (err)
+ return err;
+
+ pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0);
+ MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val);
+
+ tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0);
+ MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS,
+ tear_down_to_val);
+
+ MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0);
+ MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0);
+ MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0);
+ MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0);
+ MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0);
+ MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0);
+
+ return 0;
+}
+
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev)
+{
+ if (tout_is_supported(dev))
+ return tout_query_dtor(dev);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
new file mode 100644
index 000000000000..bc9e9aeda847
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef MLX5_TIMEOUTS_H
+#define MLX5_TIMEOUTS_H
+
+enum mlx5_timeouts_types {
+ /* pre init timeouts (not read from FW) */
+ MLX5_TO_FW_PRE_INIT_TIMEOUT_MS,
+ MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS,
+ MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS,
+ MLX5_TO_FW_PRE_INIT_WAIT_MS,
+
+ /* init segment timeouts */
+ MLX5_TO_FW_INIT_MS,
+ MLX5_TO_CMD_MS,
+
+ /* DTOR timeouts */
+ MLX5_TO_PCI_TOGGLE_MS,
+ MLX5_TO_HEALTH_POLL_INTERVAL_MS,
+ MLX5_TO_FULL_CRDUMP_MS,
+ MLX5_TO_FW_RESET_MS,
+ MLX5_TO_FLUSH_ON_ERROR_MS,
+ MLX5_TO_PCI_SYNC_UPDATE_MS,
+ MLX5_TO_TEARDOWN_MS,
+ MLX5_TO_FSM_REACTIVATE_MS,
+ MLX5_TO_RECLAIM_PAGES_MS,
+ MLX5_TO_RECLAIM_VFS_PAGES_MS,
+
+ MAX_TIMEOUT_TYPES
+};
+
+struct mlx5_core_dev;
+int mlx5_tout_init(struct mlx5_core_dev *dev);
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
+
+#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
+
+# endif /* MLX5_TIMEOUTS_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
index 148b55c3db7a..d55e15c1f380 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/refcount.h>
#include <linux/mlx5/driver.h>
#include <net/vxlan.h>
@@ -40,153 +39,108 @@
struct mlx5_vxlan {
struct mlx5_core_dev *mdev;
- spinlock_t lock; /* protect vxlan table */
- /* max_num_ports is usuallly 4, 16 buckets is more than enough */
+ /* max_num_ports is usually 4, 16 buckets is more than enough */
DECLARE_HASHTABLE(htable, 4);
- int num_ports;
struct mutex sync_lock; /* sync add/del port HW operations */
};
struct mlx5_vxlan_port {
struct hlist_node hlist;
- refcount_t refcount;
u16 udp_port;
};
-static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
-{
- return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
-}
-
static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
{
- u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {};
MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, add_vxlan_udp_dport, in);
}
static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
{
- u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {};
MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, delete_vxlan_udp_dport, in);
}
-static struct mlx5_vxlan_port*
-mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
+bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
{
struct mlx5_vxlan_port *vxlanp;
+ bool found = false;
- hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
- if (vxlanp->udp_port == port)
- return vxlanp;
- }
+ if (!mlx5_vxlan_allowed(vxlan))
+ return NULL;
- return NULL;
+ rcu_read_lock();
+ hash_for_each_possible_rcu(vxlan->htable, vxlanp, hlist, port)
+ if (vxlanp->udp_port == port) {
+ found = true;
+ break;
+ }
+ rcu_read_unlock();
+
+ return found;
}
-struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+static struct mlx5_vxlan_port *vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
{
struct mlx5_vxlan_port *vxlanp;
- if (!mlx5_vxlan_allowed(vxlan))
- return NULL;
-
- spin_lock_bh(&vxlan->lock);
- vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
- spin_unlock_bh(&vxlan->lock);
-
- return vxlanp;
+ hash_for_each_possible(vxlan->htable, vxlanp, hlist, port)
+ if (vxlanp->udp_port == port)
+ return vxlanp;
+ return NULL;
}
int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
{
struct mlx5_vxlan_port *vxlanp;
- int ret = -ENOSPC;
-
- vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
- if (vxlanp) {
- refcount_inc(&vxlanp->refcount);
- return 0;
- }
-
- mutex_lock(&vxlan->sync_lock);
- if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
- mlx5_core_info(vxlan->mdev,
- "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
- port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
- ret = -ENOSPC;
- goto unlock;
- }
-
- ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
- if (ret)
- goto unlock;
+ int ret;
vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
- if (!vxlanp) {
- ret = -ENOMEM;
- goto err_delete_port;
- }
-
+ if (!vxlanp)
+ return -ENOMEM;
vxlanp->udp_port = port;
- refcount_set(&vxlanp->refcount, 1);
- spin_lock_bh(&vxlan->lock);
- hash_add(vxlan->htable, &vxlanp->hlist, port);
- spin_unlock_bh(&vxlan->lock);
+ ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+ if (ret) {
+ kfree(vxlanp);
+ return ret;
+ }
- vxlan->num_ports++;
+ mutex_lock(&vxlan->sync_lock);
+ hash_add_rcu(vxlan->htable, &vxlanp->hlist, port);
mutex_unlock(&vxlan->sync_lock);
- return 0;
-err_delete_port:
- mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
-
-unlock:
- mutex_unlock(&vxlan->sync_lock);
- return ret;
+ return 0;
}
int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
{
struct mlx5_vxlan_port *vxlanp;
- bool remove = false;
int ret = 0;
mutex_lock(&vxlan->sync_lock);
- spin_lock_bh(&vxlan->lock);
- vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
- if (!vxlanp) {
+ vxlanp = vxlan_lookup_port(vxlan, port);
+ if (WARN_ON(!vxlanp)) {
ret = -ENOENT;
goto out_unlock;
}
- if (refcount_dec_and_test(&vxlanp->refcount)) {
- hash_del(&vxlanp->hlist);
- remove = true;
- }
+ hash_del_rcu(&vxlanp->hlist);
+ synchronize_rcu();
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+ kfree(vxlanp);
out_unlock:
- spin_unlock_bh(&vxlan->lock);
-
- if (remove) {
- mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
- kfree(vxlanp);
- vxlan->num_ports--;
- }
-
mutex_unlock(&vxlan->sync_lock);
-
return ret;
}
@@ -203,7 +157,6 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
vxlan->mdev = mdev;
mutex_init(&vxlan->sync_lock);
- spin_lock_init(&vxlan->lock);
hash_init(vxlan->htable);
/* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */
@@ -214,6 +167,17 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
{
+ if (!mlx5_vxlan_allowed(vxlan))
+ return;
+
+ mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT);
+ WARN_ON(!hash_empty(vxlan->htable));
+
+ kfree(vxlan);
+}
+
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan)
+{
struct mlx5_vxlan_port *vxlanp;
struct hlist_node *tmp;
int bkt;
@@ -221,12 +185,12 @@ void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
if (!mlx5_vxlan_allowed(vxlan))
return;
- /* Lockless since we are the only hash table consumers*/
hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
- hash_del(&vxlanp->hlist);
- mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
- kfree(vxlanp);
+ /* Don't delete default UDP port added by the HW.
+ * Remove only user configured ports
+ */
+ if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT)
+ continue;
+ mlx5_vxlan_del_port(vxlan, vxlanp->udp_port);
}
-
- kfree(vxlan);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
index 8fb0eb08fa6d..34ef662da35e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -37,6 +37,11 @@
struct mlx5_vxlan;
struct mlx5_vxlan_port;
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
{
/* not allowed reason is encoded in vxlan pointer as error,
@@ -50,15 +55,16 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
-struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan);
#else
static inline struct mlx5_vxlan*
mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); }
static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
-static inline struct mx5_vxlan_port*
-mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return NULL; }
+static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; }
+static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; }
#endif
#endif /* __MLX5_VXLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f554cfddcf4e..283c4cc28944 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -50,6 +50,7 @@
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
+#include <linux/version.h>
#include <net/devlink.h>
#include "mlx5_core.h"
#include "lib/eq.h"
@@ -57,11 +58,11 @@
#include "lib/mpfs.h"
#include "eswitch.h"
#include "devlink.h"
+#include "fw_reset.h"
#include "lib/mlx5.h"
+#include "lib/tout.h"
#include "fpga/core.h"
-#include "fpga/ipsec.h"
-#include "accel/ipsec.h"
-#include "accel/tls.h"
+#include "en_accel/ipsec.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
#include "lib/geneve.h"
@@ -70,28 +71,35 @@
#include "diag/fw_tracer.h"
#include "ecpf.h"
#include "lib/hv_vhca.h"
+#include "diag/rsc_dump.h"
+#include "sf/vhca_event.h"
+#include "sf/dev/dev.h"
+#include "sf/sf.h"
+#include "mlx5_irq.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRIVER_VERSION);
unsigned int mlx5_core_debug_mask;
module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
-#define MLX5_DEFAULT_PROF 2
static unsigned int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, uint, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
static u32 sw_owner_id[4];
+#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1)
+static DEFINE_IDA(sw_vhca_ida);
enum {
MLX5_ATOMIC_REQ_MODE_BE = 0x0,
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};
+#define LOG_MAX_SUPPORTED_QPS 0xff
+
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
@@ -103,7 +111,7 @@ static struct mlx5_profile profile[] = {
[2] = {
.mask = MLX5_PROF_MASK_QP_SIZE |
MLX5_PROF_MASK_MR_CACHE,
- .log_max_qp = 18,
+ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
.mr_cache[0] = {
.size = 500,
.limit = 250
@@ -171,32 +179,30 @@ static struct mlx5_profile profile[] = {
},
};
-#define FW_INIT_TIMEOUT_MILI 2000
-#define FW_INIT_WAIT_MS 2
-#define FW_PRE_INIT_TIMEOUT_MILI 120000
-#define FW_INIT_WARN_MESSAGE_INTERVAL 20000
-
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
u32 warn_time_mili)
{
unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
+ u32 fw_initializing;
int err = 0;
- BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
-
- while (fw_initializing(dev)) {
- if (time_after(jiffies, end)) {
+ do {
+ fw_initializing = ioread32be(&dev->iseg->initializing);
+ if (!(fw_initializing >> 31))
+ break;
+ if (time_after(jiffies, end) ||
+ test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
err = -EBUSY;
break;
}
if (warn_time_mili && time_after(jiffies, warn)) {
- mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
- jiffies_to_msecs(end - warn) / 1000);
+ mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n",
+ jiffies_to_msecs(end - warn) / 1000, fw_initializing);
warn = jiffies + msecs_to_jiffies(warn_time_mili);
}
- msleep(FW_INIT_WAIT_MS);
- }
+ msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT));
+ } while (true);
return err;
}
@@ -205,8 +211,7 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
{
int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
driver_version);
- u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0};
- u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0};
+ u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
int remaining_size = driver_ver_sz;
char *string;
@@ -221,47 +226,38 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
strncat(string, ",", remaining_size);
remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
- strncat(string, DRIVER_NAME, remaining_size);
+ strncat(string, KBUILD_MODNAME, remaining_size);
remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
strncat(string, ",", remaining_size);
remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
- strncat(string, DRIVER_VERSION, remaining_size);
+
+ snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
+ LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
+ LINUX_VERSION_SUBLEVEL);
/*Send the command*/
MLX5_SET(set_driver_version_in, in, opcode,
MLX5_CMD_OP_SET_DRIVER_VERSION);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, set_driver_version, in);
}
static int set_dma_caps(struct pci_dev *pdev)
{
int err;
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
return err;
}
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_warn(&pdev->dev,
- "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "Can't set consistent PCI DMA mask, aborting\n");
- return err;
- }
- }
-
dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
return err;
}
@@ -303,7 +299,7 @@ static int request_bar(struct pci_dev *pdev)
return -ENODEV;
}
- err = pci_request_regions(pdev, DRIVER_NAME);
+ err = pci_request_regions(pdev, KBUILD_MODNAME);
if (err)
dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
@@ -320,13 +316,6 @@ struct mlx5_reg_host_endianness {
u8 rsvd[15];
};
-#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
-
-enum {
- MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
- MLX5_DEV_CAP_FLAG_DCT,
-};
-
static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
switch (size) {
@@ -365,7 +354,7 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
if (err) {
mlx5_core_warn(dev,
"QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
@@ -377,11 +366,11 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
switch (cap_mode) {
case HCA_CAP_OPMOD_GET_MAX:
- memcpy(dev->caps.hca_max[cap_type], hca_caps,
+ memcpy(dev->caps.hca[cap_type]->max, hca_caps,
MLX5_UN_SZ_BYTES(hca_cap_union));
break;
case HCA_CAP_OPMOD_GET_CUR:
- memcpy(dev->caps.hca_cur[cap_type], hca_caps,
+ memcpy(dev->caps.hca[cap_type]->cur, hca_caps,
MLX5_UN_SZ_BYTES(hca_cap_union));
break;
default:
@@ -406,30 +395,25 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
{
- u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
-
MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
- return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, set_hca_cap, in);
}
-static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
{
- void *set_ctx;
void *set_hca_cap;
- int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
int req_endianness;
int err;
- if (MLX5_CAP_GEN(dev, atomic)) {
- err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
- if (err)
- return err;
- } else {
+ if (!MLX5_CAP_GEN(dev, atomic))
return 0;
- }
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+ if (err)
+ return err;
req_endianness =
MLX5_CAP_ATOMIC(dev,
@@ -438,27 +422,18 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
return 0;
- set_ctx = kzalloc(set_sz, GFP_KERNEL);
- if (!set_ctx)
- return -ENOMEM;
-
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
/* Set requestor to host endianness */
MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
- err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
-
- kfree(set_ctx);
- return err;
+ return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
}
-static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
{
void *set_hca_cap;
- void *set_ctx;
- int set_sz;
bool do_set = false;
int err;
@@ -470,13 +445,8 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
if (err)
return err;
- set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
- set_ctx = kzalloc(set_sz, GFP_KERNEL);
- if (!set_ctx)
- return -ENOMEM;
-
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
- memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
MLX5_ST_SZ_BYTES(odp_cap));
#define ODP_CAP_SET_MAX(dev, field) \
@@ -503,34 +473,84 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
- if (do_set)
- err = set_caps(dev, set_ctx, set_sz,
- MLX5_SET_HCA_CAP_OP_MOD_ODP);
+ if (!do_set)
+ return 0;
- kfree(set_ctx);
+ return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+}
+
+static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
return err;
}
-static int handle_hca_cap(struct mlx5_core_dev *dev)
+bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ &val);
+
+ if (!err)
+ return val.vbool;
+
+ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
+ return MLX5_CAP_GEN(dev, roce);
+}
+EXPORT_SYMBOL(mlx5_is_roce_on);
+
+static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
{
- void *set_ctx = NULL;
- struct mlx5_profile *prof = dev->profile;
- int err = -ENOMEM;
- int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
void *set_hca_cap;
+ int err;
- set_ctx = kzalloc(set_sz, GFP_KERNEL);
- if (!set_ctx)
- goto query_ex;
+ if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
+ if (err)
+ return err;
+
+ if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) ||
+ !(dev->priv.sw_vhca_id > 0))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
+ capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur,
+ MLX5_ST_SZ_BYTES(cmd_hca_cap_2));
+ MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1);
+
+ return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2);
+}
+
+static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ struct mlx5_profile *prof = &dev->profile;
+ void *set_hca_cap;
+ int max_uc_list;
+ int err;
err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
if (err)
- goto query_ex;
+ return err;
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
capability);
- memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL],
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur,
MLX5_ST_SZ_BYTES(cmd_hca_cap));
mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
@@ -541,11 +561,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
to_fw_pkey_sz(dev, 128));
/* Check log_max_qp from HCA caps to set in current profile */
- if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
+ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
+ prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp));
+ } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
- profile[prof_sel].log_max_qp,
+ prof->log_max_qp,
MLX5_CAP_GEN_MAX(dev, log_max_qp));
- profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+ prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
}
if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
@@ -571,43 +593,164 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN_MAX(dev, dct))
MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
+ if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1);
+
if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
MLX5_SET(cmd_hca_cap,
set_hca_cap,
num_vhca_ports,
MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
- err = set_caps(dev, set_ctx, set_sz,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+ if (MLX5_CAP_GEN_MAX(dev, release_all_pages))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1);
+
+ if (MLX5_CAP_GEN_MAX(dev, mkey_by_name))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);
+
+ mlx5_vhca_state_cap_handle(dev, set_hca_cap);
+
+ if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
+ MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
+
+ if (MLX5_CAP_GEN(dev, roce_rw_supported))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
+ mlx5_is_roce_on(dev));
+
+ max_uc_list = max_uc_list_get_devlink_param(dev);
+ if (max_uc_list > 0)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list,
+ ilog2(max_uc_list));
+
+ return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+}
+
+/* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the
+ * boot process.
+ * In case RoCE cap is writable in FW and user/devlink requested to change the
+ * cap, we are yet to query the final state of the above cap.
+ * Hence, the need for this function.
+ *
+ * Returns
+ * True:
+ * 1) RoCE cap is read only in FW and already disabled
+ * OR:
+ * 2) RoCE cap is writable in FW and user/devlink requested it off.
+ *
+ * In any other case, return False.
+ */
+static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
+{
+ return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
+ (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
+}
+
+static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (is_roce_fw_disabled(dev))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) ||
+ !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur,
+ MLX5_ST_SZ_BYTES(roce_cap));
+ MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
+
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
+ return err;
+}
+
+static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev,
+ void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, port_selection_cap))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) ||
+ !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur,
+ MLX5_ST_SZ_BYTES(port_selection_cap));
+ MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1);
+
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION);
-query_ex:
- kfree(set_ctx);
return err;
}
static int set_hca_cap(struct mlx5_core_dev *dev)
{
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_ctx;
int err;
- err = handle_hca_cap(dev);
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ err = handle_hca_cap(dev, set_ctx);
if (err) {
mlx5_core_err(dev, "handle_hca_cap failed\n");
goto out;
}
- err = handle_hca_cap_atomic(dev);
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_atomic(dev, set_ctx);
if (err) {
mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
goto out;
}
- err = handle_hca_cap_odp(dev);
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_odp(dev, set_ctx);
if (err) {
mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
goto out;
}
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_roce(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_roce failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_2(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_2 failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_port_selection(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n");
+ goto out;
+ }
+
out:
+ kfree(set_ctx);
return err;
}
@@ -641,63 +784,39 @@ static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
- u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
MLX5_SET(enable_hca_in, in, function_id, func_id);
MLX5_SET(enable_hca_in, in, embedded_cpu_function,
dev->caps.embedded_cpu);
- return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, enable_hca, in);
}
int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
- u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
MLX5_SET(disable_hca_in, in, function_id, func_id);
MLX5_SET(enable_hca_in, in, embedded_cpu_function,
dev->caps.embedded_cpu);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
- struct ptp_system_timestamp *sts)
-{
- u32 timer_h, timer_h1, timer_l;
-
- timer_h = ioread32be(&dev->iseg->internal_timer_h);
- ptp_read_system_prets(sts);
- timer_l = ioread32be(&dev->iseg->internal_timer_l);
- ptp_read_system_postts(sts);
- timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
- if (timer_h != timer_h1) {
- /* wrap around */
- ptp_read_system_prets(sts);
- timer_l = ioread32be(&dev->iseg->internal_timer_l);
- ptp_read_system_postts(sts);
- }
-
- return (u64)timer_l | (u64)timer_h1 << 32;
+ return mlx5_cmd_exec_in(dev, disable_hca, in);
}
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
- u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
- u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
+ u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
+ u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {};
u32 sup_issi;
int err;
MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
- err = mlx5_cmd_exec(dev, query_in, sizeof(query_in),
- query_out, sizeof(query_out));
+ err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out);
if (err) {
- u32 syndrome;
- u8 status;
+ u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome);
+ u8 status = MLX5_GET(query_issi_out, query_out, status);
- mlx5_cmd_mbox_status(query_out, &status, &syndrome);
if (!status || syndrome == MLX5_DRIVER_SYND) {
mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
err, status, syndrome);
@@ -712,13 +831,11 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
if (sup_issi & (1 << 1)) {
- u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0};
- u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
+ u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {};
MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
MLX5_SET(set_issi_in, set_in, current_issi, 1);
- err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
- set_out, sizeof(set_out));
+ err = mlx5_cmd_exec_in(dev, set_issi, set_in);
if (err) {
mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
err);
@@ -738,14 +855,12 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
const struct pci_device_id *id)
{
- struct mlx5_priv *priv = &dev->priv;
int err = 0;
mutex_init(&dev->pci_status_mutex);
pci_set_drvdata(dev->pdev, dev);
dev->bar_addr = pci_resource_start(pdev, 0);
- priv->numa_node = dev_to_node(&dev->pdev->dev);
err = mlx5_pci_enable_device(dev);
if (err) {
@@ -781,7 +896,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
}
mlx5_pci_vsc_init(dev);
-
+ dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
return 0;
err_clr_master:
@@ -794,6 +909,11 @@ err_disable:
static void mlx5_pci_close(struct mlx5_core_dev *dev)
{
+ /* health work might still be active, and it needs pci bar in
+ * order to know the NIC state. Therefore, drain the health WQ
+ * before removing the pci bars
+ */
+ mlx5_drain_health_wq(dev);
iounmap(dev->iseg);
pci_clear_master(dev->pdev);
release_bar(dev->pdev);
@@ -833,9 +953,13 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_eq_cleanup;
}
- mlx5_cq_debugfs_init(dev);
+ err = mlx5_fw_reset_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize fw reset events\n");
+ goto err_events_cleanup;
+ }
- mlx5_init_qp_table(dev);
+ mlx5_cq_debugfs_init(dev);
mlx5_init_reserved_gids(dev);
@@ -874,15 +998,48 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_eswitch_cleanup;
}
+ err = mlx5_vhca_event_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err);
+ goto err_fpga_cleanup;
+ }
+
+ err = mlx5_sf_hw_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init SF HW table %d\n", err);
+ goto err_sf_hw_table_cleanup;
+ }
+
+ err = mlx5_sf_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init SF table %d\n", err);
+ goto err_sf_table_cleanup;
+ }
+
+ err = mlx5_fs_core_alloc(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc flow steering\n");
+ goto err_fs;
+ }
+
dev->dm = mlx5_dm_create(dev);
if (IS_ERR(dev->dm))
mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);
+ dev->rsc_dump = mlx5_rsc_dump_create(dev);
return 0;
+err_fs:
+ mlx5_sf_table_cleanup(dev);
+err_sf_table_cleanup:
+ mlx5_sf_hw_table_cleanup(dev);
+err_sf_hw_table_cleanup:
+ mlx5_vhca_event_cleanup(dev);
+err_fpga_cleanup:
+ mlx5_fpga_cleanup(dev);
err_eswitch_cleanup:
mlx5_eswitch_cleanup(dev->priv.eswitch);
err_sriov_cleanup:
@@ -894,8 +1051,9 @@ err_rl_cleanup:
err_tables_cleanup:
mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
- mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
+ mlx5_fw_reset_cleanup(dev);
+err_events_cleanup:
mlx5_events_cleanup(dev);
err_eq_cleanup:
mlx5_eq_table_cleanup(dev);
@@ -909,9 +1067,14 @@ err_devcom:
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
+ mlx5_rsc_dump_destroy(dev);
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_dm_cleanup(dev);
+ mlx5_fs_core_free(dev);
+ mlx5_sf_table_cleanup(dev);
+ mlx5_sf_hw_table_cleanup(dev);
+ mlx5_vhca_event_cleanup(dev);
mlx5_fpga_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
mlx5_sriov_cleanup(dev);
@@ -921,15 +1084,15 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
- mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
+ mlx5_fw_reset_cleanup(dev);
mlx5_events_cleanup(dev);
mlx5_eq_table_cleanup(dev);
mlx5_irq_table_cleanup(dev);
mlx5_devcom_unregister_device(dev->priv.devcom);
}
-static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout)
{
int err;
@@ -942,10 +1105,11 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
/* wait for firmware to accept initialization segments configurations
*/
- err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
+ err = wait_fw_init(dev, timeout,
+ mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
if (err) {
- mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
- FW_PRE_INIT_TIMEOUT_MILI);
+ mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
+ timeout);
return err;
}
@@ -955,17 +1119,23 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
return err;
}
- err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
+ mlx5_tout_query_iseg(dev);
+
+ err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0);
if (err) {
- mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
- FW_INIT_TIMEOUT_MILI);
+ mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n",
+ mlx5_tout_ms(dev, FW_INIT));
goto err_cmd_cleanup;
}
+ mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
+
+ mlx5_start_health_poll(dev);
+
err = mlx5_core_enable_hca(dev, 0);
if (err) {
mlx5_core_err(dev, "enable hca failed\n");
- goto err_cmd_cleanup;
+ goto stop_health_poll;
}
err = mlx5_core_set_issi(dev);
@@ -980,6 +1150,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
goto err_disable_hca;
}
+ err = mlx5_tout_query_dtor(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to read dtor\n");
+ goto reclaim_boot_pages;
+ }
+
err = set_hca_ctrl(dev);
if (err) {
mlx5_core_err(dev, "set_hca_ctrl failed\n");
@@ -1006,23 +1182,23 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
mlx5_set_driver_version(dev);
- mlx5_start_health_poll(dev);
-
err = mlx5_query_hca_caps(dev);
if (err) {
mlx5_core_err(dev, "query hca failed\n");
- goto stop_health;
+ goto reclaim_boot_pages;
}
+ mlx5_start_health_fw_log_up(dev);
return 0;
-stop_health:
- mlx5_stop_health_poll(dev, boot);
reclaim_boot_pages:
mlx5_reclaim_startup_pages(dev);
err_disable_hca:
mlx5_core_disable_hca(dev, 0);
+stop_health_poll:
+ mlx5_stop_health_poll(dev, boot);
err_cmd_cleanup:
+ mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
return err;
@@ -1032,7 +1208,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
{
int err;
- mlx5_stop_health_poll(dev, boot);
err = mlx5_cmd_teardown_hca(dev);
if (err) {
mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
@@ -1040,6 +1215,8 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
}
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
+ mlx5_stop_health_poll(dev, boot);
+ mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
return 0;
@@ -1073,31 +1250,28 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err = mlx5_fw_tracer_init(dev->tracer);
if (err) {
- mlx5_core_err(dev, "Failed to init FW tracer\n");
- goto err_fw_tracer;
+ mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
+ mlx5_fw_tracer_destroy(dev->tracer);
+ dev->tracer = NULL;
}
+ mlx5_fw_reset_events_start(dev);
mlx5_hv_vhca_init(dev->hv_vhca);
- err = mlx5_fpga_device_start(dev);
- if (err) {
- mlx5_core_err(dev, "fpga device start failed %d\n", err);
- goto err_fpga_start;
- }
-
- err = mlx5_accel_ipsec_init(dev);
+ err = mlx5_rsc_dump_init(dev);
if (err) {
- mlx5_core_err(dev, "IPSec device start failed %d\n", err);
- goto err_ipsec_start;
+ mlx5_core_err(dev, "Failed to init Resource dump %d\n", err);
+ mlx5_rsc_dump_destroy(dev);
+ dev->rsc_dump = NULL;
}
- err = mlx5_accel_tls_init(dev);
+ err = mlx5_fpga_device_start(dev);
if (err) {
- mlx5_core_err(dev, "TLS device start failed %d\n", err);
- goto err_tls_start;
+ mlx5_core_err(dev, "fpga device start failed %d\n", err);
+ goto err_fpga_start;
}
- err = mlx5_init_fs(dev);
+ err = mlx5_fs_core_init(dev);
if (err) {
mlx5_core_err(dev, "Failed to init flow steering\n");
goto err_fs;
@@ -1106,13 +1280,15 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err = mlx5_core_set_hca_defaults(dev);
if (err) {
mlx5_core_err(dev, "Failed to set hca defaults\n");
- goto err_sriov;
+ goto err_set_hca;
}
- err = mlx5_sriov_attach(dev);
+ mlx5_vhca_event_start(dev);
+
+ err = mlx5_sf_hw_table_create(dev);
if (err) {
- mlx5_core_err(dev, "sriov init failed %d\n", err);
- goto err_sriov;
+ mlx5_core_err(dev, "sf table create failed %d\n", err);
+ goto err_vhca;
}
err = mlx5_ec_init(dev);
@@ -1121,22 +1297,33 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_ec;
}
+ mlx5_lag_add_mdev(dev);
+ err = mlx5_sriov_attach(dev);
+ if (err) {
+ mlx5_core_err(dev, "sriov init failed %d\n", err);
+ goto err_sriov;
+ }
+
+ mlx5_sf_dev_table_create(dev);
+
return 0;
-err_ec:
- mlx5_sriov_detach(dev);
err_sriov:
- mlx5_cleanup_fs(dev);
+ mlx5_lag_remove_mdev(dev);
+ mlx5_ec_cleanup(dev);
+err_ec:
+ mlx5_sf_hw_table_destroy(dev);
+err_vhca:
+ mlx5_vhca_event_stop(dev);
+err_set_hca:
+ mlx5_fs_core_cleanup(dev);
err_fs:
- mlx5_accel_tls_cleanup(dev);
-err_tls_start:
- mlx5_accel_ipsec_cleanup(dev);
-err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
+ mlx5_rsc_dump_cleanup(dev);
mlx5_hv_vhca_cleanup(dev->hv_vhca);
+ mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
-err_fw_tracer:
mlx5_eq_table_destroy(dev);
err_eq_table:
mlx5_irq_table_destroy(dev);
@@ -1149,13 +1336,18 @@ err_irq_table:
static void mlx5_unload(struct mlx5_core_dev *dev)
{
- mlx5_ec_cleanup(dev);
+ mlx5_sf_dev_table_destroy(dev);
mlx5_sriov_detach(dev);
- mlx5_cleanup_fs(dev);
- mlx5_accel_ipsec_cleanup(dev);
- mlx5_accel_tls_cleanup(dev);
+ mlx5_eswitch_disable(dev->priv.eswitch);
+ mlx5_lag_remove_mdev(dev);
+ mlx5_ec_cleanup(dev);
+ mlx5_sf_hw_table_destroy(dev);
+ mlx5_vhca_event_stop(dev);
+ mlx5_fs_core_cleanup(dev);
mlx5_fpga_device_stop(dev);
+ mlx5_rsc_dump_cleanup(dev);
mlx5_hv_vhca_cleanup(dev->hv_vhca);
+ mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
@@ -1164,115 +1356,241 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_put_uars_page(dev, dev->priv.uar);
}
-int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
+int mlx5_init_one(struct mlx5_core_dev *dev)
{
+ struct devlink *devlink = priv_to_devlink(dev);
int err = 0;
- dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
+ devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
- if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
- mlx5_core_warn(dev, "interface is up, NOP\n");
- goto out;
- }
- /* remove any previous indication of internal error */
dev->state = MLX5_DEVICE_STATE_UP;
- err = mlx5_function_setup(dev, boot);
+ err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
if (err)
- goto out;
+ goto err_function;
- if (boot) {
- err = mlx5_init_once(dev);
- if (err) {
- mlx5_core_err(dev, "sw objs init failed\n");
- goto function_teardown;
- }
+ err = mlx5_init_once(dev);
+ if (err) {
+ mlx5_core_err(dev, "sw objs init failed\n");
+ goto function_teardown;
}
err = mlx5_load(dev);
if (err)
goto err_load;
- if (boot) {
- err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
- if (err)
- goto err_devlink_reg;
- }
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
- if (mlx5_device_registered(dev)) {
- mlx5_attach_device(dev);
- } else {
- err = mlx5_register_device(dev);
- if (err) {
- mlx5_core_err(dev, "register device failed %d\n", err);
- goto err_reg_dev;
- }
- }
+ err = mlx5_devlink_register(priv_to_devlink(dev));
+ if (err)
+ goto err_devlink_reg;
- set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
-out:
- mutex_unlock(&dev->intf_state_mutex);
+ err = mlx5_register_device(dev);
+ if (err)
+ goto err_register;
- return err;
+ mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
+ return 0;
-err_reg_dev:
- if (boot)
- mlx5_devlink_unregister(priv_to_devlink(dev));
+err_register:
+ mlx5_devlink_unregister(priv_to_devlink(dev));
err_devlink_reg:
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev);
err_load:
- if (boot)
- mlx5_cleanup_once(dev);
+ mlx5_cleanup_once(dev);
function_teardown:
- mlx5_function_teardown(dev, boot);
+ mlx5_function_teardown(dev, true);
+err_function:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mutex_unlock(&dev->intf_state_mutex);
-
+ devl_unlock(devlink);
return err;
}
-int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
+void mlx5_uninit_one(struct mlx5_core_dev *dev)
{
- if (cleanup) {
- mlx5_unregister_device(dev);
- mlx5_drain_health_wq(dev);
- }
+ struct devlink *devlink = priv_to_devlink(dev);
+ devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
+
+ mlx5_unregister_device(dev);
+ mlx5_devlink_unregister(priv_to_devlink(dev));
+
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
mlx5_core_warn(dev, "%s: interface is down, NOP\n",
__func__);
- if (cleanup)
- mlx5_cleanup_once(dev);
+ mlx5_cleanup_once(dev);
goto out;
}
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+ mlx5_unload(dev);
+ mlx5_cleanup_once(dev);
+ mlx5_function_teardown(dev, true);
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
+}
- if (mlx5_device_registered(dev))
- mlx5_detach_device(dev);
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
+{
+ int err = 0;
+ u64 timeout;
+
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_warn(dev, "interface is up, NOP\n");
+ goto out;
+ }
+ /* remove any previous indication of internal error */
+ dev->state = MLX5_DEVICE_STATE_UP;
+
+ if (recovery)
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
+ else
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
+ err = mlx5_function_setup(dev, false, timeout);
+ if (err)
+ goto err_function;
+ err = mlx5_load(dev);
+ if (err)
+ goto err_load;
+
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+
+ err = mlx5_attach_device(dev);
+ if (err)
+ goto err_attach;
+
+ mutex_unlock(&dev->intf_state_mutex);
+ return 0;
+
+err_attach:
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev);
+err_load:
+ mlx5_function_teardown(dev, false);
+err_function:
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+ return err;
+}
- if (cleanup)
- mlx5_cleanup_once(dev);
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ int ret;
+
+ devl_lock(devlink);
+ ret = mlx5_load_one_devl_locked(dev, recovery);
+ devl_unlock(devlink);
+ return ret;
+}
+
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev)
+{
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&dev->intf_state_mutex);
+
+ mlx5_detach_device(dev);
+
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+ __func__);
+ goto out;
+ }
- mlx5_function_teardown(dev, cleanup);
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+ mlx5_unload(dev);
+ mlx5_function_teardown(dev, false);
out:
mutex_unlock(&dev->intf_state_mutex);
+}
+
+void mlx5_unload_one(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ devl_lock(devlink);
+ mlx5_unload_one_devl_locked(dev);
+ devl_unlock(devlink);
+}
+
+static const int types[] = {
+ MLX5_CAP_GENERAL,
+ MLX5_CAP_GENERAL_2,
+ MLX5_CAP_ETHERNET_OFFLOADS,
+ MLX5_CAP_IPOIB_ENHANCED_OFFLOADS,
+ MLX5_CAP_ODP,
+ MLX5_CAP_ATOMIC,
+ MLX5_CAP_ROCE,
+ MLX5_CAP_IPOIB_OFFLOADS,
+ MLX5_CAP_FLOW_TABLE,
+ MLX5_CAP_ESWITCH_FLOW_TABLE,
+ MLX5_CAP_ESWITCH,
+ MLX5_CAP_VECTOR_CALC,
+ MLX5_CAP_QOS,
+ MLX5_CAP_DEBUG,
+ MLX5_CAP_DEV_MEM,
+ MLX5_CAP_DEV_EVENT,
+ MLX5_CAP_TLS,
+ MLX5_CAP_VDPA_EMULATION,
+ MLX5_CAP_IPSEC,
+ MLX5_CAP_PORT_SELECTION,
+ MLX5_CAP_DEV_SHAMPO,
+ MLX5_CAP_MACSEC,
+ MLX5_CAP_ADV_VIRTUALIZATION,
+};
+
+static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
+{
+ int type;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
+ type = types[i];
+ kfree(dev->caps.hca[type]);
+ }
+}
+
+static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_hca_cap *cap;
+ int type;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
+ cap = kzalloc(sizeof(*cap), GFP_KERNEL);
+ if (!cap)
+ goto err;
+ type = types[i];
+ dev->caps.hca[type] = cap;
+ }
+
return 0;
+
+err:
+ mlx5_hca_caps_free(dev);
+ return -ENOMEM;
}
-static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
+int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
{
struct mlx5_priv *priv = &dev->priv;
int err;
- dev->profile = &profile[profile_idx];
-
+ memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
+ lockdep_register_key(&dev->lock_key);
mutex_init(&dev->intf_state_mutex);
+ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1282,13 +1600,16 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mutex_init(&priv->alloc_mutex);
mutex_init(&priv->pgdir_mutex);
INIT_LIST_HEAD(&priv->pgdir_list);
- spin_lock_init(&priv->mkey_lock);
- priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
- mlx5_debugfs_root);
- if (!priv->dbg_root) {
- dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n");
- return -ENOMEM;
+ priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
+ priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device),
+ mlx5_debugfs_root);
+ INIT_LIST_HEAD(&priv->traps);
+
+ err = mlx5_tout_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
+ goto err_timeout_init;
}
err = mlx5_health_init(dev);
@@ -1299,31 +1620,75 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
if (err)
goto err_pagealloc_init;
+ err = mlx5_adev_init(dev);
+ if (err)
+ goto err_adev_init;
+
+ err = mlx5_hca_caps_alloc(dev);
+ if (err)
+ goto err_hca_caps;
+
+ /* The conjunction of sw_vhca_id with sw_owner_id will be a global
+ * unique id per function which uses mlx5_core.
+ * Those values are supplied to FW as part of the init HCA command to
+ * be used by both driver and FW when it's applicable.
+ */
+ dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1,
+ MAX_SW_VHCA_ID,
+ GFP_KERNEL);
+ if (dev->priv.sw_vhca_id < 0)
+ mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n",
+ dev->priv.sw_vhca_id);
+
return 0;
+err_hca_caps:
+ mlx5_adev_cleanup(dev);
+err_adev_init:
+ mlx5_pagealloc_cleanup(dev);
err_pagealloc_init:
mlx5_health_cleanup(dev);
err_health_init:
- debugfs_remove(dev->priv.dbg_root);
-
+ mlx5_tout_cleanup(dev);
+err_timeout_init:
+ debugfs_remove(dev->priv.dbg.dbg_root);
+ mutex_destroy(&priv->pgdir_mutex);
+ mutex_destroy(&priv->alloc_mutex);
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
return err;
}
-static void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
+void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
{
+ struct mlx5_priv *priv = &dev->priv;
+
+ if (priv->sw_vhca_id > 0)
+ ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id);
+
+ mlx5_hca_caps_free(dev);
+ mlx5_adev_cleanup(dev);
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
- debugfs_remove_recursive(dev->priv.dbg_root);
+ mlx5_tout_cleanup(dev);
+ debugfs_remove_recursive(dev->priv.dbg.dbg_root);
+ mutex_destroy(&priv->pgdir_mutex);
+ mutex_destroy(&priv->alloc_mutex);
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
}
-#define MLX5_IB_MOD "mlx5_ib"
-static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct mlx5_core_dev *dev;
struct devlink *devlink;
int err;
- devlink = mlx5_devlink_alloc();
+ devlink = mlx5_devlink_alloc(&pdev->dev);
if (!devlink) {
dev_err(&pdev->dev, "devlink alloc failed\n");
return -ENOMEM;
@@ -1336,6 +1701,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
MLX5_COREDEV_VF : MLX5_COREDEV_PF;
+ dev->priv.adev_idx = mlx5_adev_idx_alloc();
+ if (dev->priv.adev_idx < 0) {
+ err = dev->priv.adev_idx;
+ goto adev_init_err;
+ }
+
err = mlx5_mdev_init(dev, prof_sel);
if (err)
goto mdev_init_err;
@@ -1347,27 +1718,28 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto pci_init_err;
}
- err = mlx5_load_one(dev, true);
+ err = mlx5_init_one(dev);
if (err) {
- mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n",
+ mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
err);
- goto err_load_one;
+ goto err_init_one;
}
- request_module_nowait(MLX5_IB_MOD);
-
err = mlx5_crdump_enable(dev);
if (err)
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
pci_save_state(pdev);
+ devlink_register(devlink);
return 0;
-err_load_one:
+err_init_one:
mlx5_pci_close(dev);
pci_init_err:
mlx5_mdev_uninit(dev);
mdev_init_err:
+ mlx5_adev_idx_free(dev->priv.adev_idx);
+adev_init_err:
mlx5_devlink_free(devlink);
return err;
@@ -1378,35 +1750,56 @@ static void remove_one(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
+ /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
+ * fw_reset before unregistering the devlink.
+ */
+ mlx5_drain_fw_reset(dev);
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
+ devlink_unregister(devlink);
+ mlx5_sriov_disable(pdev);
mlx5_crdump_disable(dev);
- mlx5_devlink_unregister(devlink);
-
- if (mlx5_unload_one(dev, true)) {
- mlx5_core_err(dev, "mlx5_unload_one failed\n");
- mlx5_health_flush(dev);
- return;
- }
-
+ mlx5_drain_health_wq(dev);
+ mlx5_uninit_one(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
+ mlx5_adev_idx_free(dev->priv.adev_idx);
mlx5_devlink_free(devlink);
}
+#define mlx5_pci_trace(dev, fmt, ...) ({ \
+ struct mlx5_core_dev *__dev = (dev); \
+ mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \
+ __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \
+ __dev->pci_status, ##__VA_ARGS__); \
+})
+
+static const char *result2str(enum pci_ers_result result)
+{
+ return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" :
+ result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" :
+ result == PCI_ERS_RESULT_RECOVERED ? "recovered" :
+ "unknown";
+}
+
static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ enum pci_ers_result res;
- mlx5_core_info(dev, "%s was called\n", __func__);
+ mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state);
mlx5_enter_error_state(dev, false);
mlx5_error_sw_reset(dev);
- mlx5_unload_one(dev, false);
+ mlx5_unload_one(dev);
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
- return state == pci_channel_io_perm_failure ?
+ res = state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+
+ mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res));
+ return res;
}
/* wait for the device to show vital signs by waiting
@@ -1440,28 +1833,34 @@ static int wait_vital(struct pci_dev *pdev)
static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
+ enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
- mlx5_core_info(dev, "%s was called\n", __func__);
+ mlx5_pci_trace(dev, "Enter\n");
err = mlx5_pci_enable_device(dev);
if (err) {
mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
__func__, err);
- return PCI_ERS_RESULT_DISCONNECT;
+ goto out;
}
pci_set_master(pdev);
pci_restore_state(pdev);
pci_save_state(pdev);
- if (wait_vital(pdev)) {
- mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
- return PCI_ERS_RESULT_DISCONNECT;
+ err = wait_vital(pdev);
+ if (err) {
+ mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
+ __func__, err);
+ goto out;
}
- return PCI_ERS_RESULT_RECOVERED;
+ res = PCI_ERS_RESULT_RECOVERED;
+out:
+ mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res));
+ return res;
}
static void mlx5_pci_resume(struct pci_dev *pdev)
@@ -1469,14 +1868,16 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
- mlx5_core_info(dev, "%s was called\n", __func__);
+ mlx5_pci_trace(dev, "Enter, loading driver..\n");
err = mlx5_load_one(dev, false);
- if (err)
- mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
- __func__, err);
- else
- mlx5_core_info(dev, "%s: device recovered\n", __func__);
+
+ if (!err)
+ devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+
+ mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
+ !err ? "recovered" : "Failed");
}
static const struct pci_error_handlers mlx5_err_handler = {
@@ -1505,7 +1906,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
}
/* Panic tear down fw command will stop the PCI bus communication
- * with the HCA, so the health polll is no longer needed.
+ * with the HCA, so the health poll is no longer needed.
*/
mlx5_drain_health_wq(dev);
mlx5_stop_health_poll(dev, false);
@@ -1541,12 +1942,29 @@ static void shutdown(struct pci_dev *pdev)
int err;
mlx5_core_info(dev, "Shutdown was called\n");
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
err = mlx5_try_fast_unload(dev);
if (err)
- mlx5_unload_one(dev, false);
+ mlx5_unload_one(dev);
mlx5_pci_disable_device(dev);
}
+static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+
+ mlx5_unload_one(dev);
+
+ return 0;
+}
+
+static int mlx5_resume(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+
+ return mlx5_load_one(dev, false);
+}
+
static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
{ PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */
@@ -1564,9 +1982,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
{ PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
+ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */
{ 0, }
};
@@ -1575,26 +1996,76 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
void mlx5_disable_device(struct mlx5_core_dev *dev)
{
mlx5_error_sw_reset(dev);
- mlx5_unload_one(dev, false);
+ mlx5_unload_one_devl_locked(dev);
}
-void mlx5_recover_device(struct mlx5_core_dev *dev)
+int mlx5_recover_device(struct mlx5_core_dev *dev)
{
- mlx5_pci_disable_device(dev);
- if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
- mlx5_pci_resume(dev->pdev);
+ if (!mlx5_core_is_sf(dev)) {
+ mlx5_pci_disable_device(dev);
+ if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
+ return -EIO;
+ }
+
+ return mlx5_load_one_devl_locked(dev, true);
}
static struct pci_driver mlx5_core_driver = {
- .name = DRIVER_NAME,
+ .name = KBUILD_MODNAME,
.id_table = mlx5_core_pci_table,
- .probe = init_one,
+ .probe = probe_one,
.remove = remove_one,
+ .suspend = mlx5_suspend,
+ .resume = mlx5_resume,
.shutdown = shutdown,
.err_handler = &mlx5_err_handler,
.sriov_configure = mlx5_core_sriov_configure,
+ .sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix,
+ .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
};
+/**
+ * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
+ * mlx5_core is its driver.
+ * @pdev: The associated PCI device.
+ *
+ * Upon return the interface state lock stay held to let caller uses it safely.
+ * Caller must ensure to use the returned mlx5 device for a narrow window
+ * and put it back with mlx5_vf_put_core_dev() immediately once usage was over.
+ *
+ * Return: Pointer to the associated mlx5_core_dev or NULL.
+ */
+struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
+ if (IS_ERR(mdev))
+ return NULL;
+
+ mutex_lock(&mdev->intf_state_mutex);
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
+ mutex_unlock(&mdev->intf_state_mutex);
+ return NULL;
+ }
+
+ return mdev;
+}
+EXPORT_SYMBOL(mlx5_vf_get_core_dev);
+
+/**
+ * mlx5_vf_put_core_dev - Put the mlx5 core device back.
+ * @mdev: The mlx5 core device.
+ *
+ * Upon return the interface state lock is unlocked and caller should not
+ * access the mdev any more.
+ */
+void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
+{
+ mutex_unlock(&mdev->intf_state_mutex);
+}
+EXPORT_SYMBOL(mlx5_vf_put_core_dev);
+
static void mlx5_core_verify_params(void)
{
if (prof_sel >= ARRAY_SIZE(profile)) {
@@ -1610,22 +2081,32 @@ static int __init init(void)
{
int err;
+ WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME),
+ "mlx5_core name not in sync with kernel module name");
+
get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
mlx5_core_verify_params();
- mlx5_accel_ipsec_build_fs_cmds();
mlx5_register_debugfs();
err = pci_register_driver(&mlx5_core_driver);
if (err)
goto err_debug;
-#ifdef CONFIG_MLX5_CORE_EN
- mlx5e_init();
-#endif
+ err = mlx5_sf_driver_register();
+ if (err)
+ goto err_sf;
+
+ err = mlx5e_init();
+ if (err)
+ goto err_en;
return 0;
+err_en:
+ mlx5_sf_driver_unregister();
+err_sf:
+ pci_unregister_driver(&mlx5_core_driver);
err_debug:
mlx5_unregister_debugfs();
return err;
@@ -1633,9 +2114,8 @@ err_debug:
static void __exit cleanup(void)
{
-#ifdef CONFIG_MLX5_CORE_EN
mlx5e_cleanup();
-#endif
+ mlx5_sf_driver_unregister();
pci_unregister_driver(&mlx5_core_driver);
mlx5_unregister_debugfs();
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index ba2b09cc192f..495cca58dccc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -31,36 +31,32 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
#include <rdma/ib_verbs.h>
#include "mlx5_core.h"
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
{
- u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
void *gid;
MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
memcpy(gid, mgid, sizeof(*mgid));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, attach_to_mcg, in);
}
EXPORT_SYMBOL(mlx5_core_attach_mcg);
int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
{
- u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
void *gid;
MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
memcpy(gid, mgid, sizeof(*mgid));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, detach_from_mcg, in);
}
EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index da67b28d6e23..a806e3de7b7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,14 +38,10 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
-#include <linux/ptp_clock_kernel.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/driver.h>
-#define DRIVER_NAME "mlx5_core"
-#define DRIVER_VERSION "5.0-0"
-
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
@@ -101,6 +97,35 @@ do { \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...)
+{
+ struct device *device = dev->device;
+ struct va_format vaf;
+ va_list args;
+
+ if (WARN_ONCE(level < LOGLEVEL_EMERG || level > LOGLEVEL_DEBUG,
+ "Level %d is out of range, set to default level\n", level))
+ level = LOGLEVEL_DEFAULT;
+
+ va_start(args, format);
+ vaf.fmt = format;
+ vaf.va = &args;
+
+ dev_printk_emit(level, device, "%s %s: %pV", dev_driver_string(device), dev_name(device),
+ &vaf);
+ va_end(args);
+}
+
+#define mlx5_log(__dev, level, format, ...) \
+ mlx5_printk(__dev, level, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev)
+{
+ return &dev->pdev->dev;
+}
+
enum {
MLX5_CMD_DATA, /* print command payload only */
MLX5_CMD_TIME, /* print command execution time */
@@ -116,21 +141,61 @@ enum mlx5_semaphore_space_address {
MLX5_SEMAPHORE_SW_RESET = 0x20,
};
+#define MLX5_DEFAULT_PROF 2
+
+static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed,
+ size_t item_size, size_t num_items,
+ const char *func, int line)
+{
+ int inlen;
+
+ if (fixed > INT_MAX || item_size > INT_MAX || num_items > INT_MAX) {
+ mlx5_core_err(dev, "%s: %s:%d: input values too big: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ if (check_mul_overflow((int)item_size, (int)num_items, &inlen)) {
+ mlx5_core_err(dev, "%s: %s:%d: multiplication overflow: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ if (check_add_overflow((int)fixed, inlen, &inlen)) {
+ mlx5_core_err(dev, "%s: %s:%d: addition overflow: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ return inlen;
+}
+
+#define MLX5_FLEXIBLE_INLEN(dev, fixed, item_size, num_items) \
+ mlx5_flexible_inlen(dev, fixed, item_size, num_items, __func__, __LINE__)
+
int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
int mlx5_query_board_id(struct mlx5_core_dev *dev);
+int mlx5_cmd_init(struct mlx5_core_dev *dev);
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
+void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
+ enum mlx5_cmdif_state cmdif_state);
int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_error_sw_reset(struct mlx5_core_dev *dev);
+u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev);
+int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev);
-void mlx5_recover_device(struct mlx5_core_dev *dev);
+int mlx5_recover_device(struct mlx5_core_dev *dev);
int mlx5_sriov_init(struct mlx5_core_dev *dev);
void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
int mlx5_sriov_attach(struct mlx5_core_dev *dev);
void mlx5_sriov_detach(struct mlx5_core_dev *dev);
int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+void mlx5_sriov_disable(struct pci_dev *pdev);
+int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
@@ -141,10 +206,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
- struct ptp_system_timestamp *sts);
-void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
void mlx5_cmd_flush(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
@@ -156,43 +218,32 @@ int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group,
int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
u8 feature_group, u8 access_reg_group);
-void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
-void mlx5_lag_remove(struct mlx5_core_dev *dev);
-
-int mlx5_irq_table_init(struct mlx5_core_dev *dev);
-void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
-int mlx5_irq_table_create(struct mlx5_core_dev *dev);
-void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
-int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
- struct notifier_block *nb);
-int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
- struct notifier_block *nb);
-struct cpumask *
-mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
-struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);
void mlx5_events_start(struct mlx5_core_dev *dev);
void mlx5_events_stop(struct mlx5_core_dev *dev);
-void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
-void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
-void mlx5_attach_device(struct mlx5_core_dev *dev);
+int mlx5_adev_idx_alloc(void);
+void mlx5_adev_idx_free(int idx);
+void mlx5_adev_cleanup(struct mlx5_core_dev *dev);
+int mlx5_adev_init(struct mlx5_core_dev *dev);
+
+int mlx5_attach_device(struct mlx5_core_dev *dev);
void mlx5_detach_device(struct mlx5_core_dev *dev);
-bool mlx5_device_registered(struct mlx5_core_dev *dev);
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
-void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
-void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
int mlx5_dev_list_trylock(void);
-bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
-
int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size);
int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
@@ -211,8 +262,13 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw,
int mlx5_fw_version_query(struct mlx5_core_dev *dev,
u32 *running_ver, u32 *stored_ver);
-void mlx5e_init(void);
+#ifdef CONFIG_MLX5_CORE_EN
+int mlx5e_init(void);
void mlx5e_cleanup(void);
+#else
+static inline int mlx5e_init(void){ return 0; }
+static inline void mlx5e_cleanup(void){}
+#endif
static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
{
@@ -231,7 +287,17 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
MLX5_CAP_GEN(dev, lag_master);
}
-void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev);
+static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev)
+{
+ int ret;
+
+ mlx5_dev_list_lock();
+ ret = mlx5_rescan_drivers_locked(dev);
+ mlx5_dev_list_unlock();
+ return ret;
+}
+
void mlx5_lag_update(struct mlx5_core_dev *dev);
enum {
@@ -244,6 +310,33 @@ enum {
u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state);
-int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup);
-int mlx5_load_one(struct mlx5_core_dev *dev, bool boot);
+static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev)
+{
+ return dev->coredev_type == MLX5_COREDEV_SF;
+}
+
+int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx);
+void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
+int mlx5_init_one(struct mlx5_core_dev *dev);
+void mlx5_uninit_one(struct mlx5_core_dev *dev);
+void mlx5_unload_one(struct mlx5_core_dev *dev);
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev);
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
+
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
+
+void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
+static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+
+ return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+}
+
+bool mlx5_eth_supported(struct mlx5_core_dev *dev);
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev);
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev);
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev);
+
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
new file mode 100644
index 000000000000..23cb63fa4588
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_IRQ_H__
+#define __MLX5_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_COMP_EQS_PER_SF 8
+
+struct mlx5_irq;
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
+struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
+
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
+ int msix_vec_count);
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
+ struct cpumask *affinity);
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs);
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
+int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
+int mlx5_irq_get_index(struct mlx5_irq *irq);
+
+struct mlx5_irq_pool;
+#ifdef CONFIG_MLX5_SF
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs);
+struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask);
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs);
+#else
+static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev,
+ struct mlx5_irq **irqs, int num_irqs) {}
+#endif
+#endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 42cc3c7ac5b6..9d735c343a3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -31,81 +31,49 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
-int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
- struct mlx5_core_mkey *mkey,
- struct mlx5_async_ctx *async_ctx, u32 *in,
- int inlen, u32 *out, int outlen,
- mlx5_async_cbk_t callback,
- struct mlx5_async_work *context)
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
+ int inlen)
{
- u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+ u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
u32 mkey_index;
- void *mkc;
int err;
- u8 key;
-
- spin_lock_irq(&dev->priv.mkey_lock);
- key = dev->priv.mkey_key++;
- spin_unlock_irq(&dev->priv.mkey_lock);
- mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
- MLX5_SET(mkc, mkc, mkey_7_0, key);
-
- if (callback)
- return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
- callback, context);
err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
if (err)
return err;
mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
- mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
- mkey->size = MLX5_GET64(mkc, mkc, len);
- mkey->key = mlx5_idx_to_mkey(mkey_index) | key;
- mkey->pd = MLX5_GET(mkc, mkc, pd);
+ *mkey = MLX5_GET(create_mkey_in, in, memory_key_mkey_entry.mkey_7_0) |
+ mlx5_idx_to_mkey(mkey_index);
- mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
- mkey_index, key, mkey->key);
+ mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, *mkey);
return 0;
}
-EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
-
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
- struct mlx5_core_mkey *mkey,
- u32 *in, int inlen)
-{
- return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
- NULL, 0, NULL, NULL);
-}
EXPORT_SYMBOL(mlx5_core_create_mkey);
-int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
- struct mlx5_core_mkey *mkey)
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey)
{
- u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
- MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
+ return mlx5_cmd_exec_in(dev, destroy_mkey, in);
}
EXPORT_SYMBOL(mlx5_core_destroy_mkey);
-int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
- u32 *out, int outlen)
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out,
+ int outlen)
{
- u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {};
memset(out, 0, outlen);
MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
- MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+ MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
EXPORT_SYMBOL(mlx5_core_query_mkey);
@@ -123,8 +91,8 @@ static inline u32 mlx5_get_psv(u32 *out, int psv_index)
int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
int npsvs, u32 *sig_index)
{
- u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {};
int i, err;
if (npsvs > MLX5_MAX_PSVS)
@@ -134,7 +102,7 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
MLX5_SET(create_psv_in, in, pd, pdn);
MLX5_SET(create_psv_in, in, num_psv, npsvs);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, create_psv, in, out);
if (err)
return err;
@@ -147,11 +115,10 @@ EXPORT_SYMBOL(mlx5_core_create_psv);
int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
{
- u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {};
MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV);
MLX5_SET(destroy_psv_in, in, psvn, psv_num);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, destroy_psv, in);
}
EXPORT_SYMBOL(mlx5_core_destroy_psv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 91bd258ecf1b..60596357bfc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -32,12 +32,12 @@
#include <linux/highmem.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/delay.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
+#include <linux/xarray.h>
#include "mlx5_core.h"
#include "lib/eq.h"
+#include "lib/tout.h"
enum {
MLX5_PAGES_CANT_GIVE = 0,
@@ -51,21 +51,17 @@ struct mlx5_pages_req {
u8 ec_function;
s32 npages;
struct work_struct work;
+ u8 release_all;
};
struct fw_page {
struct rb_node rb_node;
u64 addr;
struct page *page;
- u16 func_id;
+ u32 function;
unsigned long bitmask;
struct list_head list;
- unsigned free_count;
-};
-
-enum {
- MAX_RECLAIM_TIME_MSECS = 5000,
- MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
+ unsigned int free_count;
};
enum {
@@ -73,15 +69,50 @@ enum {
MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
};
-static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+static u32 get_function(u16 func_id, bool ec_function)
+{
+ return (u32)func_id | (ec_function << 16);
+}
+
+static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function)
+{
+ struct rb_root *root;
+ int err;
+
+ root = xa_load(&dev->priv.page_root_xa, function);
+ if (root)
+ return root;
+
+ root = kzalloc(sizeof(*root), GFP_KERNEL);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+
+ err = xa_insert(&dev->priv.page_root_xa, function, root, GFP_KERNEL);
+ if (err) {
+ kfree(root);
+ return ERR_PTR(err);
+ }
+
+ *root = RB_ROOT;
+
+ return root;
+}
+
+static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u32 function)
{
- struct rb_root *root = &dev->priv.page_root;
- struct rb_node **new = &root->rb_node;
struct rb_node *parent = NULL;
+ struct rb_root *root;
+ struct rb_node **new;
struct fw_page *nfp;
struct fw_page *tfp;
int i;
+ root = page_root_per_function(dev, function);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+
+ new = &root->rb_node;
+
while (*new) {
parent = *new;
tfp = rb_entry(parent, struct fw_page, rb_node);
@@ -99,7 +130,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
nfp->addr = addr;
nfp->page = page;
- nfp->func_id = func_id;
+ nfp->function = function;
nfp->free_count = MLX5_NUM_4K_IN_PAGE;
for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
set_bit(i, &nfp->bitmask);
@@ -111,13 +142,20 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
return 0;
}
-static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr,
+ u32 function)
{
- struct rb_root *root = &dev->priv.page_root;
- struct rb_node *tmp = root->rb_node;
struct fw_page *result = NULL;
+ struct rb_root *root;
+ struct rb_node *tmp;
struct fw_page *tfp;
+ root = xa_load(&dev->priv.page_root_xa, function);
+ if (WARN_ON_ONCE(!root))
+ return NULL;
+
+ tmp = root->rb_node;
+
while (tmp) {
tfp = rb_entry(tmp, struct fw_page, rb_node);
if (tfp->addr < addr) {
@@ -136,8 +174,8 @@ static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
s32 *npages, int boot)
{
- u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {};
int err;
MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
@@ -146,7 +184,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, query_pages, in, out);
if (err)
return err;
@@ -156,15 +194,21 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
return err;
}
-static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function)
{
- struct fw_page *fp;
+ struct fw_page *fp = NULL;
+ struct fw_page *iter;
unsigned n;
- if (list_empty(&dev->priv.free_list))
+ list_for_each_entry(iter, &dev->priv.free_list, list) {
+ if (iter->function != function)
+ continue;
+ fp = iter;
+ }
+
+ if (list_empty(&dev->priv.free_list) || !fp)
return -ENOMEM;
- fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
if (n >= MLX5_NUM_4K_IN_PAGE) {
mlx5_core_warn(dev, "alloc 4k bug\n");
@@ -182,36 +226,46 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
-static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
+ bool in_free_list)
+{
+ struct rb_root *root;
+
+ root = xa_load(&dev->priv.page_root_xa, fwp->function);
+ if (WARN_ON_ONCE(!root))
+ return;
+
+ rb_erase(&fwp->rb_node, root);
+ if (in_free_list)
+ list_del(&fwp->list);
+ dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(fwp->page);
+ kfree(fwp);
+}
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function)
{
struct fw_page *fwp;
int n;
- fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
+ fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function);
if (!fwp) {
- mlx5_core_warn(dev, "page not found\n");
+ mlx5_core_warn_rl(dev, "page not found\n");
return;
}
-
n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
fwp->free_count++;
set_bit(n, &fwp->bitmask);
- if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
- rb_erase(&fwp->rb_node, &dev->priv.page_root);
- if (fwp->free_count != 1)
- list_del(&fwp->list);
- dma_unmap_page(dev->device, addr & MLX5_U64_4K_PAGE_MASK,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
- __free_page(fwp->page);
- kfree(fwp);
- } else if (fwp->free_count == 1) {
+ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE)
+ free_fwp(dev, fwp, fwp->free_count != 1);
+ else if (fwp->free_count == 1)
list_add(&fwp->list, &dev->priv.free_list);
- }
}
-static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+static int alloc_system_page(struct mlx5_core_dev *dev, u32 function)
{
- struct device *device = dev->device;
+ struct device *device = mlx5_core_dma_dev(dev);
int nid = dev_to_node(device);
struct page *page;
u64 zero_addr = 1;
@@ -237,7 +291,7 @@ map:
goto map;
}
- err = insert_page(dev, addr, page, func_id);
+ err = insert_page(dev, addr, page, function);
if (err) {
mlx5_core_err(dev, "failed to track allocated page\n");
dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -257,8 +311,7 @@ err_mapping:
static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
bool ec_function)
{
- u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
int err;
MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
@@ -266,17 +319,19 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
MLX5_SET(manage_pages_in, in, function_id, func_id);
MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_in(dev, manage_pages, in);
if (err)
mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
func_id, err);
}
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
- int notify_fail, bool ec_function)
+ int event, bool ec_function)
{
+ u32 function = get_function(func_id, ec_function);
u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
+ int notify_fail = event;
u64 addr;
int err;
u32 *in;
@@ -292,12 +347,14 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
for (i = 0; i < npages; i++) {
retry:
- err = alloc_4k(dev, &addr);
+ err = alloc_4k(dev, &addr, function);
if (err) {
if (err == -ENOMEM)
- err = alloc_system_page(dev, func_id);
- if (err)
+ err = alloc_system_page(dev, function);
+ if (err) {
+ dev->priv.fw_pages_alloc_failed += (npages - i);
goto out_4k;
+ }
goto retry;
}
@@ -310,18 +367,27 @@ retry:
MLX5_SET(manage_pages_in, in, input_num_entries, npages);
MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out));
+ if (err == -EREMOTEIO) {
+ notify_fail = 0;
+ /* if triggered by FW and failed by FW ignore */
+ if (event) {
+ err = 0;
+ goto out_dropped;
+ }
+ }
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
func_id, npages, err);
- goto out_4k;
+ goto out_dropped;
}
dev->priv.fw_pages += npages;
if (func_id)
dev->priv.vfs_pages += npages;
else if (mlx5_core_is_ecpf(dev) && !ec_function)
- dev->priv.peer_pf_pages += npages;
+ dev->priv.host_pf_pages += npages;
mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
npages, ec_function, func_id, err);
@@ -329,9 +395,11 @@ retry:
kvfree(in);
return 0;
+out_dropped:
+ dev->priv.give_pages_dropped += npages;
out_4k:
for (i--; i >= 0; i--)
- free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
+ free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function);
out_free:
kvfree(in);
if (notify_fail)
@@ -339,42 +407,96 @@ out_free:
return err;
}
+static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id,
+ bool ec_function)
+{
+ u32 function = get_function(func_id, ec_function);
+ struct rb_root *root;
+ struct rb_node *p;
+ int npages = 0;
+
+ root = xa_load(&dev->priv.page_root_xa, function);
+ if (WARN_ON_ONCE(!root))
+ return;
+
+ p = rb_first(root);
+ while (p) {
+ struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);
+
+ p = rb_next(p);
+ npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count);
+ free_fwp(dev, fwp, fwp->free_count);
+ }
+
+ dev->priv.fw_pages -= npages;
+ if (func_id)
+ dev->priv.vfs_pages -= npages;
+ else if (mlx5_core_is_ecpf(dev) && !ec_function)
+ dev->priv.host_pf_pages -= npages;
+
+ mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n",
+ npages, ec_function, func_id);
+}
+
+static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index,
+ u32 npages)
+{
+ u32 pages_set = 0;
+ unsigned int n;
+
+ for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) {
+ MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set,
+ fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE));
+ pages_set++;
+
+ if (!--npages)
+ break;
+ }
+
+ return pages_set;
+}
+
static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
u32 *in, int in_size, u32 *out, int out_size)
{
+ struct rb_root *root;
struct fw_page *fwp;
struct rb_node *p;
+ bool ec_function;
u32 func_id;
u32 npages;
u32 i = 0;
- if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
- return mlx5_cmd_exec(dev, in, in_size, out, out_size);
+ if (!mlx5_cmd_is_down(dev))
+ return mlx5_cmd_do(dev, in, in_size, out, out_size);
/* No hard feelings, we want our pages back! */
npages = MLX5_GET(manage_pages_in, in, input_num_entries);
func_id = MLX5_GET(manage_pages_in, in, function_id);
+ ec_function = MLX5_GET(manage_pages_in, in, embedded_cpu_function);
+
+ root = xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function));
+ if (WARN_ON_ONCE(!root))
+ return -EEXIST;
- p = rb_first(&dev->priv.page_root);
+ p = rb_first(root);
while (p && i < npages) {
fwp = rb_entry(p, struct fw_page, rb_node);
p = rb_next(p);
- if (fwp->func_id != func_id)
- continue;
- MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr);
- i++;
+ i += fwp_fill_manage_pages_out(fwp, out, i, npages - i);
}
MLX5_SET(manage_pages_out, out, output_num_entries, i);
return 0;
}
-static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
- int *nclaimed, bool ec_function)
+static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ int *nclaimed, bool event, bool ec_function)
{
+ u32 function = get_function(func_id, ec_function);
int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
- u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
int num_claimed;
u32 *out;
int err;
@@ -394,9 +516,21 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
MLX5_SET(manage_pages_in, in, input_num_entries, npages);
MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
- mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
+ mlx5_core_dbg(dev, "func 0x%x, npages %d, outlen %d\n",
+ func_id, npages, outlen);
err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
if (err) {
+ npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+ dev->priv.reclaim_pages_discard += npages;
+ }
+ /* if triggered by FW event and failed by FW then ignore */
+ if (event && err == -EREMOTEIO) {
+ err = 0;
+ goto out_free;
+ }
+
+ err = mlx5_cmd_check(dev, err, in, out);
+ if (err) {
mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
@@ -410,7 +544,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
}
for (i = 0; i < num_claimed; i++)
- free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+ free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), function);
if (nclaimed)
*nclaimed = num_claimed;
@@ -419,7 +553,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
if (func_id)
dev->priv.vfs_pages -= num_claimed;
else if (mlx5_core_is_ecpf(dev) && !ec_function)
- dev->priv.peer_pf_pages -= num_claimed;
+ dev->priv.host_pf_pages -= num_claimed;
out_free:
kvfree(out);
@@ -432,9 +566,11 @@ static void pages_work_handler(struct work_struct *work)
struct mlx5_core_dev *dev = req->dev;
int err = 0;
- if (req->npages < 0)
+ if (req->release_all)
+ release_all_pages(dev, req->func_id, req->ec_function);
+ else if (req->npages < 0)
err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
- req->ec_function);
+ true, req->ec_function);
else if (req->npages > 0)
err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
@@ -447,6 +583,7 @@ static void pages_work_handler(struct work_struct *work)
enum {
EC_FUNCTION_MASK = 0x8000,
+ RELEASE_ALL_PAGES_MASK = 0x4000,
};
static int req_pages_handler(struct notifier_block *nb,
@@ -457,6 +594,7 @@ static int req_pages_handler(struct notifier_block *nb,
struct mlx5_priv *priv;
struct mlx5_eqe *eqe;
bool ec_function;
+ bool release_all;
u16 func_id;
s32 npages;
@@ -467,8 +605,10 @@ static int req_pages_handler(struct notifier_block *nb,
func_id = be16_to_cpu(eqe->data.req_pages.func_id);
npages = be32_to_cpu(eqe->data.req_pages.num_pages);
ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
- mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
- func_id, npages);
+ release_all = be16_to_cpu(eqe->data.req_pages.ec_function) &
+ RELEASE_ALL_PAGES_MASK;
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d, release_all %d\n",
+ func_id, npages, release_all);
req = kzalloc(sizeof(*req), GFP_ATOMIC);
if (!req) {
mlx5_core_warn(dev, "failed to allocate pages request\n");
@@ -479,6 +619,7 @@ static int req_pages_handler(struct notifier_block *nb,
req->func_id = func_id;
req->npages = npages;
req->ec_function = ec_function;
+ req->release_all = release_all;
INIT_WORK(&req->work, pages_work_handler);
queue_work(dev->priv.pg_wq, &req->work);
return NOTIFY_OK;
@@ -486,8 +627,8 @@ static int req_pages_handler(struct notifier_block *nb,
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
{
- u16 uninitialized_var(func_id);
- s32 uninitialized_var(npages);
+ u16 func_id;
+ s32 npages;
int err;
err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
@@ -517,35 +658,50 @@ static int optimal_reclaimed_pages(void)
return ret;
}
-int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
+ struct rb_root *root, u16 func_id)
{
- unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
- struct fw_page *fwp;
- struct rb_node *p;
- int nclaimed = 0;
- int err = 0;
+ u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES));
+ unsigned long end = jiffies + recl_pages_to_jiffies;
- do {
- p = rb_first(&dev->priv.page_root);
- if (p) {
- fwp = rb_entry(p, struct fw_page, rb_node);
- err = reclaim_pages(dev, fwp->func_id,
- optimal_reclaimed_pages(),
- &nclaimed, mlx5_core_is_ecpf(dev));
+ while (!RB_EMPTY_ROOT(root)) {
+ int nclaimed;
+ int err;
- if (err) {
- mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
- err);
- return err;
- }
- if (nclaimed)
- end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+ err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(),
+ &nclaimed, false, mlx5_core_is_ecpf(dev));
+ if (err) {
+ mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n",
+ err, func_id);
+ return err;
}
+
+ if (nclaimed)
+ end = jiffies + recl_pages_to_jiffies;
+
if (time_after(jiffies, end)) {
mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
break;
}
- } while (p);
+ }
+
+ return 0;
+}
+
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+ struct rb_root *root;
+ unsigned long id;
+ void *entry;
+
+ xa_for_each(&dev->priv.page_root_xa, id, entry) {
+ root = entry;
+ mlx5_reclaim_root_pages(dev, root, id);
+ xa_erase(&dev->priv.page_root_xa, id);
+ kfree(root);
+ }
+
+ WARN_ON(!xa_empty(&dev->priv.page_root_xa));
WARN(dev->priv.fw_pages,
"FW pages counter is %d after reclaiming all pages\n",
@@ -553,26 +709,30 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
WARN(dev->priv.vfs_pages,
"VFs FW pages counter is %d after reclaiming all pages\n",
dev->priv.vfs_pages);
- WARN(dev->priv.peer_pf_pages,
- "Peer PF FW pages counter is %d after reclaiming all pages\n",
- dev->priv.peer_pf_pages);
+ WARN(dev->priv.host_pf_pages,
+ "External host PF FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.host_pf_pages);
return 0;
}
int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
- dev->priv.page_root = RB_ROOT;
INIT_LIST_HEAD(&dev->priv.free_list);
dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
if (!dev->priv.pg_wq)
return -ENOMEM;
+ xa_init(&dev->priv.page_root_xa);
+ mlx5_pages_debugfs_init(dev);
+
return 0;
}
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
+ mlx5_pages_debugfs_cleanup(dev);
+ xa_destroy(&dev->priv.page_root_xa);
destroy_workqueue(dev->priv.pg_wq);
}
@@ -590,7 +750,8 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
{
- unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES));
+ unsigned long end = jiffies + recl_vf_pages_to_jiffies;
int prev_pages = *pages;
/* In case of internal error we will free the pages manually later */
@@ -606,7 +767,7 @@ int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
return -ETIMEDOUT;
}
if (*pages < prev_pages) {
- end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ end = jiffies + recl_vf_pages_to_jiffies;
prev_pages = *pages;
}
msleep(50);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 373981a659c7..662f1d55e30e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -3,332 +3,688 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "pci_irq.h"
+#include "lib/sf.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
-#define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_SFS_PER_CTRL_IRQ 64
+#define MLX5_IRQ_CTRL_SF_MAX 8
+/* min num of vectors for SFs to be enabled */
+#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+
+#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
+#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
+#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
+#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
struct mlx5_irq {
struct atomic_notifier_head nh;
cpumask_var_t mask;
char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_irq_pool *pool;
+ int refcount;
+ u32 index;
+ int irqn;
};
struct mlx5_irq_table {
- struct mlx5_irq *irq;
- int nvec;
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rmap;
-#endif
+ struct mlx5_irq_pool *pf_pool;
+ struct mlx5_irq_pool *sf_ctrl_pool;
+ struct mlx5_irq_pool *sf_comp_pool;
};
-int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+/**
+ * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
+ * to be ssigned to each VF.
+ * @dev: PF to work on
+ * @num_vfs: Number of enabled VFs
+ */
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
- struct mlx5_irq_table *irq_table;
+ int num_vf_msix, min_msix, max_msix;
- irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
- if (!irq_table)
- return -ENOMEM;
+ num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+ if (!num_vf_msix)
+ return 0;
- dev->priv.irq_table = irq_table;
- return 0;
+ min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
+ max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
+
+ /* Limit maximum number of MSI-X vectors so the default configuration
+ * has some available in the pool. This will allow the user to increase
+ * the number of vectors in a VF without having to first size-down other
+ * VFs.
+ */
+ return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}
-void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+/**
+ * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
+ * @dev: PF to work on
+ * @function_id: Internal PCI VF function IDd
+ * @msix_vec_count: Number of MSI-X vectors to set
+ */
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
+ int msix_vec_count)
{
- kvfree(dev->priv.irq_table);
+ int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *hca_cap = NULL, *query_cap = NULL, *cap;
+ int num_vf_msix, min_msix, max_msix;
+ int ret;
+
+ num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+ if (!num_vf_msix)
+ return 0;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
+ return -EOPNOTSUPP;
+
+ min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
+ max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
+
+ if (msix_vec_count < min_msix)
+ return -EINVAL;
+
+ if (msix_vec_count > max_msix)
+ return -EOVERFLOW;
+
+ query_cap = kvzalloc(query_sz, GFP_KERNEL);
+ hca_cap = kvzalloc(set_sz, GFP_KERNEL);
+ if (!hca_cap || !query_cap) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
+ if (ret)
+ goto out;
+
+ cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+ memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
+
+ MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
+ MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
+
+ MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
+ ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
+out:
+ kvfree(hca_cap);
+ kvfree(query_cap);
+ return ret;
}
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
+static void irq_release(struct mlx5_irq *irq)
{
- return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
+ struct mlx5_irq_pool *pool = irq->pool;
+
+ xa_erase(&pool->irqs, irq->index);
+ /* free_irq requires that affinity_hint and rmap will be cleared
+ * before calling it. This is why there is asymmetry with set_rmap
+ * which should be called after alloc_irq but before request_irq.
+ */
+ irq_update_affinity_hint(irq->irqn, NULL);
+ free_cpumask_var(irq->mask);
+ free_irq(irq->irqn, &irq->nh);
+ kfree(irq);
}
-static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
+int mlx5_irq_put(struct mlx5_irq *irq)
{
- struct mlx5_irq_table *irq_table = dev->priv.irq_table;
-
- return &irq_table->irq[vecidx];
+ struct mlx5_irq_pool *pool = irq->pool;
+ int ret = 0;
+
+ mutex_lock(&pool->lock);
+ irq->refcount--;
+ if (!irq->refcount) {
+ irq_release(irq);
+ ret = 1;
+ }
+ mutex_unlock(&pool->lock);
+ return ret;
}
-int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
- struct notifier_block *nb)
+int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
- struct mlx5_irq *irq;
+ lockdep_assert_held(&irq->pool->lock);
+ return irq->refcount;
+}
- irq = &irq_table->irq[vecidx];
- return atomic_notifier_chain_register(&irq->nh, nb);
+int mlx5_irq_get_locked(struct mlx5_irq *irq)
+{
+ lockdep_assert_held(&irq->pool->lock);
+ if (WARN_ON_ONCE(!irq->refcount))
+ return 0;
+ irq->refcount++;
+ return 1;
}
-int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
- struct notifier_block *nb)
+static int irq_get(struct mlx5_irq *irq)
{
- struct mlx5_irq *irq;
+ int err;
- irq = &irq_table->irq[vecidx];
- return atomic_notifier_chain_unregister(&irq->nh, nb);
+ mutex_lock(&irq->pool->lock);
+ err = mlx5_irq_get_locked(irq);
+ mutex_unlock(&irq->pool->lock);
+ return err;
}
-static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+static irqreturn_t irq_int_handler(int irq, void *nh)
{
atomic_notifier_call_chain(nh, 0, NULL);
return IRQ_HANDLED;
}
-static void irq_set_name(char *name, int vecidx)
+static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
+{
+ snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
+}
+
+static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
- if (vecidx == 0) {
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
+ if (!pool->xa_num_irqs.max) {
+ /* in case we only have a single irq for the device */
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
return;
}
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
- vecidx - MLX5_IRQ_VEC_COMP_BASE);
- return;
+ if (vecidx == pool->xa_num_irqs.max) {
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
+ return;
+ }
+
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
-static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity)
{
+ struct mlx5_core_dev *dev = pool->dev;
char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_irq *irq;
int err;
- int i;
- for (i = 0; i < nvec; i++) {
- struct mlx5_irq *irq = mlx5_irq_get(dev, i);
- int irqn = pci_irq_vector(dev->pdev, i);
-
- irq_set_name(name, i);
- ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
- snprintf(irq->name, MLX5_MAX_IRQ_NAME,
- "%s@pci:%s", name, pci_name(dev->pdev));
- err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
- &irq->nh);
- if (err) {
- mlx5_core_err(dev, "Failed to request irq\n");
- goto err_request_irq;
- }
+ irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+ if (!irq)
+ return ERR_PTR(-ENOMEM);
+ irq->irqn = pci_irq_vector(dev->pdev, i);
+ if (!mlx5_irq_pool_is_sf_pool(pool))
+ irq_set_name(pool, name, i);
+ else
+ irq_sf_set_name(pool, name, i);
+ ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+ snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+ "%s@pci:%s", name, pci_name(dev->pdev));
+ err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+ &irq->nh);
+ if (err) {
+ mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
+ goto err_req_irq;
}
- return 0;
+ if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+ mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
+ err = -ENOMEM;
+ goto err_cpumask;
+ }
+ if (affinity) {
+ cpumask_copy(irq->mask, affinity);
+ irq_set_affinity_and_hint(irq->irqn, irq->mask);
+ }
+ irq->pool = pool;
+ irq->refcount = 1;
+ irq->index = i;
+ err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
+ irq->index, err);
+ goto err_xa;
+ }
+ return irq;
+err_xa:
+ irq_update_affinity_hint(irq->irqn, NULL);
+ free_cpumask_var(irq->mask);
+err_cpumask:
+ free_irq(irq->irqn, &irq->nh);
+err_req_irq:
+ kfree(irq);
+ return ERR_PTR(err);
+}
+
+int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
+{
+ int ret;
+
+ ret = irq_get(irq);
+ if (!ret)
+ /* Something very bad happens here, we are enabling EQ
+ * on non-existing IRQ.
+ */
+ return -ENOENT;
+ ret = atomic_notifier_chain_register(&irq->nh, nb);
+ if (ret)
+ mlx5_irq_put(irq);
+ return ret;
+}
-err_request_irq:
- for (; i >= 0; i--) {
- struct mlx5_irq *irq = mlx5_irq_get(dev, i);
- int irqn = pci_irq_vector(dev->pdev, i);
+int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
+{
+ int err = 0;
- free_irq(irqn, &irq->nh);
+ err = atomic_notifier_chain_unregister(&irq->nh, nb);
+ mlx5_irq_put(irq);
+ return err;
+}
+
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
+{
+ return irq->mask;
+}
+
+int mlx5_irq_get_index(struct mlx5_irq *irq)
+{
+ return irq->index;
+}
+
+/* irq_pool API */
+
+/* requesting an irq from a given pool according to given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+ struct cpumask *affinity)
+{
+ struct mlx5_irq *irq;
+
+ mutex_lock(&pool->lock);
+ irq = xa_load(&pool->irqs, vecidx);
+ if (irq) {
+ mlx5_irq_get_locked(irq);
+ goto unlock;
}
- return err;
+ irq = mlx5_irq_alloc(pool, vecidx, affinity);
+unlock:
+ mutex_unlock(&pool->lock);
+ return irq;
}
-static void irq_clear_rmap(struct mlx5_core_dev *dev)
+static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+ return irq_table->sf_ctrl_pool;
+}
- free_irq_cpu_rmap(irq_table->rmap);
-#endif
+static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->sf_comp_pool;
}
-static int irq_set_rmap(struct mlx5_core_dev *mdev)
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
- int err = 0;
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
- int num_affinity_vec;
- int vecidx;
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
- num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
- irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
- if (!irq_table->rmap) {
- err = -ENOMEM;
- mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
- goto err_out;
+ if (mlx5_core_is_sf(dev))
+ pool = sf_irq_pool_get(irq_table);
+
+ /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
+ * the PF IRQs pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
+}
+
+static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
+
+ if (mlx5_core_is_sf(dev))
+ pool = sf_ctrl_irq_pool_get(irq_table);
+
+ /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
+ * the PF IRQs pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
+}
+
+/**
+ * mlx5_irqs_release - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
+{
+ int i;
+
+ for (i = 0; i < nirqs; i++) {
+ synchronize_irq(irqs[i]->irqn);
+ mlx5_irq_put(irqs[i]);
}
+}
+
+/**
+ * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
+ * @ctrl_irq: ctrl IRQ to be released.
+ */
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
+{
+ mlx5_irqs_release(&ctrl_irq, 1);
+}
- vecidx = MLX5_IRQ_VEC_COMP_BASE;
- for (; vecidx < irq_table->nvec; vecidx++) {
- err = irq_cpu_rmap_add(irq_table->rmap,
- pci_irq_vector(mdev->pdev, vecidx));
- if (err) {
- mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
- err);
- goto err_irq_cpu_rmap_add;
+/**
+ * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
+ * @dev: mlx5 device that requesting the IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ cpumask_copy(req_mask, cpu_online_mask);
+ if (!mlx5_irq_pool_is_sf_pool(pool)) {
+ /* In case we are allocating a control IRQ for PF/VF */
+ if (!pool->xa_num_irqs.max) {
+ cpumask_clear(req_mask);
+ /* In case we only have a single IRQ for PF/VF */
+ cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
}
+ /* Allocate the IRQ in the last index of the pool */
+ irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+ } else {
+ irq = mlx5_irq_affinity_request(pool, req_mask);
}
- return 0;
-err_irq_cpu_rmap_add:
- irq_clear_rmap(mdev);
-err_out:
-#endif
- return err;
+ free_cpumask_var(req_mask);
+ return irq;
}
-/* Completion IRQ vectors */
+/**
+ * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
+ * @dev: mlx5 device that requesting the IRQ.
+ * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
+ * provided.
+ * @affinity: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
+ struct cpumask *affinity)
+{
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool;
+ struct mlx5_irq *irq;
-static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+ pool = irq_table->pf_pool;
+ irq = irq_pool_request_vector(pool, vecidx, affinity);
+ if (IS_ERR(irq))
+ return irq;
+ mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ irq->irqn, cpumask_pr_args(affinity),
+ irq->refcount / MLX5_EQ_REFS_PER_IRQ);
+ return irq;
+}
+
+/**
+ * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
{
- int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+ mlx5_irqs_release(irqs, nirqs);
+}
+
+/**
+ * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @cpus: CPUs array for binding the IRQs
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bound to at most 1 CPU.
+ * This function is requests nirqs IRQs, starting from @vecidx.
+ *
+ * This function returns the number of IRQs requested, (which might be smaller than
+ * @nirqs), if successful, or a negative error code in case of an error.
+ */
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ cpumask_var_t req_mask;
struct mlx5_irq *irq;
- int irqn;
+ int i;
- irq = mlx5_irq_get(mdev, vecidx);
- irqn = pci_irq_vector(mdev->pdev, vecidx);
- if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
return -ENOMEM;
+ for (i = 0; i < nirqs; i++) {
+ cpumask_set_cpu(cpus[i], req_mask);
+ irq = mlx5_irq_request(dev, i, req_mask);
+ if (IS_ERR(irq))
+ break;
+ cpumask_clear(req_mask);
+ irqs[i] = irq;
}
- cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
- irq->mask);
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irqn, irq->mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
- irqn);
+ free_cpumask_var(req_mask);
+ return i ? i : PTR_ERR(irq);
+}
- return 0;
+static struct mlx5_irq_pool *
+irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
+ u32 min_threshold, u32 max_threshold)
+{
+ struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+ pool->dev = dev;
+ mutex_init(&pool->lock);
+ xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
+ pool->xa_num_irqs.min = start;
+ pool->xa_num_irqs.max = start + size - 1;
+ if (name)
+ snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
+ "%s", name);
+ pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
+ pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
+ mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
+ name, size, start);
+ return pool;
}
-static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+static void irq_pool_free(struct mlx5_irq_pool *pool)
{
- int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
struct mlx5_irq *irq;
- int irqn;
+ unsigned long index;
- irq = mlx5_irq_get(mdev, vecidx);
- irqn = pci_irq_vector(mdev->pdev, vecidx);
- irq_set_affinity_hint(irqn, NULL);
- free_cpumask_var(irq->mask);
+ /* There are cases in which we are destrying the irq_table before
+ * freeing all the IRQs, fast teardown for example. Hence, free the irqs
+ * which might not have been freed.
+ */
+ xa_for_each(&pool->irqs, index, irq)
+ irq_release(irq);
+ xa_destroy(&pool->irqs);
+ mutex_destroy(&pool->lock);
+ kfree(pool->irqs_per_cpu);
+ kvfree(pool);
}
-static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
{
- int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int num_sf_ctrl_by_msix;
+ int num_sf_ctrl_by_sfs;
+ int num_sf_ctrl;
int err;
- int i;
- for (i = 0; i < nvec; i++) {
- err = set_comp_irq_affinity_hint(mdev, i);
- if (err)
- goto err_out;
+ /* init pf_pool */
+ table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
+ if (IS_ERR(table->pf_pool))
+ return PTR_ERR(table->pf_pool);
+ if (!mlx5_sf_max_functions(dev))
+ return 0;
+ if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
+ mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
+ return 0;
}
- return 0;
+ /* init sf_ctrl_pool */
+ num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
+ num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
+ MLX5_SFS_PER_CTRL_IRQ);
+ num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
+ num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
+ table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
+ "mlx5_sf_ctrl",
+ MLX5_EQ_SHARE_IRQ_MIN_CTRL,
+ MLX5_EQ_SHARE_IRQ_MAX_CTRL);
+ if (IS_ERR(table->sf_ctrl_pool)) {
+ err = PTR_ERR(table->sf_ctrl_pool);
+ goto err_pf;
+ }
+ /* init sf_comp_pool */
+ table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
+ sf_vec - num_sf_ctrl, "mlx5_sf_comp",
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
+ if (IS_ERR(table->sf_comp_pool)) {
+ err = PTR_ERR(table->sf_comp_pool);
+ goto err_sf_ctrl;
+ }
-err_out:
- for (i--; i >= 0; i--)
- clear_comp_irq_affinity_hint(mdev, i);
+ table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
+ if (!table->sf_comp_pool->irqs_per_cpu) {
+ err = -ENOMEM;
+ goto err_irqs_per_cpu;
+ }
+ return 0;
+
+err_irqs_per_cpu:
+ irq_pool_free(table->sf_comp_pool);
+err_sf_ctrl:
+ irq_pool_free(table->sf_ctrl_pool);
+err_pf:
+ irq_pool_free(table->pf_pool);
return err;
}
-static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+static void irq_pools_destroy(struct mlx5_irq_table *table)
{
- int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
- int i;
-
- for (i = 0; i < nvec; i++)
- clear_comp_irq_affinity_hint(mdev, i);
+ if (table->sf_ctrl_pool) {
+ irq_pool_free(table->sf_comp_pool);
+ irq_pool_free(table->sf_ctrl_pool);
+ }
+ irq_pool_free(table->pf_pool);
}
-struct cpumask *
-mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+/* irq_table API */
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
- return irq_table->irq[vecidx].mask;
+ struct mlx5_irq_table *irq_table;
+
+ if (mlx5_core_is_sf(dev))
+ return 0;
+
+ irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
+ dev->priv.numa_node);
+ if (!irq_table)
+ return -ENOMEM;
+
+ dev->priv.irq_table = irq_table;
+ return 0;
}
-#ifdef CONFIG_RFS_ACCEL
-struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
- return irq_table->rmap;
+ if (mlx5_core_is_sf(dev))
+ return;
+
+ kvfree(dev->priv.irq_table);
}
-#endif
-static void unrequest_irqs(struct mlx5_core_dev *dev)
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
- struct mlx5_irq_table *table = dev->priv.irq_table;
- int i;
-
- for (i = 0; i < table->nvec; i++)
- free_irq(pci_irq_vector(dev->pdev, i),
- &mlx5_irq_get(dev, i)->nh);
+ if (!table->pf_pool->xa_num_irqs.max)
+ return 1;
+ return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
}
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_irq_table *table = priv->irq_table;
int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
MLX5_CAP_GEN(dev, max_num_eqs) :
1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
+ int total_vec;
+ int pf_vec;
int err;
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_IRQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
- return -ENOMEM;
+ if (mlx5_core_is_sf(dev))
+ return 0;
- table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
- if (!table->irq)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
- nvec, PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq;
- }
+ pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
+ pf_vec = min_t(int, pf_vec, num_eqs);
- table->nvec = nvec;
+ total_vec = pf_vec;
+ if (mlx5_sf_max_functions(dev))
+ total_vec += MLX5_IRQ_CTRL_SF_MAX +
+ MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
- err = irq_set_rmap(dev);
- if (err)
- goto err_set_rmap;
+ total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
+ if (total_vec < 0)
+ return total_vec;
+ pf_vec = min(pf_vec, total_vec);
- err = request_irqs(dev, nvec);
+ err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
if (err)
- goto err_request_irqs;
+ pci_free_irq_vectors(dev->pdev);
- err = set_comp_irq_affinity_hints(dev);
- if (err) {
- mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
- goto err_set_affinity;
- }
-
- return 0;
-
-err_set_affinity:
- unrequest_irqs(dev);
-err_request_irqs:
- irq_clear_rmap(dev);
-err_set_rmap:
- pci_free_irq_vectors(dev->pdev);
-err_free_irq:
- kfree(table->irq);
return err;
}
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
struct mlx5_irq_table *table = dev->priv.irq_table;
- int i;
- /* free_irq requires that affinity and rmap will be cleared
- * before calling it. This is why there is asymmetry with set_rmap
- * which should be called after alloc_irq but before request_irq.
+ if (mlx5_core_is_sf(dev))
+ return;
+
+ /* There are cases where IRQs still will be in used when we reaching
+ * to here. Hence, making sure all the irqs are released.
*/
- irq_clear_rmap(dev);
- clear_comp_irqs_affinity_hints(dev);
- for (i = 0; i < table->nvec; i++)
- free_irq(pci_irq_vector(dev->pdev, i),
- &mlx5_irq_get(dev, i)->nh);
+ irq_pools_destroy(table);
pci_free_irq_vectors(dev->pdev);
- kfree(table->irq);
}
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
+{
+ if (table->sf_comp_pool)
+ return min_t(int, num_online_cpus(),
+ table->sf_comp_pool->xa_num_irqs.max -
+ table->sf_comp_pool->xa_num_irqs.min + 1);
+ else
+ return mlx5_irq_table_get_num_comp(table);
+}
+
+struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_MLX5_SF
+ if (mlx5_core_is_sf(dev))
+ return dev->priv.parent_mdev->priv.irq_table;
+#endif
+ return dev->priv.irq_table;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
new file mode 100644
index 000000000000..5c7e68bee43a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __PCI_IRQ_H__
+#define __PCI_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+/* max irq_index is 2047, so four chars */
+#define MLX5_MAX_IRQ_IDX_CHARS (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
+
+struct mlx5_irq;
+
+struct mlx5_irq_pool {
+ char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
+ struct xa_limit xa_num_irqs;
+ struct mutex lock; /* sync IRQs creations */
+ struct xarray irqs;
+ u32 max_threshold;
+ u32 min_threshold;
+ u16 *irqs_per_cpu;
+ struct mlx5_core_dev *dev;
+};
+
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
+static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+{
+ return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+}
+
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity);
+int mlx5_irq_get_locked(struct mlx5_irq *irq);
+int mlx5_irq_read_locked(struct mlx5_irq *irq);
+int mlx5_irq_put(struct mlx5_irq *irq);
+
+#endif /* __PCI_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index bd830d8d6c5f..ee5ffdeb9015 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -31,19 +31,17 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
{
- u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
int err;
MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, alloc_pd, in, out);
if (!err)
*pdn = MLX5_GET(alloc_pd_out, out, pd);
return err;
@@ -52,11 +50,10 @@ EXPORT_SYMBOL(mlx5_core_alloc_pd);
int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
MLX5_SET(dealloc_pd_in, in, pd, pdn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, dealloc_pd, in);
}
EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index cc262b30aed5..a1548e6bfb35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -33,9 +33,10 @@
#include <linux/mlx5/port.h>
#include "mlx5_core.h"
-int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
- int size_in, void *data_out, int size_out,
- u16 reg_id, int arg, int write)
+/* calling with verbose false will not print error to log */
+int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in,
+ void *data_out, int size_out, u16 reg_id, int arg,
+ int write, bool verbose)
{
int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
@@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
MLX5_SET(access_register_in, in, argument, arg);
MLX5_SET(access_register_in, in, register_id, reg_id);
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ err = mlx5_cmd_do(dev, in, inlen, out, outlen);
+ if (verbose)
+ err = mlx5_cmd_check(dev, err, in, out);
if (err)
goto out;
@@ -69,6 +72,15 @@ out:
kvfree(in);
return err;
}
+EXPORT_SYMBOL_GPL(mlx5_access_reg);
+
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+ int size_in, void *data_out, int size_out,
+ u16 reg_id, int arg, int write)
+{
+ return mlx5_access_reg(dev, data_in, size_in, data_out, size_out,
+ reg_id, arg, write, true);
+}
EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
@@ -154,24 +166,8 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
sizeof(out), MLX5_REG_MLCR, 0, 1);
}
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
- u8 *link_width_oper, u8 local_port)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port);
- if (err)
- return err;
-
- *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
-
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
- u8 *proto_oper, u8 local_port)
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+ u16 *proto_oper, u8 local_port)
{
u32 out[MLX5_ST_SZ_DW(ptys_reg)];
int err;
@@ -181,11 +177,12 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
if (err)
return err;
+ *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
*proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
return 0;
}
-EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
+EXPORT_SYMBOL(mlx5_query_ib_port_oper);
/* This function should be used after setting a port register only */
void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
@@ -278,7 +275,6 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
{
u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0};
u32 out[MLX5_ST_SZ_DW(pmlp_reg)];
- int module_mapping;
int err;
MLX5_SET(pmlp_reg, in, local_port, 1);
@@ -287,13 +283,47 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
if (err)
return err;
- module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping);
- *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK;
+ *module_num = MLX5_GET(lane_2_module_mapping,
+ MLX5_ADDR_OF(pmlp_reg, out, lane0_module_mapping),
+ module);
+
+ return 0;
+}
+
+static int mlx5_query_module_id(struct mlx5_core_dev *dev, int module_num,
+ u8 *module_id)
+{
+ u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {};
+ u32 out[MLX5_ST_SZ_DW(mcia_reg)];
+ int err, status;
+ u8 *ptr;
+
+ MLX5_SET(mcia_reg, in, i2c_device_address, MLX5_I2C_ADDR_LOW);
+ MLX5_SET(mcia_reg, in, module, module_num);
+ MLX5_SET(mcia_reg, in, device_address, 0);
+ MLX5_SET(mcia_reg, in, page_number, 0);
+ MLX5_SET(mcia_reg, in, size, 1);
+ MLX5_SET(mcia_reg, in, l, 0);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCIA, 0, 0);
+ if (err)
+ return err;
+
+ status = MLX5_GET(mcia_reg, out, status);
+ if (status) {
+ mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n",
+ status);
+ return -EIO;
+ }
+ ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
+
+ *module_id = ptr[0];
return 0;
}
-static int mlx5_eeprom_page(int offset)
+static int mlx5_qsfp_eeprom_page(u16 offset)
{
if (offset < MLX5_EEPROM_PAGE_LENGTH)
/* Addresses between 0-255 - page 00 */
@@ -307,7 +337,7 @@ static int mlx5_eeprom_page(int offset)
MLX5_EEPROM_HIGH_PAGE_LENGTH);
}
-static int mlx5_eeprom_high_page_offset(int page_num)
+static int mlx5_qsfp_eeprom_high_page_offset(int page_num)
{
if (!page_num) /* Page 0 always start from low page */
return 0;
@@ -316,42 +346,48 @@ static int mlx5_eeprom_high_page_offset(int page_num)
return page_num * MLX5_EEPROM_HIGH_PAGE_LENGTH;
}
-int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
- u16 offset, u16 size, u8 *data)
+static void mlx5_qsfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset)
{
- int module_num, page_num, status, err;
- u32 out[MLX5_ST_SZ_DW(mcia_reg)];
- u32 in[MLX5_ST_SZ_DW(mcia_reg)];
- u16 i2c_addr;
- void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
+ *i2c_addr = MLX5_I2C_ADDR_LOW;
+ *page_num = mlx5_qsfp_eeprom_page(*offset);
+ *offset -= mlx5_qsfp_eeprom_high_page_offset(*page_num);
+}
- err = mlx5_query_module_num(dev, &module_num);
- if (err)
- return err;
+static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset)
+{
+ *i2c_addr = MLX5_I2C_ADDR_LOW;
+ *page_num = 0;
- memset(in, 0, sizeof(in));
- size = min_t(int, size, MLX5_EEPROM_MAX_BYTES);
+ if (*offset < MLX5_EEPROM_PAGE_LENGTH)
+ return;
- /* Get the page number related to the given offset */
- page_num = mlx5_eeprom_page(offset);
+ *i2c_addr = MLX5_I2C_ADDR_HIGH;
+ *offset -= MLX5_EEPROM_PAGE_LENGTH;
+}
- /* Set the right offset according to the page number,
- * For page_num > 0, relative offset is always >= 128 (high page).
- */
- offset -= mlx5_eeprom_high_page_offset(page_num);
+static int mlx5_mcia_max_bytes(struct mlx5_core_dev *dev)
+{
+ /* mcia supports either 12 dwords or 32 dwords */
+ return (MLX5_CAP_MCAM_FEATURE(dev, mcia_32dwords) ? 32 : 12) * sizeof(u32);
+}
- if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
- /* Cross pages read, read until offset 256 in low page */
- size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
+static int mlx5_query_mcia(struct mlx5_core_dev *dev,
+ struct mlx5_module_eeprom_query_params *params, u8 *data)
+{
+ u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {};
+ u32 out[MLX5_ST_SZ_DW(mcia_reg)];
+ int status, err;
+ void *ptr;
+ u16 size;
- i2c_addr = MLX5_I2C_ADDR_LOW;
+ size = min_t(int, params->size, mlx5_mcia_max_bytes(dev));
MLX5_SET(mcia_reg, in, l, 0);
- MLX5_SET(mcia_reg, in, module, module_num);
- MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr);
- MLX5_SET(mcia_reg, in, page_number, page_num);
- MLX5_SET(mcia_reg, in, device_address, offset);
MLX5_SET(mcia_reg, in, size, size);
+ MLX5_SET(mcia_reg, in, module, params->module_number);
+ MLX5_SET(mcia_reg, in, device_address, params->offset);
+ MLX5_SET(mcia_reg, in, page_number, params->page);
+ MLX5_SET(mcia_reg, in, i2c_device_address, params->i2c_address);
err = mlx5_core_access_reg(dev, in, sizeof(in), out,
sizeof(out), MLX5_REG_MCIA, 0, 0);
@@ -365,12 +401,72 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
return -EIO;
}
+ ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
memcpy(data, ptr, size);
return size;
}
+
+int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
+ u16 offset, u16 size, u8 *data)
+{
+ struct mlx5_module_eeprom_query_params query = {0};
+ u8 module_id;
+ int err;
+
+ err = mlx5_query_module_num(dev, &query.module_number);
+ if (err)
+ return err;
+
+ err = mlx5_query_module_id(dev, query.module_number, &module_id);
+ if (err)
+ return err;
+
+ switch (module_id) {
+ case MLX5_MODULE_ID_SFP:
+ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ case MLX5_MODULE_ID_QSFP:
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ default:
+ mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+ return -EINVAL;
+ }
+
+ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+ size = MLX5_EEPROM_PAGE_LENGTH - offset;
+
+ query.size = size;
+ query.offset = offset;
+
+ return mlx5_query_mcia(dev, &query, data);
+}
EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom);
+int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev,
+ struct mlx5_module_eeprom_query_params *params,
+ u8 *data)
+{
+ int err;
+
+ err = mlx5_query_module_num(dev, &params->module_number);
+ if (err)
+ return err;
+
+ if (params->i2c_address != MLX5_I2C_ADDR_HIGH &&
+ params->i2c_address != MLX5_I2C_ADDR_LOW) {
+ mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address);
+ return -EINVAL;
+ }
+
+ return mlx5_query_mcia(dev, params, data);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page);
+
static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc,
int pvlc_size, u8 local_port)
{
@@ -397,29 +493,6 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap);
-int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
- u8 port_num, void *out, size_t sz)
-{
- u32 *in;
- int err;
-
- in = kvzalloc(sz, GFP_KERNEL);
- if (!in) {
- err = -ENOMEM;
- return err;
- }
-
- MLX5_SET(ppcnt_reg, in, local_port, port_num);
-
- MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
- err = mlx5_core_access_reg(dev, in, sz, out,
- sz, MLX5_REG_PPCNT, 0, 0);
-
- kvfree(in);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
-
static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
u32 out_size)
{
@@ -763,24 +836,23 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit);
int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
{
- u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {};
MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, set_wol_rol, in);
}
EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
{
- u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {};
int err;
MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(mdev, query_wol_rol, in, out);
if (!err)
*wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
new file mode 100644
index 000000000000..0777be24a307
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "qos.h"
+
+#define MLX5_QOS_DEFAULT_DWRR_UID 0
+
+bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev)
+{
+ if (!MLX5_CAP_GEN(mdev, qos))
+ return false;
+ if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling))
+ return false;
+ if (!MLX5_CAP_QOS(mdev, nic_bw_share))
+ return false;
+ if (!MLX5_CAP_QOS(mdev, nic_rate_limit))
+ return false;
+ return true;
+}
+
+int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+ return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group);
+}
+
+int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+ return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+ sched_ctx, id);
+}
+
+int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ void *attr;
+
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+ attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
+
+ return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+ sched_ctx, id);
+}
+
+int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id)
+{
+ return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id);
+}
+
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 id)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ u32 bitmask = 0;
+
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+
+ return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+ sched_ctx, id, bitmask);
+}
+
+int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id)
+{
+ return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h
new file mode 100644
index 000000000000..125e4e47e6f7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_QOS_H
+#define __MLX5_QOS_H
+
+#include "mlx5_core.h"
+
+#define MLX5_DEBUG_QOS_MASK BIT(4)
+
+#define qos_err(mdev, fmt, ...) \
+ mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__)
+#define qos_warn(mdev, fmt, ...) \
+ mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__)
+#define qos_dbg(mdev, fmt, ...) \
+ mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__)
+
+bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev);
+int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+
+int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id);
+int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id);
+int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id);
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share,
+ u32 max_avg_bw, u32 id);
+int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
deleted file mode 100644
index c3aea4cc2fff..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ /dev/null
@@ -1,737 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/gfp.h>
-#include <linux/export.h>
-#include <linux/mlx5/cmd.h>
-#include <linux/mlx5/qp.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/transobj.h>
-
-#include "mlx5_core.h"
-#include "lib/eq.h"
-
-static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
- struct mlx5_core_dct *dct);
-
-static struct mlx5_core_rsc_common *
-mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
-{
- struct mlx5_core_rsc_common *common;
- unsigned long flags;
-
- spin_lock_irqsave(&table->lock, flags);
-
- common = radix_tree_lookup(&table->tree, rsn);
- if (common)
- refcount_inc(&common->refcount);
-
- spin_unlock_irqrestore(&table->lock, flags);
-
- return common;
-}
-
-void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
-{
- if (refcount_dec_and_test(&common->refcount))
- complete(&common->free);
-}
-
-static u64 qp_allowed_event_types(void)
-{
- u64 mask;
-
- mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
- BIT(MLX5_EVENT_TYPE_COMM_EST) |
- BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
- BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
- BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
- BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
- BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
- BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
-
- return mask;
-}
-
-static u64 rq_allowed_event_types(void)
-{
- u64 mask;
-
- mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
- BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
-
- return mask;
-}
-
-static u64 sq_allowed_event_types(void)
-{
- return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
-}
-
-static u64 dct_allowed_event_types(void)
-{
- return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
-}
-
-static bool is_event_type_allowed(int rsc_type, int event_type)
-{
- switch (rsc_type) {
- case MLX5_EVENT_QUEUE_TYPE_QP:
- return BIT(event_type) & qp_allowed_event_types();
- case MLX5_EVENT_QUEUE_TYPE_RQ:
- return BIT(event_type) & rq_allowed_event_types();
- case MLX5_EVENT_QUEUE_TYPE_SQ:
- return BIT(event_type) & sq_allowed_event_types();
- case MLX5_EVENT_QUEUE_TYPE_DCT:
- return BIT(event_type) & dct_allowed_event_types();
- default:
- WARN(1, "Event arrived for unknown resource type");
- return false;
- }
-}
-
-static int rsc_event_notifier(struct notifier_block *nb,
- unsigned long type, void *data)
-{
- struct mlx5_core_rsc_common *common;
- struct mlx5_qp_table *table;
- struct mlx5_core_dev *dev;
- struct mlx5_core_dct *dct;
- u8 event_type = (u8)type;
- struct mlx5_core_qp *qp;
- struct mlx5_priv *priv;
- struct mlx5_eqe *eqe;
- u32 rsn;
-
- switch (event_type) {
- case MLX5_EVENT_TYPE_DCT_DRAINED:
- eqe = data;
- rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
- rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
- break;
- case MLX5_EVENT_TYPE_PATH_MIG:
- case MLX5_EVENT_TYPE_COMM_EST:
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- eqe = data;
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
- break;
- default:
- return NOTIFY_DONE;
- }
-
- table = container_of(nb, struct mlx5_qp_table, nb);
- priv = container_of(table, struct mlx5_priv, qp_table);
- dev = container_of(priv, struct mlx5_core_dev, priv);
-
- mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
-
- common = mlx5_get_rsc(table, rsn);
- if (!common) {
- mlx5_core_dbg(dev, "Async event for unknown resource 0x%x\n", rsn);
- return NOTIFY_OK;
- }
-
- if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
- mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
- event_type, rsn);
- goto out;
- }
-
- switch (common->res) {
- case MLX5_RES_QP:
- case MLX5_RES_RQ:
- case MLX5_RES_SQ:
- qp = (struct mlx5_core_qp *)common;
- qp->event(qp, event_type);
- break;
- case MLX5_RES_DCT:
- dct = (struct mlx5_core_dct *)common;
- if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
- complete(&dct->drained);
- break;
- default:
- mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
- }
-out:
- mlx5_core_put_rsc(common);
-
- return NOTIFY_OK;
-}
-
-static int create_resource_common(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *qp,
- int rsc_type)
-{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
- int err;
-
- qp->common.res = rsc_type;
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree,
- qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
- qp);
- spin_unlock_irq(&table->lock);
- if (err)
- return err;
-
- refcount_set(&qp->common.refcount, 1);
- init_completion(&qp->common.free);
- qp->pid = current->pid;
-
- return 0;
-}
-
-static void destroy_resource_common(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *qp)
-{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
- unsigned long flags;
-
- spin_lock_irqsave(&table->lock, flags);
- radix_tree_delete(&table->tree,
- qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
- spin_unlock_irqrestore(&table->lock, flags);
- mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
- wait_for_completion(&qp->common.free);
-}
-
-static int _mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
- struct mlx5_core_dct *dct, bool need_cleanup)
-{
- u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
- struct mlx5_core_qp *qp = &dct->mqp;
- int err;
-
- err = mlx5_core_drain_dct(dev, dct);
- if (err) {
- if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- goto destroy;
- } else {
- mlx5_core_warn(
- dev, "failed drain DCT 0x%x with error 0x%x\n",
- qp->qpn, err);
- return err;
- }
- }
- wait_for_completion(&dct->drained);
-destroy:
- if (need_cleanup)
- destroy_resource_common(dev, &dct->mqp);
- MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
- MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
- MLX5_SET(destroy_dct_in, in, uid, qp->uid);
- err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
- (void *)&out, sizeof(out));
- return err;
-}
-
-int mlx5_core_create_dct(struct mlx5_core_dev *dev,
- struct mlx5_core_dct *dct,
- u32 *in, int inlen,
- u32 *out, int outlen)
-{
- struct mlx5_core_qp *qp = &dct->mqp;
- int err;
-
- init_completion(&dct->drained);
- MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
-
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
- if (err) {
- mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
- return err;
- }
-
- qp->qpn = MLX5_GET(create_dct_out, out, dctn);
- qp->uid = MLX5_GET(create_dct_in, in, uid);
- err = create_resource_common(dev, qp, MLX5_RES_DCT);
- if (err)
- goto err_cmd;
-
- return 0;
-err_cmd:
- _mlx5_core_destroy_dct(dev, dct, false);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
-
-int mlx5_core_create_qp(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *qp,
- u32 *in, int inlen)
-{
- u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0};
- u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)];
- u32 din[MLX5_ST_SZ_DW(destroy_qp_in)];
- int err;
-
- MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
-
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (err)
- return err;
-
- qp->uid = MLX5_GET(create_qp_in, in, uid);
- qp->qpn = MLX5_GET(create_qp_out, out, qpn);
- mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
-
- err = create_resource_common(dev, qp, MLX5_RES_QP);
- if (err)
- goto err_cmd;
-
- err = mlx5_debug_qp_add(dev, qp);
- if (err)
- mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
- qp->qpn);
-
- atomic_inc(&dev->num_qps);
-
- return 0;
-
-err_cmd:
- memset(din, 0, sizeof(din));
- memset(dout, 0, sizeof(dout));
- MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
- MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
- MLX5_SET(destroy_qp_in, din, uid, qp->uid);
- mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
-
-static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
- struct mlx5_core_dct *dct)
-{
- u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0};
- struct mlx5_core_qp *qp = &dct->mqp;
-
- MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
- MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
- MLX5_SET(drain_dct_in, in, uid, qp->uid);
- return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
- (void *)&out, sizeof(out));
-}
-
-int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
- struct mlx5_core_dct *dct)
-{
- return _mlx5_core_destroy_dct(dev, dct, true);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
-
-int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *qp)
-{
- u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {0};
- int err;
-
- mlx5_debug_qp_remove(dev, qp);
-
- destroy_resource_common(dev, qp);
-
- MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
- MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
- MLX5_SET(destroy_qp_in, in, uid, qp->uid);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (err)
- return err;
-
- atomic_dec(&dev->num_qps);
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
-
-int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
- u32 timeout_usec)
-{
- u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {0};
-
- MLX5_SET(set_delay_drop_params_in, in, opcode,
- MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
- MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout,
- timeout_usec / 100);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop);
-
-struct mbox_info {
- u32 *in;
- u32 *out;
- int inlen;
- int outlen;
-};
-
-static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
-{
- mbox->inlen = inlen;
- mbox->outlen = outlen;
- mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
- mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
- if (!mbox->in || !mbox->out) {
- kfree(mbox->in);
- kfree(mbox->out);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static void mbox_free(struct mbox_info *mbox)
-{
- kfree(mbox->in);
- kfree(mbox->out);
-}
-
-static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
- u32 opt_param_mask, void *qpc,
- struct mbox_info *mbox, u16 uid)
-{
- mbox->out = NULL;
- mbox->in = NULL;
-
-#define MBOX_ALLOC(mbox, typ) \
- mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
-
-#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \
- do { \
- MLX5_SET(typ##_in, in, opcode, _opcode); \
- MLX5_SET(typ##_in, in, qpn, _qpn); \
- MLX5_SET(typ##_in, in, uid, _uid); \
- } while (0)
-
-#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \
- do { \
- MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \
- MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
- memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \
- MLX5_ST_SZ_BYTES(qpc)); \
- } while (0)
-
- switch (opcode) {
- /* 2RST & 2ERR */
- case MLX5_CMD_OP_2RST_QP:
- if (MBOX_ALLOC(mbox, qp_2rst))
- return -ENOMEM;
- MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
- break;
- case MLX5_CMD_OP_2ERR_QP:
- if (MBOX_ALLOC(mbox, qp_2err))
- return -ENOMEM;
- MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
- break;
-
- /* MODIFY with QPC */
- case MLX5_CMD_OP_RST2INIT_QP:
- if (MBOX_ALLOC(mbox, rst2init_qp))
- return -ENOMEM;
- MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc, uid);
- break;
- case MLX5_CMD_OP_INIT2RTR_QP:
- if (MBOX_ALLOC(mbox, init2rtr_qp))
- return -ENOMEM;
- MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc, uid);
- break;
- case MLX5_CMD_OP_RTR2RTS_QP:
- if (MBOX_ALLOC(mbox, rtr2rts_qp))
- return -ENOMEM;
- MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc, uid);
- break;
- case MLX5_CMD_OP_RTS2RTS_QP:
- if (MBOX_ALLOC(mbox, rts2rts_qp))
- return -ENOMEM;
- MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc, uid);
- break;
- case MLX5_CMD_OP_SQERR2RTS_QP:
- if (MBOX_ALLOC(mbox, sqerr2rts_qp))
- return -ENOMEM;
- MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc, uid);
- break;
- case MLX5_CMD_OP_INIT2INIT_QP:
- if (MBOX_ALLOC(mbox, init2init_qp))
- return -ENOMEM;
- MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc, uid);
- break;
- default:
- mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
- opcode, qpn);
- return -EINVAL;
- }
- return 0;
-}
-
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
- u32 opt_param_mask, void *qpc,
- struct mlx5_core_qp *qp)
-{
- struct mbox_info mbox;
- int err;
-
- err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
- opt_param_mask, qpc, &mbox, qp->uid);
- if (err)
- return err;
-
- err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen);
- mbox_free(&mbox);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
-
-void mlx5_init_qp_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
-
- memset(table, 0, sizeof(*table));
- spin_lock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
- mlx5_qp_debugfs_init(dev);
-
- table->nb.notifier_call = rsc_event_notifier;
- mlx5_notifier_register(dev, &table->nb);
-}
-
-void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
-
- mlx5_notifier_unregister(dev, &table->nb);
- mlx5_qp_debugfs_cleanup(dev);
-}
-
-int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
- u32 *out, int outlen)
-{
- u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0};
-
- MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
- MLX5_SET(query_qp_in, in, qpn, qp->qpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
-
-int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
- u32 *out, int outlen)
-{
- u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0};
- struct mlx5_core_qp *qp = &dct->mqp;
-
- MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
- MLX5_SET(query_dct_in, in, dctn, qp->qpn);
-
- return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
- (void *)out, outlen);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_dct_query);
-
-int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
-{
- u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {0};
- int err;
-
- MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (!err)
- *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
-
-int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
-{
- u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {0};
-
- MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
- MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
-
-static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {};
-
- MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
- MLX5_SET(destroy_rq_in, in, rqn, rqn);
- MLX5_SET(destroy_rq_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
- struct mlx5_core_qp *rq)
-{
- int err;
- u32 rqn;
-
- err = mlx5_core_create_rq(dev, in, inlen, &rqn);
- if (err)
- return err;
-
- rq->uid = MLX5_GET(create_rq_in, in, uid);
- rq->qpn = rqn;
- err = create_resource_common(dev, rq, MLX5_RES_RQ);
- if (err)
- goto err_destroy_rq;
-
- return 0;
-
-err_destroy_rq:
- destroy_rq_tracked(dev, rq->qpn, rq->uid);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
-
-void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *rq)
-{
- destroy_resource_common(dev, rq);
- destroy_rq_tracked(dev, rq->qpn, rq->uid);
-}
-EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
-
-static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {};
-
- MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
- MLX5_SET(destroy_sq_in, in, sqn, sqn);
- MLX5_SET(destroy_sq_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
- struct mlx5_core_qp *sq)
-{
- int err;
- u32 sqn;
-
- err = mlx5_core_create_sq(dev, in, inlen, &sqn);
- if (err)
- return err;
-
- sq->uid = MLX5_GET(create_sq_in, in, uid);
- sq->qpn = sqn;
- err = create_resource_common(dev, sq, MLX5_RES_SQ);
- if (err)
- goto err_destroy_sq;
-
- return 0;
-
-err_destroy_sq:
- destroy_sq_tracked(dev, sq->qpn, sq->uid);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
-
-void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *sq)
-{
- destroy_resource_common(dev, sq);
- destroy_sq_tracked(dev, sq->qpn, sq->uid);
-}
-EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
-
-int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id)
-{
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
- int err;
-
- MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (!err)
- *counter_id = MLX5_GET(alloc_q_counter_out, out,
- counter_set_id);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter);
-
-int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id)
-{
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0};
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter);
-
-int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
- int reset, void *out, int out_size)
-{
- u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0};
-
- MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
- MLX5_SET(query_q_counter_in, in, clear, reset);
- MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
-
-struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
- int res_num,
- enum mlx5_res_type res_type)
-{
- u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
- struct mlx5_qp_table *table = &dev->priv.qp_table;
-
- return mlx5_get_rsc(table, rsn);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
-
-void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
-{
- mlx5_core_put_rsc(res);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_res_put);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 0fc7de4aa572..540cf05f6373 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -116,7 +116,7 @@ free:
static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
{
mlx5_core_roce_gid_set(dev, 0, 0, 0,
- NULL, NULL, false, 0, 0);
+ NULL, NULL, false, 0, 1);
}
static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid)
@@ -156,6 +156,9 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
{
int err;
+ if (!MLX5_CAP_GEN(dev, roce))
+ return;
+
err = mlx5_nic_vport_enable_roce(dev);
if (err) {
mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
@@ -180,5 +183,4 @@ del_roce_addr:
mlx5_rdma_del_roce_addr(dev);
disable_roce:
mlx5_nic_vport_disable_roce(dev);
- return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 01c380425f9d..9f8b4005f4bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -31,17 +31,15 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
/* Scheduling element fw management */
int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
void *ctx, u32 *element_id)
{
- u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {};
void *schedc;
int err;
@@ -53,7 +51,7 @@ int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
hierarchy);
memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, create_scheduling_element, in, out);
if (err)
return err;
@@ -66,8 +64,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
void *ctx, u32 element_id,
u32 modify_bitmask)
{
- u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {};
void *schedc;
schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in,
@@ -82,14 +79,13 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
hierarchy);
memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_scheduling_element, in);
}
int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id)
{
- u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {};
MLX5_SET(destroy_scheduling_element_in, in, opcode,
MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
@@ -98,7 +94,14 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy,
hierarchy);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, destroy_scheduling_element, in);
+}
+
+static bool mlx5_rl_are_equal_raw(struct mlx5_rl_entry *entry, void *rl_in,
+ u16 uid)
+{
+ return (!memcmp(entry->rl_raw, rl_in, sizeof(entry->rl_raw)) &&
+ entry->uid == uid);
}
/* Finds an entry where we can register the given rate
@@ -107,16 +110,29 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
* If the table is full, return NULL
*/
static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
- struct mlx5_rate_limit *rl)
+ void *rl_in, u16 uid, bool dedicated)
{
struct mlx5_rl_entry *ret_entry = NULL;
bool empty_found = false;
int i;
+ lockdep_assert_held(&table->rl_lock);
+ WARN_ON(!table->rl_entry);
+
for (i = 0; i < table->max_size; i++) {
- if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl))
- return &table->rl_entry[i];
- if (!empty_found && !table->rl_entry[i].rl.rate) {
+ if (dedicated) {
+ if (!table->rl_entry[i].refcount)
+ return &table->rl_entry[i];
+ continue;
+ }
+
+ if (table->rl_entry[i].refcount) {
+ if (table->rl_entry[i].dedicated)
+ continue;
+ if (mlx5_rl_are_equal_raw(&table->rl_entry[i], rl_in,
+ uid))
+ return &table->rl_entry[i];
+ } else if (!empty_found) {
empty_found = true;
ret_entry = &table->rl_entry[i];
}
@@ -126,19 +142,19 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
}
static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
- u16 index,
- struct mlx5_rate_limit *rl)
+ struct mlx5_rl_entry *entry, bool set)
{
- u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {};
+ void *pp_context;
+ pp_context = MLX5_ADDR_OF(set_pp_rate_limit_in, in, ctx);
MLX5_SET(set_pp_rate_limit_in, in, opcode,
MLX5_CMD_OP_SET_PP_RATE_LIMIT);
- MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
- MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate);
- MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz);
- MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ MLX5_SET(set_pp_rate_limit_in, in, uid, entry->uid);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, entry->index);
+ if (set)
+ memcpy(pp_context, entry->rl_raw, sizeof(entry->rl_raw));
+ return mlx5_cmd_exec_in(dev, set_pp_rate_limit, in);
}
bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
@@ -158,77 +174,189 @@ bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
}
EXPORT_SYMBOL(mlx5_rl_are_equal);
-int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
- struct mlx5_rate_limit *rl)
+static int mlx5_rl_table_get(struct mlx5_rl_table *table)
+{
+ int i;
+
+ lockdep_assert_held(&table->rl_lock);
+
+ if (table->rl_entry) {
+ table->refcount++;
+ return 0;
+ }
+
+ table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
+ GFP_KERNEL);
+ if (!table->rl_entry)
+ return -ENOMEM;
+
+ /* The index represents the index in HW rate limit table
+ * Index 0 is reserved for unlimited rate
+ */
+ for (i = 0; i < table->max_size; i++)
+ table->rl_entry[i].index = i + 1;
+
+ table->refcount++;
+ return 0;
+}
+
+static void mlx5_rl_table_put(struct mlx5_rl_table *table)
+{
+ lockdep_assert_held(&table->rl_lock);
+ if (--table->refcount)
+ return;
+
+ kfree(table->rl_entry);
+ table->rl_entry = NULL;
+}
+
+static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *table)
+{
+ int i;
+
+ if (!table->rl_entry)
+ return;
+
+ /* Clear all configured rates */
+ for (i = 0; i < table->max_size; i++)
+ if (table->rl_entry[i].refcount)
+ mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], false);
+ kfree(table->rl_entry);
+}
+
+static void mlx5_rl_entry_get(struct mlx5_rl_entry *entry)
+{
+ entry->refcount++;
+}
+
+static void
+mlx5_rl_entry_put(struct mlx5_core_dev *dev, struct mlx5_rl_entry *entry)
+{
+ entry->refcount--;
+ if (!entry->refcount)
+ mlx5_set_pp_rate_limit_cmd(dev, entry, false);
+}
+
+int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
+ bool dedicated_entry, u16 *index)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
struct mlx5_rl_entry *entry;
- int err = 0;
+ u32 rate;
+ int err;
- mutex_lock(&table->rl_lock);
+ if (!table->max_size)
+ return -EOPNOTSUPP;
- if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) {
+ rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit);
+ if (!rate || !mlx5_rl_is_in_range(dev, rate)) {
mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
- rl->rate, table->min_rate, table->max_rate);
- err = -EINVAL;
- goto out;
+ rate, table->min_rate, table->max_rate);
+ return -EINVAL;
}
- entry = find_rl_entry(table, rl);
+ mutex_lock(&table->rl_lock);
+ err = mlx5_rl_table_get(table);
+ if (err)
+ goto out;
+
+ entry = find_rl_entry(table, rl_in, uid, dedicated_entry);
if (!entry) {
mlx5_core_err(dev, "Max number of %u rates reached\n",
table->max_size);
err = -ENOSPC;
- goto out;
+ goto rl_err;
}
- if (entry->refcount) {
- /* rate already configured */
- entry->refcount++;
- } else {
+ if (!entry->refcount) {
/* new rate limit */
- err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl);
+ memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw));
+ entry->uid = uid;
+ err = mlx5_set_pp_rate_limit_cmd(dev, entry, true);
if (err) {
- mlx5_core_err(dev, "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
- err, rl->rate, rl->max_burst_sz,
- rl->typical_pkt_sz);
- goto out;
+ mlx5_core_err(
+ dev,
+ "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, rate,
+ MLX5_GET(set_pp_rate_limit_context, rl_in,
+ burst_upper_bound),
+ MLX5_GET(set_pp_rate_limit_context, rl_in,
+ typical_packet_size));
+ goto rl_err;
}
- entry->rl = *rl;
- entry->refcount = 1;
+
+ entry->dedicated = dedicated_entry;
}
+ mlx5_rl_entry_get(entry);
*index = entry->index;
+ mutex_unlock(&table->rl_lock);
+ return 0;
+rl_err:
+ mlx5_rl_table_put(table);
out:
mutex_unlock(&table->rl_lock);
return err;
}
+EXPORT_SYMBOL(mlx5_rl_add_rate_raw);
+
+void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry;
+
+ mutex_lock(&table->rl_lock);
+ entry = &table->rl_entry[index - 1];
+ mlx5_rl_entry_put(dev, entry);
+ mlx5_rl_table_put(table);
+ mutex_unlock(&table->rl_lock);
+}
+EXPORT_SYMBOL(mlx5_rl_remove_rate_raw);
+
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+ struct mlx5_rate_limit *rl)
+{
+ u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {};
+
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound,
+ rl->max_burst_sz);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size,
+ rl->typical_pkt_sz);
+
+ return mlx5_rl_add_rate_raw(dev, rl_raw,
+ MLX5_CAP_QOS(dev, packet_pacing_uid) ?
+ MLX5_SHARED_RESOURCE_UID : 0,
+ false, index);
+}
EXPORT_SYMBOL(mlx5_rl_add_rate);
void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
{
+ u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {};
struct mlx5_rl_table *table = &dev->priv.rl_table;
struct mlx5_rl_entry *entry = NULL;
- struct mlx5_rate_limit reset_rl = {0};
/* 0 is a reserved value for unlimited rate */
if (rl->rate == 0)
return;
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound,
+ rl->max_burst_sz);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size,
+ rl->typical_pkt_sz);
+
mutex_lock(&table->rl_lock);
- entry = find_rl_entry(table, rl);
+ entry = find_rl_entry(table, rl_raw,
+ MLX5_CAP_QOS(dev, packet_pacing_uid) ?
+ MLX5_SHARED_RESOURCE_UID : 0, false);
if (!entry || !entry->refcount) {
mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n",
rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
goto out;
}
-
- entry->refcount--;
- if (!entry->refcount) {
- /* need to remove rate */
- mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl);
- entry->rl = reset_rl;
- }
-
+ mlx5_rl_entry_put(dev, entry);
+ mlx5_rl_table_put(table);
out:
mutex_unlock(&table->rl_lock);
}
@@ -237,31 +365,19 @@ EXPORT_SYMBOL(mlx5_rl_remove_rate);
int mlx5_init_rl_table(struct mlx5_core_dev *dev)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
- int i;
- mutex_init(&table->rl_lock);
if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
table->max_size = 0;
return 0;
}
+ mutex_init(&table->rl_lock);
+
/* First entry is reserved for unlimited rate */
table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1;
table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
- table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
- GFP_KERNEL);
- if (!table->rl_entry)
- return -ENOMEM;
-
- /* The index represents the index in HW rate limit table
- * Index 0 is reserved for unlimited rate
- */
- for (i = 0; i < table->max_size; i++)
- table->rl_entry[i].index = i + 1;
-
- /* Index 0 is reserved */
mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n",
table->max_size,
table->min_rate >> 10,
@@ -273,14 +389,10 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev)
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
- struct mlx5_rate_limit rl = {0};
- int i;
- /* Clear all configured rates */
- for (i = 0; i < table->max_size; i++)
- if (table->rl_entry[i].rl.rate)
- mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index,
- &rl);
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing))
+ return;
- kfree(dev->priv.rl_table.rl_entry);
+ mlx5_rl_table_free(dev, table);
+ mutex_destroy(&table->rl_lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c
new file mode 100644
index 000000000000..a8d75c2f0275
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include "priv.h"
+
+int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_sf_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_sf_in)] = {};
+
+ MLX5_SET(alloc_sf_in, in, opcode, MLX5_CMD_OP_ALLOC_SF);
+ MLX5_SET(alloc_sf_in, in, function_id, function_id);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_sf_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_sf_in)] = {};
+
+ MLX5_SET(dealloc_sf_in, in, opcode, MLX5_CMD_OP_DEALLOC_SF);
+ MLX5_SET(dealloc_sf_in, in, function_id, function_id);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
+
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, func_id);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+}
+
+int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
+
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, func_id);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
new file mode 100644
index 000000000000..7da012ff0d41
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include "mlx5_core.h"
+#include "dev.h"
+#include "sf/vhca_event.h"
+#include "sf/sf.h"
+#include "sf/mlx5_ifc_vhca_event.h"
+#include "ecpf.h"
+#define CREATE_TRACE_POINTS
+#include "diag/dev_tracepoint.h"
+
+struct mlx5_sf_dev_table {
+ struct xarray devices;
+ unsigned int max_sfs;
+ phys_addr_t base_address;
+ u64 sf_bar_length;
+ struct notifier_block nb;
+ struct mlx5_core_dev *dev;
+};
+
+static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev);
+}
+
+bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+
+ return table && !xa_empty(&table->devices);
+}
+
+static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+
+ return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
+}
+static DEVICE_ATTR_RO(sfnum);
+
+static struct attribute *sf_device_attrs[] = {
+ &dev_attr_sfnum.attr,
+ NULL,
+};
+
+static const struct attribute_group sf_attr_group = {
+ .attrs = sf_device_attrs,
+};
+
+static const struct attribute_group *sf_attr_groups[2] = {
+ &sf_attr_group,
+ NULL
+};
+
+static void mlx5_sf_dev_release(struct device *device)
+{
+ struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev);
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+
+ mlx5_adev_idx_free(adev->id);
+ kfree(sf_dev);
+}
+
+static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev)
+{
+ int id;
+
+ id = sf_dev->adev.id;
+ trace_mlx5_sf_dev_del(dev, sf_dev, id);
+
+ auxiliary_device_delete(&sf_dev->adev);
+ auxiliary_device_uninit(&sf_dev->adev);
+}
+
+static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+ struct mlx5_sf_dev *sf_dev;
+ struct pci_dev *pdev;
+ int err;
+ int id;
+
+ id = mlx5_adev_idx_alloc();
+ if (id < 0) {
+ err = id;
+ goto add_err;
+ }
+
+ sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL);
+ if (!sf_dev) {
+ mlx5_adev_idx_free(id);
+ err = -ENOMEM;
+ goto add_err;
+ }
+ pdev = dev->pdev;
+ sf_dev->adev.id = id;
+ sf_dev->adev.name = MLX5_SF_DEV_ID_NAME;
+ sf_dev->adev.dev.release = mlx5_sf_dev_release;
+ sf_dev->adev.dev.parent = &pdev->dev;
+ sf_dev->adev.dev.groups = sf_attr_groups;
+ sf_dev->sfnum = sfnum;
+ sf_dev->parent_mdev = dev;
+ sf_dev->fn_id = fn_id;
+
+ if (!table->max_sfs) {
+ mlx5_adev_idx_free(id);
+ kfree(sf_dev);
+ err = -EOPNOTSUPP;
+ goto add_err;
+ }
+ sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length);
+
+ trace_mlx5_sf_dev_add(dev, sf_dev, id);
+
+ err = auxiliary_device_init(&sf_dev->adev);
+ if (err) {
+ mlx5_adev_idx_free(id);
+ kfree(sf_dev);
+ goto add_err;
+ }
+
+ err = auxiliary_device_add(&sf_dev->adev);
+ if (err) {
+ put_device(&sf_dev->adev.dev);
+ goto add_err;
+ }
+
+ err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL);
+ if (err)
+ goto xa_err;
+ return;
+
+xa_err:
+ mlx5_sf_dev_remove(dev, sf_dev);
+add_err:
+ mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n",
+ sf_index, sfnum, err);
+}
+
+static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+
+ xa_erase(&table->devices, sf_index);
+ mlx5_sf_dev_remove(dev, sf_dev);
+}
+
+static int
+mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data)
+{
+ struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
+ const struct mlx5_vhca_state_event *event = data;
+ struct mlx5_sf_dev *sf_dev;
+ u16 max_functions;
+ u16 sf_index;
+ u16 base_id;
+
+ max_functions = mlx5_sf_max_functions(table->dev);
+ if (!max_functions)
+ return 0;
+
+ base_id = MLX5_CAP_GEN(table->dev, sf_base_id);
+ if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
+ return 0;
+
+ sf_index = event->function_id - base_id;
+ sf_dev = xa_load(&table->devices, sf_index);
+ switch (event->new_vhca_state) {
+ case MLX5_VHCA_STATE_INVALID:
+ case MLX5_VHCA_STATE_ALLOCATED:
+ if (sf_dev)
+ mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
+ break;
+ case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
+ if (sf_dev)
+ mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
+ else
+ mlx5_core_err(table->dev,
+ "SF DEV: teardown state for invalid dev index=%d fn_id=0x%x\n",
+ sf_index, event->sw_function_id);
+ break;
+ case MLX5_VHCA_STATE_ACTIVE:
+ if (!sf_dev)
+ mlx5_sf_dev_add(table->dev, sf_index, event->function_id,
+ event->sw_function_id);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
+{
+ struct mlx5_core_dev *dev = table->dev;
+ u16 max_functions;
+ u16 function_id;
+ int err = 0;
+ int i;
+
+ max_functions = mlx5_sf_max_functions(dev);
+ function_id = MLX5_CAP_GEN(dev, sf_base_id);
+ /* Arm the vhca context as the vhca event notifier */
+ for (i = 0; i < max_functions; i++) {
+ err = mlx5_vhca_event_arm(dev, function_id);
+ if (err)
+ return err;
+
+ function_id++;
+ }
+ return 0;
+}
+
+void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_dev_table *table;
+ unsigned int max_sfs;
+ int err;
+
+ if (!mlx5_sf_dev_supported(dev) || !mlx5_vhca_event_supported(dev))
+ return;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table) {
+ err = -ENOMEM;
+ goto table_err;
+ }
+
+ table->nb.notifier_call = mlx5_sf_dev_state_change_handler;
+ table->dev = dev;
+ if (MLX5_CAP_GEN(dev, max_num_sf))
+ max_sfs = MLX5_CAP_GEN(dev, max_num_sf);
+ else
+ max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf);
+ table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12);
+ table->base_address = pci_resource_start(dev->pdev, 2);
+ table->max_sfs = max_sfs;
+ xa_init(&table->devices);
+ dev->priv.sf_dev_table = table;
+
+ err = mlx5_vhca_event_notifier_register(dev, &table->nb);
+ if (err)
+ goto vhca_err;
+ err = mlx5_sf_dev_vhca_arm_all(table);
+ if (err)
+ goto arm_err;
+ mlx5_core_dbg(dev, "SF DEV: max sf devices=%d\n", max_sfs);
+ return;
+
+arm_err:
+ mlx5_vhca_event_notifier_unregister(dev, &table->nb);
+vhca_err:
+ table->max_sfs = 0;
+ kfree(table);
+ dev->priv.sf_dev_table = NULL;
+table_err:
+ mlx5_core_err(dev, "SF DEV table create err = %d\n", err);
+}
+
+static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table)
+{
+ struct mlx5_sf_dev *sf_dev;
+ unsigned long index;
+
+ xa_for_each(&table->devices, index, sf_dev) {
+ xa_erase(&table->devices, index);
+ mlx5_sf_dev_remove(table->dev, sf_dev);
+ }
+}
+
+void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+
+ if (!table)
+ return;
+
+ mlx5_vhca_event_notifier_unregister(dev, &table->nb);
+
+ /* Now that event handler is not running, it is safe to destroy
+ * the sf device without race.
+ */
+ mlx5_sf_dev_destroy_all(table);
+
+ WARN_ON(!xa_empty(&table->devices));
+ kfree(table);
+ dev->priv.sf_dev_table = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
new file mode 100644
index 000000000000..2a66a427ef15
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_SF_DEV_H__
+#define __MLX5_SF_DEV_H__
+
+#ifdef CONFIG_MLX5_SF
+
+#include <linux/auxiliary_bus.h>
+
+#define MLX5_SF_DEV_ID_NAME "sf"
+
+struct mlx5_sf_dev {
+ struct auxiliary_device adev;
+ struct mlx5_core_dev *parent_mdev;
+ struct mlx5_core_dev *mdev;
+ phys_addr_t bar_base_addr;
+ u32 sfnum;
+ u16 fn_id;
+};
+
+void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev);
+void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_sf_driver_register(void);
+void mlx5_sf_driver_unregister(void);
+
+bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev);
+
+#else
+
+static inline void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
+{
+}
+
+static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
+{
+}
+
+static inline int mlx5_sf_driver_register(void)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_driver_unregister(void)
+{
+}
+
+static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
+{
+ return false;
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h
new file mode 100644
index 000000000000..7f7c9af5deed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_SF_DEV_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_SF_DEV_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/mlx5/driver.h>
+#include "../../dev/dev.h"
+
+DECLARE_EVENT_CLASS(mlx5_sf_dev_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_sf_dev *sfdev,
+ int aux_id),
+ TP_ARGS(dev, sfdev, aux_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const struct mlx5_sf_dev*, sfdev)
+ __field(int, aux_id)
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->sfdev = sfdev;
+ __entry->aux_id = aux_id;
+ __entry->hw_fn_id = sfdev->fn_id;
+ __entry->sfnum = sfdev->sfnum;
+ ),
+ TP_printk("(%s) sfdev=%pK aux_id=%d hw_id=0x%x sfnum=%u\n",
+ __get_str(devname), __entry->sfdev,
+ __entry->aux_id, __entry->hw_fn_id,
+ __entry->sfnum)
+);
+
+DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_add,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_sf_dev *sfdev,
+ int aux_id),
+ TP_ARGS(dev, sfdev, aux_id)
+ );
+
+DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_del,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_sf_dev *sfdev,
+ int aux_id),
+ TP_ARGS(dev, sfdev, aux_id)
+ );
+
+#endif /* _MLX5_SF_DEV_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH sf/dev/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE dev_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
new file mode 100644
index 000000000000..7b4783ce213e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include "mlx5_core.h"
+#include "dev.h"
+#include "devlink.h"
+
+static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id)
+{
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+ struct mlx5_core_dev *mdev;
+ struct devlink *devlink;
+ int err;
+
+ devlink = mlx5_devlink_alloc(&adev->dev);
+ if (!devlink)
+ return -ENOMEM;
+
+ mdev = devlink_priv(devlink);
+ mdev->device = &adev->dev;
+ mdev->pdev = sf_dev->parent_mdev->pdev;
+ mdev->bar_addr = sf_dev->bar_base_addr;
+ mdev->iseg_base = sf_dev->bar_base_addr;
+ mdev->coredev_type = MLX5_COREDEV_SF;
+ mdev->priv.parent_mdev = sf_dev->parent_mdev;
+ mdev->priv.adev_idx = adev->id;
+ sf_dev->mdev = mdev;
+
+ err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err);
+ goto mdev_err;
+ }
+
+ mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg));
+ if (!mdev->iseg) {
+ mlx5_core_warn(mdev, "remap error\n");
+ err = -ENOMEM;
+ goto remap_err;
+ }
+
+ err = mlx5_init_one(mdev);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err);
+ goto init_one_err;
+ }
+ devlink_register(devlink);
+ return 0;
+
+init_one_err:
+ iounmap(mdev->iseg);
+remap_err:
+ mlx5_mdev_uninit(mdev);
+mdev_err:
+ mlx5_devlink_free(devlink);
+ return err;
+}
+
+static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+ struct devlink *devlink = priv_to_devlink(sf_dev->mdev);
+
+ devlink_unregister(devlink);
+ mlx5_uninit_one(sf_dev->mdev);
+ iounmap(sf_dev->mdev->iseg);
+ mlx5_mdev_uninit(sf_dev->mdev);
+ mlx5_devlink_free(devlink);
+}
+
+static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
+{
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+
+ mlx5_unload_one(sf_dev->mdev);
+}
+
+static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = {
+ { .name = MLX5_ADEV_NAME "." MLX5_SF_DEV_ID_NAME, },
+ { },
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5_sf_dev_id_table);
+
+static struct auxiliary_driver mlx5_sf_driver = {
+ .name = MLX5_SF_DEV_ID_NAME,
+ .probe = mlx5_sf_dev_probe,
+ .remove = mlx5_sf_dev_remove,
+ .shutdown = mlx5_sf_dev_shutdown,
+ .id_table = mlx5_sf_dev_id_table,
+};
+
+int mlx5_sf_driver_register(void)
+{
+ return auxiliary_driver_register(&mlx5_sf_driver);
+}
+
+void mlx5_sf_driver_unregister(void)
+{
+ auxiliary_driver_unregister(&mlx5_sf_driver);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
new file mode 100644
index 000000000000..7d955a4d9f14
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include "eswitch.h"
+#include "priv.h"
+#include "sf/dev/dev.h"
+#include "mlx5_ifc_vhca_event.h"
+#include "vhca_event.h"
+#include "ecpf.h"
+#define CREATE_TRACE_POINTS
+#include "diag/sf_tracepoint.h"
+
+struct mlx5_sf {
+ struct devlink_port dl_port;
+ unsigned int port_index;
+ u32 controller;
+ u16 id;
+ u16 hw_fn_id;
+ u16 hw_state;
+};
+
+struct mlx5_sf_table {
+ struct mlx5_core_dev *dev; /* To refer from notifier context. */
+ struct xarray port_indices; /* port index based lookup. */
+ refcount_t refcount;
+ struct completion disable_complete;
+ struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */
+ struct notifier_block esw_nb;
+ struct notifier_block vhca_nb;
+ u8 ecpu: 1;
+};
+
+static struct mlx5_sf *
+mlx5_sf_lookup_by_index(struct mlx5_sf_table *table, unsigned int port_index)
+{
+ return xa_load(&table->port_indices, port_index);
+}
+
+static struct mlx5_sf *
+mlx5_sf_lookup_by_function_id(struct mlx5_sf_table *table, unsigned int fn_id)
+{
+ unsigned long index;
+ struct mlx5_sf *sf;
+
+ xa_for_each(&table->port_indices, index, sf) {
+ if (sf->hw_fn_id == fn_id)
+ return sf;
+ }
+ return NULL;
+}
+
+static int mlx5_sf_id_insert(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ return xa_insert(&table->port_indices, sf->port_index, sf, GFP_KERNEL);
+}
+
+static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ xa_erase(&table->port_indices, sf->port_index);
+}
+
+static struct mlx5_sf *
+mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw,
+ u32 controller, u32 sfnum, struct netlink_ext_ack *extack)
+{
+ unsigned int dl_port_index;
+ struct mlx5_sf *sf;
+ u16 hw_fn_id;
+ int id_err;
+ int err;
+
+ if (!mlx5_esw_offloads_controller_valid(esw, controller)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid controller number");
+ return ERR_PTR(-EINVAL);
+ }
+
+ id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum);
+ if (id_err < 0) {
+ err = id_err;
+ goto id_err;
+ }
+
+ sf = kzalloc(sizeof(*sf), GFP_KERNEL);
+ if (!sf) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+ sf->id = id_err;
+ hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id);
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id);
+ sf->port_index = dl_port_index;
+ sf->hw_fn_id = hw_fn_id;
+ sf->hw_state = MLX5_VHCA_STATE_ALLOCATED;
+ sf->controller = controller;
+
+ err = mlx5_sf_id_insert(table, sf);
+ if (err)
+ goto insert_err;
+
+ return sf;
+
+insert_err:
+ kfree(sf);
+alloc_err:
+ mlx5_sf_hw_table_sf_free(table->dev, controller, id_err);
+id_err:
+ if (err == -EEXIST)
+ NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum");
+ return ERR_PTR(err);
+}
+
+static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ mlx5_sf_id_erase(table, sf);
+ mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id);
+ trace_mlx5_sf_free(table->dev, sf->port_index, sf->controller, sf->hw_fn_id);
+ kfree(sf);
+}
+
+static struct mlx5_sf_table *mlx5_sf_table_try_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_table *table = dev->priv.sf_table;
+
+ if (!table)
+ return NULL;
+
+ return refcount_inc_not_zero(&table->refcount) ? table : NULL;
+}
+
+static void mlx5_sf_table_put(struct mlx5_sf_table *table)
+{
+ if (refcount_dec_and_test(&table->refcount))
+ complete(&table->disable_complete);
+}
+
+static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state)
+{
+ switch (hw_state) {
+ case MLX5_VHCA_STATE_ACTIVE:
+ case MLX5_VHCA_STATE_IN_USE:
+ return DEVLINK_PORT_FN_STATE_ACTIVE;
+ case MLX5_VHCA_STATE_INVALID:
+ case MLX5_VHCA_STATE_ALLOCATED:
+ case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
+ default:
+ return DEVLINK_PORT_FN_STATE_INACTIVE;
+ }
+}
+
+static enum devlink_port_fn_opstate mlx5_sf_to_devlink_opstate(u8 hw_state)
+{
+ switch (hw_state) {
+ case MLX5_VHCA_STATE_IN_USE:
+ case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
+ return DEVLINK_PORT_FN_OPSTATE_ATTACHED;
+ case MLX5_VHCA_STATE_INVALID:
+ case MLX5_VHCA_STATE_ALLOCATED:
+ case MLX5_VHCA_STATE_ACTIVE:
+ default:
+ return DEVLINK_PORT_FN_OPSTATE_DETACHED;
+ }
+}
+
+static bool mlx5_sf_is_active(const struct mlx5_sf *sf)
+{
+ return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE;
+}
+
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
+ enum devlink_port_fn_state *state,
+ enum devlink_port_fn_opstate *opstate,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
+ struct mlx5_sf_table *table;
+ struct mlx5_sf *sf;
+ int err = 0;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table)
+ return -EOPNOTSUPP;
+
+ sf = mlx5_sf_lookup_by_index(table, dl_port->index);
+ if (!sf) {
+ err = -EOPNOTSUPP;
+ goto sf_err;
+ }
+ mutex_lock(&table->sf_state_lock);
+ *state = mlx5_sf_to_devlink_state(sf->hw_state);
+ *opstate = mlx5_sf_to_devlink_opstate(sf->hw_state);
+ mutex_unlock(&table->sf_state_lock);
+sf_err:
+ mlx5_sf_table_put(table);
+ return err;
+}
+
+static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (mlx5_sf_is_active(sf))
+ return 0;
+ if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) {
+ NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached");
+ return -EBUSY;
+ }
+
+ err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id);
+ if (err)
+ return err;
+
+ sf->hw_state = MLX5_VHCA_STATE_ACTIVE;
+ trace_mlx5_sf_activate(dev, sf->port_index, sf->controller, sf->hw_fn_id);
+ return 0;
+}
+
+static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf)
+{
+ int err;
+
+ if (!mlx5_sf_is_active(sf))
+ return 0;
+
+ err = mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id);
+ if (err)
+ return err;
+
+ sf->hw_state = MLX5_VHCA_STATE_TEARDOWN_REQUEST;
+ trace_mlx5_sf_deactivate(dev, sf->port_index, sf->controller, sf->hw_fn_id);
+ return 0;
+}
+
+static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
+ struct mlx5_sf *sf,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ mutex_lock(&table->sf_state_lock);
+ if (state == mlx5_sf_to_devlink_state(sf->hw_state))
+ goto out;
+ if (state == DEVLINK_PORT_FN_STATE_ACTIVE)
+ err = mlx5_sf_activate(dev, sf, extack);
+ else if (state == DEVLINK_PORT_FN_STATE_INACTIVE)
+ err = mlx5_sf_deactivate(dev, sf);
+ else
+ err = -EINVAL;
+out:
+ mutex_unlock(&table->sf_state_lock);
+ return err;
+}
+
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
+ struct mlx5_sf_table *table;
+ struct mlx5_sf *sf;
+ int err;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port state set is only supported in eswitch switchdev mode or SF ports are disabled.");
+ return -EOPNOTSUPP;
+ }
+ sf = mlx5_sf_lookup_by_index(table, dl_port->index);
+ if (!sf) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = mlx5_sf_state_set(dev, table, sf, state, extack);
+out:
+ mlx5_sf_table_put(table);
+ return err;
+}
+
+static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
+ const struct devlink_port_new_attrs *new_attr,
+ struct netlink_ext_ack *extack,
+ unsigned int *new_port_index)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_sf *sf;
+ int err;
+
+ sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack);
+ if (IS_ERR(sf))
+ return PTR_ERR(sf);
+
+ err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id,
+ new_attr->controller, new_attr->sfnum);
+ if (err)
+ goto esw_err;
+ *new_port_index = sf->port_index;
+ trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum);
+ return 0;
+
+esw_err:
+ mlx5_sf_free(table, sf);
+ return err;
+}
+
+static int
+mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_attrs *new_attr,
+ struct netlink_ext_ack *extack)
+{
+ if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
+ NL_SET_ERR_MSG_MOD(extack, "Driver supports only SF port addition");
+ return -EOPNOTSUPP;
+ }
+ if (new_attr->port_index_valid) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Driver does not support user defined port index assignment");
+ return -EOPNOTSUPP;
+ }
+ if (!new_attr->sfnum_valid) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "User must provide unique sfnum. Driver does not support auto assignment");
+ return -EOPNOTSUPP;
+ }
+ if (new_attr->controller_valid && new_attr->controller &&
+ !mlx5_core_is_ecpf_esw_manager(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported");
+ return -EOPNOTSUPP;
+ }
+ if (new_attr->pfnum != mlx5_get_dev_index(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+int mlx5_devlink_sf_port_new(struct devlink *devlink,
+ const struct devlink_port_new_attrs *new_attr,
+ struct netlink_ext_ack *extack,
+ unsigned int *new_port_index)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_sf_table *table;
+ int err;
+
+ err = mlx5_sf_new_check_attr(dev, new_attr, extack);
+ if (err)
+ return err;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port add is only supported in eswitch switchdev mode or SF ports are disabled.");
+ return -EOPNOTSUPP;
+ }
+ err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index);
+ mlx5_sf_table_put(table);
+ return err;
+}
+
+static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ if (sf->hw_state == MLX5_VHCA_STATE_ALLOCATED) {
+ mlx5_sf_free(table, sf);
+ } else if (mlx5_sf_is_active(sf)) {
+ /* Even if its active, it is treated as in_use because by the time,
+ * it is disabled here, it may getting used. So it is safe to
+ * always look for the event to ensure that it is recycled only after
+ * firmware gives confirmation that it is detached by the driver.
+ */
+ mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id);
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
+ kfree(sf);
+ } else {
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
+ kfree(sf);
+ }
+}
+
+int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_sf_table *table;
+ struct mlx5_sf *sf;
+ int err = 0;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port del is only supported in eswitch switchdev mode or SF ports are disabled.");
+ return -EOPNOTSUPP;
+ }
+ sf = mlx5_sf_lookup_by_index(table, port_index);
+ if (!sf) {
+ err = -ENODEV;
+ goto sf_err;
+ }
+
+ mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id);
+ mlx5_sf_id_erase(table, sf);
+
+ mutex_lock(&table->sf_state_lock);
+ mlx5_sf_dealloc(table, sf);
+ mutex_unlock(&table->sf_state_lock);
+sf_err:
+ mlx5_sf_table_put(table);
+ return err;
+}
+
+static bool mlx5_sf_state_update_check(const struct mlx5_sf *sf, u8 new_state)
+{
+ if (sf->hw_state == MLX5_VHCA_STATE_ACTIVE && new_state == MLX5_VHCA_STATE_IN_USE)
+ return true;
+
+ if (sf->hw_state == MLX5_VHCA_STATE_IN_USE && new_state == MLX5_VHCA_STATE_ACTIVE)
+ return true;
+
+ if (sf->hw_state == MLX5_VHCA_STATE_TEARDOWN_REQUEST &&
+ new_state == MLX5_VHCA_STATE_ALLOCATED)
+ return true;
+
+ return false;
+}
+
+static int mlx5_sf_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data)
+{
+ struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, vhca_nb);
+ const struct mlx5_vhca_state_event *event = data;
+ bool update = false;
+ struct mlx5_sf *sf;
+
+ table = mlx5_sf_table_try_get(table->dev);
+ if (!table)
+ return 0;
+
+ mutex_lock(&table->sf_state_lock);
+ sf = mlx5_sf_lookup_by_function_id(table, event->function_id);
+ if (!sf)
+ goto sf_err;
+
+ /* When driver is attached or detached to a function, an event
+ * notifies such state change.
+ */
+ update = mlx5_sf_state_update_check(sf, event->new_vhca_state);
+ if (update)
+ sf->hw_state = event->new_vhca_state;
+ trace_mlx5_sf_update_state(table->dev, sf->port_index, sf->controller,
+ sf->hw_fn_id, sf->hw_state);
+sf_err:
+ mutex_unlock(&table->sf_state_lock);
+ mlx5_sf_table_put(table);
+ return 0;
+}
+
+static void mlx5_sf_table_enable(struct mlx5_sf_table *table)
+{
+ init_completion(&table->disable_complete);
+ refcount_set(&table->refcount, 1);
+}
+
+static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table)
+{
+ struct mlx5_eswitch *esw = table->dev->priv.eswitch;
+ unsigned long index;
+ struct mlx5_sf *sf;
+
+ /* At this point, no new user commands can start and no vhca event can
+ * arrive. It is safe to destroy all user created SFs.
+ */
+ xa_for_each(&table->port_indices, index, sf) {
+ mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id);
+ mlx5_sf_id_erase(table, sf);
+ mlx5_sf_dealloc(table, sf);
+ }
+}
+
+static void mlx5_sf_table_disable(struct mlx5_sf_table *table)
+{
+ if (!refcount_read(&table->refcount))
+ return;
+
+ /* Balances with refcount_set; drop the reference so that new user cmd cannot start
+ * and new vhca event handler cannot run.
+ */
+ mlx5_sf_table_put(table);
+ wait_for_completion(&table->disable_complete);
+
+ mlx5_sf_deactivate_all(table);
+}
+
+static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, esw_nb);
+ const struct mlx5_esw_event_info *mode = data;
+
+ switch (mode->new_mode) {
+ case MLX5_ESWITCH_OFFLOADS:
+ mlx5_sf_table_enable(table);
+ break;
+ case MLX5_ESWITCH_LEGACY:
+ mlx5_sf_table_disable(table);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev)
+{
+ return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) &&
+ mlx5_sf_hw_table_supported(dev);
+}
+
+int mlx5_sf_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_table *table;
+ int err;
+
+ if (!mlx5_sf_table_supported(dev) || !mlx5_vhca_event_supported(dev))
+ return 0;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ mutex_init(&table->sf_state_lock);
+ table->dev = dev;
+ xa_init(&table->port_indices);
+ dev->priv.sf_table = table;
+ refcount_set(&table->refcount, 0);
+ table->esw_nb.notifier_call = mlx5_sf_esw_event;
+ err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb);
+ if (err)
+ goto reg_err;
+
+ table->vhca_nb.notifier_call = mlx5_sf_vhca_event;
+ err = mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb);
+ if (err)
+ goto vhca_err;
+
+ return 0;
+
+vhca_err:
+ mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb);
+reg_err:
+ mutex_destroy(&table->sf_state_lock);
+ kfree(table);
+ dev->priv.sf_table = NULL;
+ return err;
+}
+
+void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_table *table = dev->priv.sf_table;
+
+ if (!table)
+ return;
+
+ mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb);
+ mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb);
+ WARN_ON(refcount_read(&table->refcount));
+ mutex_destroy(&table->sf_state_lock);
+ WARN_ON(!xa_empty(&table->port_indices));
+ kfree(table);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h
new file mode 100644
index 000000000000..8bf1cd90930d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_SF_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_SF_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/mlx5/driver.h>
+#include "sf/vhca_event.h"
+
+TRACE_EVENT(mlx5_sf_add,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ unsigned int port_index,
+ u32 controller,
+ u16 hw_fn_id,
+ u32 sfnum),
+ TP_ARGS(dev, port_index, controller, hw_fn_id, sfnum),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ __entry->sfnum = sfnum;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x sfnum=%u\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id, __entry->sfnum)
+);
+
+TRACE_EVENT(mlx5_sf_free,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ unsigned int port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id)
+);
+
+TRACE_EVENT(mlx5_sf_hwc_alloc,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 controller,
+ u16 hw_fn_id,
+ u32 sfnum),
+ TP_ARGS(dev, controller, hw_fn_id, sfnum),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ __entry->sfnum = sfnum;
+ ),
+ TP_printk("(%s) controller=%u hw_id=0x%x sfnum=%u\n",
+ __get_str(devname), __entry->controller, __entry->hw_fn_id,
+ __entry->sfnum)
+);
+
+TRACE_EVENT(mlx5_sf_hwc_free,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u16 hw_fn_id),
+ TP_ARGS(dev, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u16, hw_fn_id)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id)
+);
+
+TRACE_EVENT(mlx5_sf_hwc_deferred_free,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u16 hw_fn_id),
+ TP_ARGS(dev, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u16, hw_fn_id)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id)
+);
+
+DECLARE_EVENT_CLASS(mlx5_sf_state_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id)
+);
+
+DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_activate,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id)
+ );
+
+DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_deactivate,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id)
+ );
+
+TRACE_EVENT(mlx5_sf_update_state,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ unsigned int port_index,
+ u32 controller,
+ u16 hw_fn_id,
+ u8 state),
+ TP_ARGS(dev, port_index, controller, hw_fn_id, state),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ __field(u8, state)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ __entry->state = state;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x state=%u\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id, __entry->state)
+);
+
+#endif /* _MLX5_SF_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH sf/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE sf_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h
new file mode 100644
index 000000000000..fd814a190b8b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_SF_VHCA_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_SF_VHCA_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/mlx5/driver.h>
+#include "sf/vhca_event.h"
+
+TRACE_EVENT(mlx5_sf_vhca_event,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_vhca_state_event *event),
+ TP_ARGS(dev, event),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ __field(u8, vhca_state)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->hw_fn_id = event->function_id;
+ __entry->sfnum = event->sw_function_id;
+ __entry->vhca_state = event->new_vhca_state;
+ ),
+ TP_printk("(%s) hw_id=0x%x sfnum=%u vhca_state=%d\n",
+ __get_str(devname), __entry->hw_fn_id,
+ __entry->sfnum, __entry->vhca_state)
+);
+
+#endif /* _MLX5_SF_VHCA_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH sf/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE vhca_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
new file mode 100644
index 000000000000..17aa348989cb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+#include <linux/mlx5/driver.h>
+#include "vhca_event.h"
+#include "priv.h"
+#include "sf.h"
+#include "mlx5_ifc_vhca_event.h"
+#include "ecpf.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "diag/sf_tracepoint.h"
+
+struct mlx5_sf_hw {
+ u32 usr_sfnum;
+ u8 allocated: 1;
+ u8 pending_delete: 1;
+};
+
+struct mlx5_sf_hwc_table {
+ struct mlx5_sf_hw *sfs;
+ int max_fn;
+ u16 start_fn_id;
+};
+
+enum mlx5_sf_hwc_index {
+ MLX5_SF_HWC_LOCAL,
+ MLX5_SF_HWC_EXTERNAL,
+ MLX5_SF_HWC_MAX,
+};
+
+struct mlx5_sf_hw_table {
+ struct mlx5_core_dev *dev;
+ struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */
+ struct notifier_block vhca_nb;
+ struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX];
+};
+
+static struct mlx5_sf_hwc_table *
+mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller)
+{
+ int idx = !!controller;
+
+ return &dev->priv.sf_hw_table->hwc[idx];
+}
+
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id)
+{
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
+ return hwc->start_fn_id + sw_id;
+}
+
+static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id)
+{
+ return hw_id - hwc->start_fn_id;
+}
+
+static struct mlx5_sf_hwc_table *
+mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(table->hwc); i++) {
+ if (table->hwc[i].max_fn &&
+ fn_id >= table->hwc[i].start_fn_id &&
+ fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn))
+ return &table->hwc[i];
+ }
+ return NULL;
+}
+
+static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller,
+ u32 usr_sfnum)
+{
+ struct mlx5_sf_hwc_table *hwc;
+ int free_idx = -1;
+ int i;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ if (!hwc->sfs)
+ return -ENOSPC;
+
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (!hwc->sfs[i].allocated && free_idx == -1) {
+ free_idx = i;
+ continue;
+ }
+
+ if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum)
+ return -EEXIST;
+ }
+
+ if (free_idx == -1)
+ return -ENOSPC;
+
+ hwc->sfs[free_idx].usr_sfnum = usr_sfnum;
+ hwc->sfs[free_idx].allocated = true;
+ return free_idx;
+}
+
+static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id)
+{
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ hwc->sfs[id].allocated = false;
+ hwc->sfs[id].pending_delete = false;
+}
+
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u16 hw_fn_id;
+ int sw_id;
+ int err;
+
+ if (!table)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&table->table_lock);
+ sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum);
+ if (sw_id < 0) {
+ err = sw_id;
+ goto exist_err;
+ }
+
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id);
+ err = mlx5_cmd_alloc_sf(dev, hw_fn_id);
+ if (err)
+ goto err;
+
+ err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum);
+ if (err)
+ goto vhca_err;
+
+ if (controller) {
+ /* If this SF is for external controller, SF manager
+ * needs to arm firmware to receive the events.
+ */
+ err = mlx5_vhca_event_arm(dev, hw_fn_id);
+ if (err)
+ goto vhca_err;
+ }
+
+ trace_mlx5_sf_hwc_alloc(dev, controller, hw_fn_id, usr_sfnum);
+ mutex_unlock(&table->table_lock);
+ return sw_id;
+
+vhca_err:
+ mlx5_cmd_dealloc_sf(dev, hw_fn_id);
+err:
+ mlx5_sf_hw_table_id_free(table, controller, sw_id);
+exist_err:
+ mutex_unlock(&table->table_lock);
+ return err;
+}
+
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u16 hw_fn_id;
+
+ mutex_lock(&table->table_lock);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
+ mlx5_cmd_dealloc_sf(dev, hw_fn_id);
+ mlx5_sf_hw_table_id_free(table, controller, id);
+ mutex_unlock(&table->table_lock);
+}
+
+static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc, int idx)
+{
+ mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx);
+ hwc->sfs[idx].allocated = false;
+ hwc->sfs[idx].pending_delete = false;
+ trace_mlx5_sf_hwc_free(dev, hwc->start_fn_id + idx);
+}
+
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ struct mlx5_sf_hwc_table *hwc;
+ u16 hw_fn_id;
+ u8 state;
+ int err;
+
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
+ mutex_lock(&table->table_lock);
+ err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out));
+ if (err)
+ goto err;
+ state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
+ if (state == MLX5_VHCA_STATE_ALLOCATED) {
+ mlx5_cmd_dealloc_sf(dev, hw_fn_id);
+ hwc->sfs[id].allocated = false;
+ } else {
+ hwc->sfs[id].pending_delete = true;
+ trace_mlx5_sf_hwc_deferred_free(dev, hw_fn_id);
+ }
+err:
+ mutex_unlock(&table->table_lock);
+}
+
+static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc)
+{
+ int i;
+
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (hwc->sfs[i].allocated)
+ mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i);
+ }
+}
+
+static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table)
+{
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]);
+}
+
+static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id)
+{
+ struct mlx5_sf_hw *sfs;
+
+ if (!max_fn)
+ return 0;
+
+ sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL);
+ if (!sfs)
+ return -ENOMEM;
+
+ hwc->sfs = sfs;
+ hwc->max_fn = max_fn;
+ hwc->start_fn_id = base_id;
+ return 0;
+}
+
+static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc)
+{
+ kfree(hwc->sfs);
+}
+
+int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table;
+ u16 max_ext_fn = 0;
+ u16 ext_base_id = 0;
+ u16 max_fn = 0;
+ u16 base_id;
+ int err;
+
+ if (!mlx5_vhca_event_supported(dev))
+ return 0;
+
+ if (mlx5_sf_supported(dev))
+ max_fn = mlx5_sf_max_functions(dev);
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id);
+ if (err)
+ return err;
+
+ if (!max_fn && !max_ext_fn)
+ return 0;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ mutex_init(&table->table_lock);
+ table->dev = dev;
+ dev->priv.sf_hw_table = table;
+
+ base_id = mlx5_sf_start_function_id(dev);
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], max_fn, base_id);
+ if (err)
+ goto table_err;
+
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL],
+ max_ext_fn, ext_base_id);
+ if (err)
+ goto ext_err;
+
+ mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn);
+ return 0;
+
+ext_err:
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
+table_err:
+ mutex_destroy(&table->table_lock);
+ kfree(table);
+ return err;
+}
+
+void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+ if (!table)
+ return;
+
+ mutex_destroy(&table->table_lock);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
+ kfree(table);
+}
+
+static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data)
+{
+ struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb);
+ const struct mlx5_vhca_state_event *event = data;
+ struct mlx5_sf_hwc_table *hwc;
+ struct mlx5_sf_hw *sf_hw;
+ u16 sw_id;
+
+ if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED)
+ return 0;
+
+ hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id);
+ if (!hwc)
+ return 0;
+
+ sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id);
+ sf_hw = &hwc->sfs[sw_id];
+
+ mutex_lock(&table->table_lock);
+ /* SF driver notified through firmware that SF is finally detached.
+ * Hence recycle the sf hardware id for reuse.
+ */
+ if (sf_hw->allocated && sf_hw->pending_delete)
+ mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id);
+ mutex_unlock(&table->table_lock);
+ return 0;
+}
+
+int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+ if (!table)
+ return 0;
+
+ table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event;
+ return mlx5_vhca_event_notifier_register(dev, &table->vhca_nb);
+}
+
+void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+ if (!table)
+ return;
+
+ mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb);
+ /* Dealloc SFs whose firmware event has been missed. */
+ mlx5_sf_hw_table_dealloc_all(table);
+}
+
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev)
+{
+ return !!dev->priv.sf_hw_table;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h
new file mode 100644
index 000000000000..4fc870140d71
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_IFC_VHCA_EVENT_H__
+#define __MLX5_IFC_VHCA_EVENT_H__
+
+enum mlx5_ifc_vhca_state {
+ MLX5_VHCA_STATE_INVALID = 0x0,
+ MLX5_VHCA_STATE_ALLOCATED = 0x1,
+ MLX5_VHCA_STATE_ACTIVE = 0x2,
+ MLX5_VHCA_STATE_IN_USE = 0x3,
+ MLX5_VHCA_STATE_TEARDOWN_REQUEST = 0x4,
+};
+
+struct mlx5_ifc_vhca_state_context_bits {
+ u8 arm_change_event[0x1];
+ u8 reserved_at_1[0xb];
+ u8 vhca_state[0x4];
+ u8 reserved_at_10[0x10];
+
+ u8 sw_function_id[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_query_vhca_state_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_vhca_state_context_bits vhca_state_context;
+};
+
+struct mlx5_ifc_query_vhca_state_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 embedded_cpu_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_vhca_state_field_select_bits {
+ u8 reserved_at_0[0x1e];
+ u8 sw_function_id[0x1];
+ u8 arm_change_event[0x1];
+};
+
+struct mlx5_ifc_modify_vhca_state_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_vhca_state_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 embedded_cpu_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ struct mlx5_ifc_vhca_state_field_select_bits vhca_state_field_select;
+
+ struct mlx5_ifc_vhca_state_context_bits vhca_state_context;
+};
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
new file mode 100644
index 000000000000..7114f3fc335f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_SF_PRIV_H__
+#define __MLX5_SF_PRIV_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id);
+int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id);
+
+int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id);
+
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum);
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
new file mode 100644
index 000000000000..3a480e06ecc0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_SF_H__
+#define __MLX5_SF_H__
+
+#include <linux/mlx5/driver.h>
+#include "lib/sf.h"
+
+#ifdef CONFIG_MLX5_SF_MANAGER
+
+int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev);
+void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev);
+void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_sf_table_init(struct mlx5_core_dev *dev);
+void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_devlink_sf_port_new(struct devlink *devlink,
+ const struct devlink_port_new_attrs *add_attr,
+ struct netlink_ext_ack *extack,
+ unsigned int *new_port_index);
+int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
+ enum devlink_port_fn_state *state,
+ enum devlink_port_fn_opstate *opstate,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack);
+#else
+
+static inline int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
+{
+}
+
+static inline int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
+{
+}
+
+static inline int mlx5_sf_table_init(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c
new file mode 100644
index 000000000000..d908fba968f0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_ifc_vhca_event.h"
+#include "mlx5_core.h"
+#include "vhca_event.h"
+#include "ecpf.h"
+#define CREATE_TRACE_POINTS
+#include "diag/vhca_tracepoint.h"
+
+struct mlx5_vhca_state_notifier {
+ struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
+ struct blocking_notifier_head n_head;
+};
+
+struct mlx5_vhca_event_work {
+ struct work_struct work;
+ struct mlx5_vhca_state_notifier *notifier;
+ struct mlx5_vhca_state_event event;
+};
+
+int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {};
+
+ MLX5_SET(query_vhca_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_STATE);
+ MLX5_SET(query_vhca_state_in, in, function_id, function_id);
+ MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, 0);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+static int mlx5_cmd_modify_vhca_state(struct mlx5_core_dev *dev, u16 function_id,
+ u32 *in, u32 inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {};
+
+ MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE);
+ MLX5_SET(modify_vhca_state_in, in, function_id, function_id);
+ MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0);
+
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {};
+
+ MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE);
+ MLX5_SET(modify_vhca_state_in, in, function_id, function_id);
+ MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0);
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.sw_function_id, 1);
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_context.sw_function_id, sw_fn_id);
+
+ return mlx5_cmd_exec_inout(dev, modify_vhca_state, in, out);
+}
+
+int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {};
+
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_context.arm_change_event, 1);
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.arm_change_event, 1);
+
+ return mlx5_cmd_modify_vhca_state(dev, function_id, in, sizeof(in));
+}
+
+static void
+mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event *event)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ int err;
+
+ err = mlx5_cmd_query_vhca_state(dev, event->function_id, out, sizeof(out));
+ if (err)
+ return;
+
+ event->sw_function_id = MLX5_GET(query_vhca_state_out, out,
+ vhca_state_context.sw_function_id);
+ event->new_vhca_state = MLX5_GET(query_vhca_state_out, out,
+ vhca_state_context.vhca_state);
+
+ mlx5_vhca_event_arm(dev, event->function_id);
+ trace_mlx5_sf_vhca_event(dev, event);
+
+ blocking_notifier_call_chain(&dev->priv.vhca_state_notifier->n_head, 0, event);
+}
+
+static void mlx5_vhca_state_work_handler(struct work_struct *_work)
+{
+ struct mlx5_vhca_event_work *work = container_of(_work, struct mlx5_vhca_event_work, work);
+ struct mlx5_vhca_state_notifier *notifier = work->notifier;
+ struct mlx5_core_dev *dev = notifier->dev;
+
+ mlx5_vhca_event_notify(dev, &work->event);
+ kfree(work);
+}
+
+static int
+mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_vhca_state_notifier *notifier =
+ mlx5_nb_cof(nb, struct mlx5_vhca_state_notifier, nb);
+ struct mlx5_vhca_event_work *work;
+ struct mlx5_eqe *eqe = data;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
+ INIT_WORK(&work->work, &mlx5_vhca_state_work_handler);
+ work->notifier = notifier;
+ work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id);
+ mlx5_events_work_enqueue(notifier->dev, &work->work);
+ return NOTIFY_OK;
+}
+
+void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap)
+{
+ if (!mlx5_vhca_event_supported(dev))
+ return;
+
+ MLX5_SET(cmd_hca_cap, set_hca_cap, vhca_state, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_allocated, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_active, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_in_use, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_teardown_request, 1);
+}
+
+int mlx5_vhca_event_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vhca_state_notifier *notifier;
+
+ if (!mlx5_vhca_event_supported(dev))
+ return 0;
+
+ notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
+ if (!notifier)
+ return -ENOMEM;
+
+ dev->priv.vhca_state_notifier = notifier;
+ notifier->dev = dev;
+ BLOCKING_INIT_NOTIFIER_HEAD(&notifier->n_head);
+ MLX5_NB_INIT(&notifier->nb, mlx5_vhca_state_change_notifier, VHCA_STATE_CHANGE);
+ return 0;
+}
+
+void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_vhca_event_supported(dev))
+ return;
+
+ kfree(dev->priv.vhca_state_notifier);
+ dev->priv.vhca_state_notifier = NULL;
+}
+
+void mlx5_vhca_event_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vhca_state_notifier *notifier;
+
+ if (!dev->priv.vhca_state_notifier)
+ return;
+
+ notifier = dev->priv.vhca_state_notifier;
+ mlx5_eq_notifier_register(dev, &notifier->nb);
+}
+
+void mlx5_vhca_event_stop(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vhca_state_notifier *notifier;
+
+ if (!dev->priv.vhca_state_notifier)
+ return;
+
+ notifier = dev->priv.vhca_state_notifier;
+ mlx5_eq_notifier_unregister(dev, &notifier->nb);
+}
+
+int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ if (!dev->priv.vhca_state_notifier)
+ return -EOPNOTSUPP;
+ return blocking_notifier_chain_register(&dev->priv.vhca_state_notifier->n_head, nb);
+}
+
+void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&dev->priv.vhca_state_notifier->n_head, nb);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h
new file mode 100644
index 000000000000..013cdfe90616
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_VHCA_EVENT_H__
+#define __MLX5_VHCA_EVENT_H__
+
+#ifdef CONFIG_MLX5_SF
+
+struct mlx5_vhca_state_event {
+ u16 function_id;
+ u16 sw_function_id;
+ u8 new_vhca_state;
+};
+
+static inline bool mlx5_vhca_event_supported(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN_MAX(dev, vhca_state);
+}
+
+void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap);
+int mlx5_vhca_event_init(struct mlx5_core_dev *dev);
+void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev);
+void mlx5_vhca_event_start(struct mlx5_core_dev *dev);
+void mlx5_vhca_event_stop(struct mlx5_core_dev *dev);
+int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
+void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb);
+int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id);
+int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id);
+int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id,
+ u32 *out, u32 outlen);
+#else
+
+static inline void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap)
+{
+}
+
+static inline int mlx5_vhca_event_init(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev)
+{
+}
+
+static inline void mlx5_vhca_event_start(struct mlx5_core_dev *dev)
+{
+}
+
+static inline void mlx5_vhca_event_stop(struct mlx5_core_dev *dev)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 03f037811f1d..c0e6c487c63c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -34,6 +34,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
+#include "mlx5_irq.h"
#include "eswitch.h"
static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
@@ -71,14 +72,12 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
- int err;
- int vf;
+ int err, vf, num_msix_count;
if (!MLX5_ESWITCH_MANAGER(dev))
goto enable_vfs_hca;
- mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs);
- err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY);
+ err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs);
if (err) {
mlx5_core_warn(dev,
"failed to enable eswitch SRIOV (%d)\n", err);
@@ -86,12 +85,27 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
}
enable_vfs_hca:
+ num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
for (vf = 0; vf < num_vfs; vf++) {
+ /* Notify the VF before its enablement to let it set
+ * some stuff.
+ */
+ blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+ MLX5_PF_NOTIFY_ENABLE_VF, dev);
err = mlx5_core_enable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
continue;
}
+
+ err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to set MSI-X vector counts VF %d, err %d\n",
+ vf, err);
+ continue;
+ }
+
sriov->vfs_ctx[vf].enabled = 1;
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
err = sriov_restore_guids(dev, vf);
@@ -118,6 +132,11 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
for (vf = num_vfs - 1; vf >= 0; vf--) {
if (!sriov->vfs_ctx[vf].enabled)
continue;
+ /* Notify the VF before its disablement to let it clean
+ * some resources.
+ */
+ blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+ MLX5_PF_NOTIFY_DISABLE_VF, dev);
err = mlx5_core_disable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
@@ -126,8 +145,7 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
sriov->vfs_ctx[vf].enabled = 0;
}
- if (MLX5_ESWITCH_MANAGER(dev))
- mlx5_eswitch_disable(dev->priv.eswitch, clear_vf);
+ mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf);
if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
@@ -136,9 +154,12 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
int err;
+ devl_lock(devlink);
err = mlx5_device_enable_sriov(dev, num_vfs);
+ devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
return err;
@@ -152,13 +173,16 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
return err;
}
-static void mlx5_sriov_disable(struct pci_dev *pdev)
+void mlx5_sriov_disable(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
int num_vfs = pci_num_vf(dev->pdev);
pci_disable_sriov(pdev);
+ devl_lock(devlink);
mlx5_device_disable_sriov(dev, num_vfs, true);
+ devl_unlock(devlink);
}
int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
@@ -179,6 +203,30 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
return err ? err : num_vfs;
}
+int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
+{
+ struct pci_dev *pf = pci_physfn(vf);
+ struct mlx5_core_sriov *sriov;
+ struct mlx5_core_dev *dev;
+ int num_vf_msix, id;
+
+ dev = pci_get_drvdata(pf);
+ num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+ if (!num_vf_msix)
+ return -EOPNOTSUPP;
+
+ if (!msix_vec_count)
+ msix_vec_count =
+ mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));
+
+ sriov = &dev->priv.sriov;
+ id = pci_iov_vf_id(vf);
+ if (id < 0 || !sriov->vfs_ctx[id].enabled)
+ return -EINVAL;
+
+ return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
+}
+
int mlx5_sriov_attach(struct mlx5_core_dev *dev)
{
if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev))
@@ -224,7 +272,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
struct pci_dev *pdev = dev->pdev;
- int total_vfs;
+ int total_vfs, i;
if (!mlx5_core_is_pf(dev))
return 0;
@@ -236,6 +284,9 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
if (!sriov->vfs_ctx)
return -ENOMEM;
+ for (i = 0; i < total_vfs; i++)
+ BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier);
+
return 0;
}
@@ -248,3 +299,53 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
kfree(sriov->vfs_ctx);
}
+
+/**
+ * mlx5_sriov_blocking_notifier_unregister - Unregister a VF from
+ * a notification block chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be unregistered.
+ */
+void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
+ int vf_id,
+ struct notifier_block *nb)
+{
+ struct mlx5_vf_context *vfs_ctx;
+ struct mlx5_core_sriov *sriov;
+
+ sriov = &mdev->priv.sriov;
+ if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs))
+ return;
+
+ vfs_ctx = &sriov->vfs_ctx[vf_id];
+ blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister);
+
+/**
+ * mlx5_sriov_blocking_notifier_register - Register a VF notification
+ * block chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be called upon the VF events.
+ *
+ * Returns 0 on success or an error code.
+ */
+int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
+ int vf_id,
+ struct notifier_block *nb)
+{
+ struct mlx5_vf_context *vfs_ctx;
+ struct mlx5_core_sriov *sriov;
+
+ sriov = &mdev->priv.sriov;
+ if (vf_id < 0 || vf_id >= sriov->num_vfs)
+ return -EINVAL;
+
+ vfs_ctx = &sriov->vfs_ctx[vf_id];
+ return blocking_notifier_chain_register(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 6dec2a550a10..b1dfad274a39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include "dr_types.h"
+#include "dr_ste.h"
enum dr_action_domain {
DR_ACTION_DOMAIN_NIC_INGRESS,
@@ -14,14 +15,45 @@ enum dr_action_domain {
enum dr_action_valid_state {
DR_ACTION_STATE_ERR,
DR_ACTION_STATE_NO_ACTION,
- DR_ACTION_STATE_REFORMAT,
+ DR_ACTION_STATE_ENCAP,
+ DR_ACTION_STATE_DECAP,
DR_ACTION_STATE_MODIFY_HDR,
- DR_ACTION_STATE_MODIFY_VLAN,
+ DR_ACTION_STATE_POP_VLAN,
+ DR_ACTION_STATE_PUSH_VLAN,
DR_ACTION_STATE_NON_TERM,
DR_ACTION_STATE_TERM,
+ DR_ACTION_STATE_ASO,
DR_ACTION_STATE_MAX,
};
+static const char * const action_type_to_str[] = {
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = "DR_ACTION_TYP_TNL_L2_TO_L2",
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = "DR_ACTION_TYP_L2_TO_TNL_L2",
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = "DR_ACTION_TYP_TNL_L3_TO_L2",
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = "DR_ACTION_TYP_L2_TO_TNL_L3",
+ [DR_ACTION_TYP_DROP] = "DR_ACTION_TYP_DROP",
+ [DR_ACTION_TYP_QP] = "DR_ACTION_TYP_QP",
+ [DR_ACTION_TYP_FT] = "DR_ACTION_TYP_FT",
+ [DR_ACTION_TYP_CTR] = "DR_ACTION_TYP_CTR",
+ [DR_ACTION_TYP_TAG] = "DR_ACTION_TYP_TAG",
+ [DR_ACTION_TYP_MODIFY_HDR] = "DR_ACTION_TYP_MODIFY_HDR",
+ [DR_ACTION_TYP_VPORT] = "DR_ACTION_TYP_VPORT",
+ [DR_ACTION_TYP_POP_VLAN] = "DR_ACTION_TYP_POP_VLAN",
+ [DR_ACTION_TYP_PUSH_VLAN] = "DR_ACTION_TYP_PUSH_VLAN",
+ [DR_ACTION_TYP_SAMPLER] = "DR_ACTION_TYP_SAMPLER",
+ [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR",
+ [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR",
+ [DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER",
+ [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN",
+};
+
+static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id)
+{
+ if (action_id > DR_ACTION_TYP_MAX)
+ action_id = DR_ACTION_TYP_MAX;
+ return action_type_to_str[action_id];
+}
+
static const enum dr_action_valid_state
next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = {
[DR_ACTION_DOMAIN_NIC_INGRESS] = {
@@ -29,48 +61,105 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
- [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ENCAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_PUSH_VLAN] = {
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
- [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -80,41 +169,87 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NO_ACTION] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ENCAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -124,44 +259,103 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NO_ACTION] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
- [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ENCAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
- [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -171,46 +365,95 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NO_ACTION] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_DECAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
- [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
- [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
- [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -218,158 +461,6 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
},
};
-struct dr_action_modify_field_conv {
- u16 hw_field;
- u8 start;
- u8 end;
- u8 l3_type;
- u8 l4_type;
-};
-
-static const struct dr_action_modify_field_conv dr_action_conv_arr[] = {
- [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 16, .end = 47,
- },
- [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 0, .end = 15,
- },
- [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 32, .end = 47,
- },
- [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 16, .end = 47,
- },
- [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 0, .end = 15,
- },
- [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 0, .end = 5,
- },
- [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 48, .end = 56,
- .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
- },
- [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15,
- .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
- },
- [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31,
- .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
- },
- [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
- },
- [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15,
- .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP,
- },
- [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31,
- .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP,
- },
- [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 32, .end = 63,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 0, .end = 31,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 32, .end = 63,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 0, .end = 31,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 32, .end = 63,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 0, .end = 31,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
- },
- [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
- },
- [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63,
- .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 0, .end = 31,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 32, .end = 63,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 32, .end = 63,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 0, .end = 31,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 32, .end = 63,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 0, .end = 31,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 32, .end = 63,
- },
- [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 0, .end = 31,
- },
- [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 32, .end = 63,
- },
- [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 0, .end = 31,
- },
- [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
- .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 0, .end = 15,
- },
-};
-
-#define MAX_VLANS 2
-struct dr_action_vlan_info {
- int count;
- u32 headers[MAX_VLANS];
-};
-
-struct dr_action_apply_attr {
- u32 modify_index;
- u16 modify_actions;
- u32 decap_index;
- u16 decap_actions;
- u8 decap_with_vlan:1;
- u64 final_icm_addr;
- u32 flow_tag;
- u32 ctr_id;
- u16 gvmi;
- u16 hit_gvmi;
- u32 reformat_id;
- u32 reformat_size;
- struct dr_action_vlan_info vlans;
-};
-
static int
dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type,
enum mlx5dr_action_type *action_type)
@@ -387,6 +478,12 @@ dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type
case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3:
*action_type = DR_ACTION_TYP_L2_TO_TNL_L3;
break;
+ case DR_ACTION_REFORMAT_TYP_INSERT_HDR:
+ *action_type = DR_ACTION_TYP_INSERT_HDR;
+ break;
+ case DR_ACTION_REFORMAT_TYP_REMOVE_HDR:
+ *action_type = DR_ACTION_TYP_REMOVE_HDR;
+ break;
default:
return -EINVAL;
}
@@ -394,169 +491,33 @@ dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type
return 0;
}
-static void dr_actions_init_next_ste(u8 **last_ste,
- u32 *added_stes,
- enum mlx5dr_ste_entry_type entry_type,
- u16 gvmi)
-{
- (*added_stes)++;
- *last_ste += DR_STE_SIZE;
- mlx5dr_ste_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, entry_type, gvmi);
-}
-
-static void dr_actions_apply_tx(struct mlx5dr_domain *dmn,
- u8 *action_type_set,
- u8 *last_ste,
- struct dr_action_apply_attr *attr,
- u32 *added_stes)
-{
- bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] ||
- action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3];
-
- /* We want to make sure the modify header comes before L2
- * encapsulation. The reason for that is that we support
- * modify headers for outer headers only
- */
- if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
- mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
- mlx5dr_ste_set_rewrite_actions(last_ste,
- attr->modify_actions,
- attr->modify_index);
- }
-
- if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
- int i;
-
- for (i = 0; i < attr->vlans.count; i++) {
- if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR])
- dr_actions_init_next_ste(&last_ste,
- added_stes,
- MLX5DR_STE_TYPE_TX,
- attr->gvmi);
-
- mlx5dr_ste_set_tx_push_vlan(last_ste,
- attr->vlans.headers[i],
- encap);
- }
- }
-
- if (encap) {
- /* Modify header and encapsulation require a different STEs.
- * Since modify header STE format doesn't support encapsulation
- * tunneling_action.
- */
- if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] ||
- action_type_set[DR_ACTION_TYP_PUSH_VLAN])
- dr_actions_init_next_ste(&last_ste,
- added_stes,
- MLX5DR_STE_TYPE_TX,
- attr->gvmi);
-
- mlx5dr_ste_set_tx_encap(last_ste,
- attr->reformat_id,
- attr->reformat_size,
- action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]);
- /* Whenever prio_tag_required enabled, we can be sure that the
- * previous table (ACL) already push vlan to our packet,
- * And due to HW limitation we need to set this bit, otherwise
- * push vlan + reformat will not work.
- */
- if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required))
- mlx5dr_ste_set_go_back_bit(last_ste);
- }
-
- if (action_type_set[DR_ACTION_TYP_CTR])
- mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
-}
-
-static void dr_actions_apply_rx(u8 *action_type_set,
- u8 *last_ste,
- struct dr_action_apply_attr *attr,
- u32 *added_stes)
-{
- if (action_type_set[DR_ACTION_TYP_CTR])
- mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
-
- if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
- mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
- mlx5dr_ste_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
- mlx5dr_ste_set_rewrite_actions(last_ste,
- attr->decap_actions,
- attr->decap_index);
- }
-
- if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2])
- mlx5dr_ste_set_rx_decap(last_ste);
-
- if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
- int i;
-
- for (i = 0; i < attr->vlans.count; i++) {
- if (i ||
- action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] ||
- action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
- dr_actions_init_next_ste(&last_ste,
- added_stes,
- MLX5DR_STE_TYPE_RX,
- attr->gvmi);
-
- mlx5dr_ste_set_rx_pop_vlan(last_ste);
- }
- }
-
- if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
- if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
- dr_actions_init_next_ste(&last_ste,
- added_stes,
- MLX5DR_STE_TYPE_MODIFY_PKT,
- attr->gvmi);
- else
- mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
-
- mlx5dr_ste_set_rewrite_actions(last_ste,
- attr->modify_actions,
- attr->modify_index);
- }
-
- if (action_type_set[DR_ACTION_TYP_TAG]) {
- if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
- dr_actions_init_next_ste(&last_ste,
- added_stes,
- MLX5DR_STE_TYPE_RX,
- attr->gvmi);
-
- mlx5dr_ste_rx_set_flow_tag(last_ste, attr->flow_tag);
- }
-}
-
/* Apply the actions on the rule STE array starting from the last_ste.
* Actions might require more than one STE, new_num_stes will return
* the new size of the STEs array, rule with actions.
*/
static void dr_actions_apply(struct mlx5dr_domain *dmn,
- enum mlx5dr_ste_entry_type ste_type,
+ enum mlx5dr_domain_nic_type nic_type,
u8 *action_type_set,
u8 *last_ste,
- struct dr_action_apply_attr *attr,
+ struct mlx5dr_ste_actions_attr *attr,
u32 *new_num_stes)
{
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
u32 added_stes = 0;
- if (ste_type == MLX5DR_STE_TYPE_RX)
- dr_actions_apply_rx(action_type_set, last_ste, attr, &added_stes);
+ if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
+ mlx5dr_ste_set_actions_rx(ste_ctx, dmn, action_type_set,
+ last_ste, attr, &added_stes);
else
- dr_actions_apply_tx(dmn, action_type_set, last_ste, attr, &added_stes);
+ mlx5dr_ste_set_actions_tx(ste_ctx, dmn, action_type_set,
+ last_ste, attr, &added_stes);
- last_ste += added_stes * DR_STE_SIZE;
*new_num_stes += added_stes;
-
- mlx5dr_ste_set_hit_gvmi(last_ste, attr->hit_gvmi);
- mlx5dr_ste_set_hit_addr(last_ste, attr->final_icm_addr, 1);
}
static enum dr_action_domain
dr_action_get_action_domain(enum mlx5dr_domain_type domain,
- enum mlx5dr_ste_entry_type ste_type)
+ enum mlx5dr_domain_nic_type nic_type)
{
switch (domain) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -564,7 +525,7 @@ dr_action_get_action_domain(enum mlx5dr_domain_type domain,
case MLX5DR_DOMAIN_TYPE_NIC_TX:
return DR_ACTION_DOMAIN_NIC_EGRESS;
case MLX5DR_DOMAIN_TYPE_FDB:
- if (ste_type == MLX5DR_STE_TYPE_RX)
+ if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
return DR_ACTION_DOMAIN_FDB_INGRESS;
return DR_ACTION_DOMAIN_FDB_EGRESS;
default:
@@ -601,8 +562,8 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
* table, since there is an *assumption* that in such case FW
* will recalculate the CS.
*/
- if (dest_action->dest_tbl.is_fw_tbl) {
- *final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr;
+ if (dest_action->dest_tbl->is_fw_tbl) {
+ *final_icm_addr = dest_action->dest_tbl->fw_tbl.rx_icm_addr;
} else {
mlx5dr_dbg(dmn,
"Destination FT should be terminating when modify TTL is used\n");
@@ -614,9 +575,9 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
/* If destination is vport we will get the FW flow table
* that recalculates the CS and forwards to the vport.
*/
- ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn,
- dest_action->vport.caps->num,
- final_icm_addr);
+ ret = mlx5dr_domain_get_recalc_cs_ft_addr(dest_action->vport->dmn,
+ dest_action->vport->caps->num,
+ final_icm_addr);
if (ret) {
mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n");
return ret;
@@ -630,6 +591,49 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
return 0;
}
+static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_actions_attr *attr,
+ bool rx_rule,
+ bool *recalc_cs_required)
+{
+ *recalc_cs_required = false;
+
+ /* if device supports csum recalculation - no adjustment needed */
+ if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps))
+ return;
+
+ /* no adjustment needed on TX rules */
+ if (!rx_rule)
+ return;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) {
+ /* Ignore the modify TTL action.
+ * It is always kept as last HW action.
+ */
+ attr->modify_actions--;
+ return;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+ /* Due to a HW bug on some devices, modifying TTL on RX flows
+ * will cause an incorrect checksum calculation. In such cases
+ * we will use a FW table to recalculate the checksum.
+ */
+ *recalc_cs_required = true;
+}
+
+static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *actions[],
+ int last_idx)
+{
+ int i;
+
+ for (i = 0; i <= last_idx; i++)
+ mlx5dr_err(dmn, "< %s (%d) > ",
+ dr_action_id_to_str(actions[i]->action_type),
+ actions[i]->action_type);
+}
+
#define WITH_VLAN_NUM_HW_ACTIONS 6
int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
@@ -640,12 +644,12 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
u32 *new_hw_ste_arr_sz)
{
struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
- bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+ bool rx_rule = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
u8 action_type_set[DR_ACTION_TYP_MAX] = {};
+ struct mlx5dr_ste_actions_attr attr = {};
struct mlx5dr_action *dest_action = NULL;
u32 state = DR_ACTION_STATE_NO_ACTION;
- struct dr_action_apply_attr attr = {};
enum dr_action_domain action_domain;
bool recalc_cs_required = false;
u8 *last_ste;
@@ -654,9 +658,11 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
attr.gvmi = dmn->info.caps.gvmi;
attr.hit_gvmi = dmn->info.caps.gvmi;
attr.final_icm_addr = nic_dmn->default_icm_addr;
- action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type);
+ action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type);
for (i = 0; i < num_actions; i++) {
+ struct mlx5dr_action_dest_tbl *dest_tbl;
+ struct mlx5dr_icm_chunk *chunk;
struct mlx5dr_action *action;
int max_actions_type = 1;
u32 action_type;
@@ -670,118 +676,154 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
break;
case DR_ACTION_TYP_FT:
dest_action = action;
- if (!action->dest_tbl.is_fw_tbl) {
- if (action->dest_tbl.tbl->dmn != dmn) {
- mlx5dr_dbg(dmn,
+ dest_tbl = action->dest_tbl;
+ if (!dest_tbl->is_fw_tbl) {
+ if (dest_tbl->tbl->dmn != dmn) {
+ mlx5dr_err(dmn,
"Destination table belongs to a different domain\n");
- goto out_invalid_arg;
+ return -EINVAL;
}
- if (action->dest_tbl.tbl->level <= matcher->tbl->level) {
- mlx5_core_warn_once(dmn->mdev,
- "Connecting table to a lower/same level destination table\n");
+ if (dest_tbl->tbl->level <= matcher->tbl->level) {
+ mlx5_core_dbg_once(dmn->mdev,
+ "Connecting table to a lower/same level destination table\n");
mlx5dr_dbg(dmn,
"Connecting table at level %d to a destination table at level %d\n",
matcher->tbl->level,
- action->dest_tbl.tbl->level);
+ dest_tbl->tbl->level);
}
- attr.final_icm_addr = rx_rule ?
- action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
- action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr;
+ chunk = rx_rule ? dest_tbl->tbl->rx.s_anchor->chunk :
+ dest_tbl->tbl->tx.s_anchor->chunk;
+ attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk);
} else {
struct mlx5dr_cmd_query_flow_table_details output;
int ret;
/* get the relevant addresses */
- if (!action->dest_tbl.fw_tbl.rx_icm_addr) {
+ if (!action->dest_tbl->fw_tbl.rx_icm_addr) {
ret = mlx5dr_cmd_query_flow_table(dmn->mdev,
- action->dest_tbl.fw_tbl.type,
- action->dest_tbl.fw_tbl.id,
+ dest_tbl->fw_tbl.type,
+ dest_tbl->fw_tbl.id,
&output);
if (!ret) {
- action->dest_tbl.fw_tbl.tx_icm_addr =
+ dest_tbl->fw_tbl.tx_icm_addr =
output.sw_owner_icm_root_1;
- action->dest_tbl.fw_tbl.rx_icm_addr =
+ dest_tbl->fw_tbl.rx_icm_addr =
output.sw_owner_icm_root_0;
} else {
- mlx5dr_dbg(dmn,
+ mlx5dr_err(dmn,
"Failed mlx5_cmd_query_flow_table ret: %d\n",
ret);
return ret;
}
}
attr.final_icm_addr = rx_rule ?
- action->dest_tbl.fw_tbl.rx_icm_addr :
- action->dest_tbl.fw_tbl.tx_icm_addr;
+ dest_tbl->fw_tbl.rx_icm_addr :
+ dest_tbl->fw_tbl.tx_icm_addr;
}
break;
case DR_ACTION_TYP_QP:
mlx5dr_info(dmn, "Domain doesn't support QP\n");
- goto out_invalid_arg;
+ return -EOPNOTSUPP;
case DR_ACTION_TYP_CTR:
- attr.ctr_id = action->ctr.ctr_id +
- action->ctr.offeset;
+ attr.ctr_id = action->ctr->ctr_id +
+ action->ctr->offset;
break;
case DR_ACTION_TYP_TAG:
- attr.flow_tag = action->flow_tag;
+ attr.flow_tag = action->flow_tag->flow_tag;
break;
case DR_ACTION_TYP_TNL_L2_TO_L2:
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
- attr.decap_index = action->rewrite.index;
- attr.decap_actions = action->rewrite.num_of_actions;
+ attr.decap_index = action->rewrite->index;
+ attr.decap_actions = action->rewrite->num_of_actions;
attr.decap_with_vlan =
attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
break;
case DR_ACTION_TYP_MODIFY_HDR:
- attr.modify_index = action->rewrite.index;
- attr.modify_actions = action->rewrite.num_of_actions;
- recalc_cs_required = action->rewrite.modify_ttl;
+ attr.modify_index = action->rewrite->index;
+ attr.modify_actions = action->rewrite->num_of_actions;
+ if (action->rewrite->modify_ttl)
+ dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
+ &recalc_cs_required);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
case DR_ACTION_TYP_L2_TO_TNL_L3:
- attr.reformat_size = action->reformat.reformat_size;
- attr.reformat_id = action->reformat.reformat_id;
+ if (rx_rule &&
+ !(dmn->ste_ctx->actions_caps & DR_STE_CTX_ACTION_CAP_RX_ENCAP)) {
+ mlx5dr_info(dmn, "Device doesn't support Encap on RX\n");
+ return -EOPNOTSUPP;
+ }
+ attr.reformat.size = action->reformat->size;
+ attr.reformat.id = action->reformat->id;
+ break;
+ case DR_ACTION_TYP_SAMPLER:
+ attr.final_icm_addr = rx_rule ? action->sampler->rx_icm_addr :
+ action->sampler->tx_icm_addr;
break;
case DR_ACTION_TYP_VPORT:
- attr.hit_gvmi = action->vport.caps->vhca_gvmi;
+ attr.hit_gvmi = action->vport->caps->vhca_gvmi;
dest_action = action;
- if (rx_rule) {
- /* Loopback on WIRE vport is not supported */
- if (action->vport.caps->num == WIRE_PORT)
- goto out_invalid_arg;
-
- attr.final_icm_addr = action->vport.caps->icm_address_rx;
- } else {
- attr.final_icm_addr = action->vport.caps->icm_address_tx;
- }
+ attr.final_icm_addr = rx_rule ?
+ action->vport->caps->icm_address_rx :
+ action->vport->caps->icm_address_tx;
break;
case DR_ACTION_TYP_POP_VLAN:
- max_actions_type = MAX_VLANS;
+ if (!rx_rule && !(dmn->ste_ctx->actions_caps &
+ DR_STE_CTX_ACTION_CAP_TX_POP)) {
+ mlx5dr_dbg(dmn, "Device doesn't support POP VLAN action on TX\n");
+ return -EOPNOTSUPP;
+ }
+
+ max_actions_type = MLX5DR_MAX_VLANS;
attr.vlans.count++;
break;
case DR_ACTION_TYP_PUSH_VLAN:
- max_actions_type = MAX_VLANS;
- if (attr.vlans.count == MAX_VLANS)
+ if (rx_rule && !(dmn->ste_ctx->actions_caps &
+ DR_STE_CTX_ACTION_CAP_RX_PUSH)) {
+ mlx5dr_dbg(dmn, "Device doesn't support PUSH VLAN action on RX\n");
+ return -EOPNOTSUPP;
+ }
+
+ max_actions_type = MLX5DR_MAX_VLANS;
+ if (attr.vlans.count == MLX5DR_MAX_VLANS) {
+ mlx5dr_dbg(dmn, "Max VLAN push/pop count exceeded\n");
return -EINVAL;
+ }
- attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr;
+ attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr;
+ break;
+ case DR_ACTION_TYP_INSERT_HDR:
+ case DR_ACTION_TYP_REMOVE_HDR:
+ attr.reformat.size = action->reformat->size;
+ attr.reformat.id = action->reformat->id;
+ attr.reformat.param_0 = action->reformat->param_0;
+ attr.reformat.param_1 = action->reformat->param_1;
+ break;
+ case DR_ACTION_TYP_ASO_FLOW_METER:
+ attr.aso_flow_meter.obj_id = action->aso->obj_id;
+ attr.aso_flow_meter.offset = action->aso->offset;
+ attr.aso_flow_meter.dest_reg_id = action->aso->dest_reg_id;
+ attr.aso_flow_meter.init_color = action->aso->init_color;
break;
default:
- goto out_invalid_arg;
+ mlx5dr_err(dmn, "Unsupported action type %d\n", action_type);
+ return -EINVAL;
}
/* Check action duplication */
if (++action_type_set[action_type] > max_actions_type) {
- mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n",
+ mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n",
action_type, max_actions_type);
- goto out_invalid_arg;
+ return -EINVAL;
}
/* Check action state machine is valid */
if (dr_action_validate_and_get_next_state(action_domain,
action_type,
&state)) {
- mlx5dr_dbg(dmn, "Invalid action sequence provided\n");
+ mlx5dr_err(dmn, "Invalid action (gvmi: %d, is_rx: %d) sequence provided:",
+ attr.gvmi, rx_rule);
+ dr_action_print_sequence(dmn, actions, i);
return -EOPNOTSUPP;
}
}
@@ -789,15 +831,10 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
*new_hw_ste_arr_sz = nic_matcher->num_of_builders;
last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
- /* Due to a HW bug, modifying TTL on RX flows will cause an incorrect
- * checksum calculation. In this case we will use a FW table to
- * recalculate.
- */
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
- rx_rule && recalc_cs_required && dest_action) {
+ if (recalc_cs_required && dest_action) {
ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
if (ret) {
- mlx5dr_dbg(dmn,
+ mlx5dr_err(dmn,
"Failed to handle checksum recalculation err %d\n",
ret);
return ret;
@@ -805,156 +842,50 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
}
dr_actions_apply(dmn,
- nic_dmn->ste_type,
+ nic_dmn->type,
action_type_set,
last_ste,
&attr,
new_hw_ste_arr_sz);
return 0;
-
-out_invalid_arg:
- return -EINVAL;
}
-#define CVLAN_ETHERTYPE 0x8100
-#define SVLAN_ETHERTYPE 0x88a8
-#define HDR_LEN_L2_ONLY 14
-#define HDR_LEN_L2_VLAN 18
-#define REWRITE_HW_ACTION_NUM 6
-
-static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn,
- struct mlx5dr_action *action,
- void *data, size_t data_sz)
-{
- struct mlx5_ifc_l2_hdr_bits *l2_hdr = data;
- u64 ops[REWRITE_HW_ACTION_NUM] = {};
- u32 hdr_fld_4b;
- u16 hdr_fld_2b;
- u16 vlan_type;
- bool vlan;
- int i = 0;
- int ret;
-
- vlan = (data_sz != HDR_LEN_L2_ONLY);
-
- /* dmac_47_16 */
- MLX5_SET(dr_action_hw_set, ops + i,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_length, 0);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_left_shifter, 16);
- hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16);
- MLX5_SET(dr_action_hw_set, ops + i,
- inline_data, hdr_fld_4b);
- i++;
-
- /* smac_47_16 */
- MLX5_SET(dr_action_hw_set, ops + i,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_length, 0);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_left_shifter, 16);
- hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 |
- MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16);
- MLX5_SET(dr_action_hw_set, ops + i,
- inline_data, hdr_fld_4b);
- i++;
-
- /* dmac_15_0 */
- MLX5_SET(dr_action_hw_set, ops + i,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_length, 16);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_left_shifter, 0);
- hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0);
- MLX5_SET(dr_action_hw_set, ops + i,
- inline_data, hdr_fld_2b);
- i++;
-
- /* ethertype + (optional) vlan */
- MLX5_SET(dr_action_hw_set, ops + i,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_left_shifter, 32);
- if (!vlan) {
- hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
- MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_2b);
- MLX5_SET(dr_action_hw_set, ops + i, destination_length, 16);
- } else {
- hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
- vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? DR_STE_SVLAN : DR_STE_CVLAN;
- hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan);
- hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b;
- MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_4b);
- MLX5_SET(dr_action_hw_set, ops + i, destination_length, 18);
- }
- i++;
-
- /* smac_15_0 */
- MLX5_SET(dr_action_hw_set, ops + i,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_length, 16);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_left_shifter, 0);
- hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0);
- MLX5_SET(dr_action_hw_set, ops + i,
- inline_data, hdr_fld_2b);
- i++;
-
- if (vlan) {
- MLX5_SET(dr_action_hw_set, ops + i,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
- hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type);
- MLX5_SET(dr_action_hw_set, ops + i,
- inline_data, hdr_fld_2b);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_length, 16);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2);
- MLX5_SET(dr_action_hw_set, ops + i,
- destination_left_shifter, 0);
- i++;
- }
-
- action->rewrite.data = (void *)ops;
- action->rewrite.num_of_actions = i;
- action->rewrite.chunk->byte_size = i * sizeof(*ops);
-
- ret = mlx5dr_send_postsend_action(dmn, action);
- if (ret) {
- mlx5dr_dbg(dmn, "Writing encapsulation action to ICM failed\n");
- return ret;
- }
-
- return 0;
-}
+static unsigned int action_size[DR_ACTION_TYP_MAX] = {
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = sizeof(struct mlx5dr_action_rewrite),
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_FT] = sizeof(struct mlx5dr_action_dest_tbl),
+ [DR_ACTION_TYP_CTR] = sizeof(struct mlx5dr_action_ctr),
+ [DR_ACTION_TYP_TAG] = sizeof(struct mlx5dr_action_flow_tag),
+ [DR_ACTION_TYP_MODIFY_HDR] = sizeof(struct mlx5dr_action_rewrite),
+ [DR_ACTION_TYP_VPORT] = sizeof(struct mlx5dr_action_vport),
+ [DR_ACTION_TYP_PUSH_VLAN] = sizeof(struct mlx5dr_action_push_vlan),
+ [DR_ACTION_TYP_INSERT_HDR] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler),
+ [DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter),
+};
static struct mlx5dr_action *
dr_action_create_generic(enum mlx5dr_action_type action_type)
{
struct mlx5dr_action *action;
+ int extra_size;
- action = kzalloc(sizeof(*action), GFP_KERNEL);
+ if (action_type < DR_ACTION_TYP_MAX)
+ extra_size = action_size[action_type];
+ else
+ return NULL;
+
+ action = kzalloc(sizeof(*action) + extra_size, GFP_KERNEL);
if (!action)
return NULL;
action->action_type = action_type;
refcount_set(&action->refcount, 1);
+ action->data = action + 1;
return action;
}
@@ -965,6 +896,24 @@ struct mlx5dr_action *mlx5dr_action_create_drop(void)
}
struct mlx5dr_action *
+mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ return NULL;
+
+ action->dest_tbl->is_fw_tbl = true;
+ action->dest_tbl->fw_tbl.dmn = dmn;
+ action->dest_tbl->fw_tbl.id = table_num;
+ action->dest_tbl->fw_tbl.type = FS_FT_FDB;
+ refcount_inc(&dmn->refcount);
+
+ return action;
+}
+
+struct mlx5dr_action *
mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl)
{
struct mlx5dr_action *action;
@@ -975,7 +924,7 @@ mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl)
if (!action)
goto dec_ref;
- action->dest_tbl.tbl = tbl;
+ action->dest_tbl->tbl = tbl;
return action;
@@ -987,13 +936,16 @@ dec_ref:
struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
- u32 num_of_dests)
+ u32 num_of_dests,
+ bool ignore_flow_level,
+ u32 flow_source)
{
struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
struct mlx5dr_action **ref_actions;
struct mlx5dr_action *action;
bool reformat_req = false;
u32 num_of_ref = 0;
+ u32 ref_act_cnt;
int ret;
int i;
@@ -1002,11 +954,14 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
return NULL;
}
- hw_dests = kzalloc(sizeof(*hw_dests) * num_of_dests, GFP_KERNEL);
+ hw_dests = kcalloc(num_of_dests, sizeof(*hw_dests), GFP_KERNEL);
if (!hw_dests)
return NULL;
- ref_actions = kzalloc(sizeof(*ref_actions) * num_of_dests * 2, GFP_KERNEL);
+ if (unlikely(check_mul_overflow(num_of_dests, 2u, &ref_act_cnt)))
+ goto free_hw_dests;
+
+ ref_actions = kcalloc(ref_act_cnt, sizeof(*ref_actions), GFP_KERNEL);
if (!ref_actions)
goto free_hw_dests;
@@ -1020,12 +975,12 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
case DR_ACTION_TYP_VPORT:
hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- hw_dests[i].vport.num = dest_action->vport.caps->num;
- hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi;
+ hw_dests[i].vport.num = dest_action->vport->caps->num;
+ hw_dests[i].vport.vhca_id = dest_action->vport->caps->vhca_gvmi;
if (reformat_action) {
reformat_req = true;
hw_dests[i].vport.reformat_id =
- reformat_action->reformat.reformat_id;
+ reformat_action->reformat->id;
ref_actions[num_of_ref++] = reformat_action;
hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
}
@@ -1033,10 +988,10 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
case DR_ACTION_TYP_FT:
hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- if (dest_action->dest_tbl.is_fw_tbl)
- hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id;
+ if (dest_action->dest_tbl->is_fw_tbl)
+ hw_dests[i].ft_id = dest_action->dest_tbl->fw_tbl.id;
else
- hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id;
+ hw_dests[i].ft_id = dest_action->dest_tbl->tbl->table_id;
break;
default:
@@ -1053,8 +1008,10 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
hw_dests,
num_of_dests,
reformat_req,
- &action->dest_tbl.fw_tbl.id,
- &action->dest_tbl.fw_tbl.group_id);
+ &action->dest_tbl->fw_tbl.id,
+ &action->dest_tbl->fw_tbl.group_id,
+ ignore_flow_level,
+ flow_source);
if (ret)
goto free_action;
@@ -1063,11 +1020,11 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
for (i = 0; i < num_of_ref; i++)
refcount_inc(&ref_actions[i]->refcount);
- action->dest_tbl.is_fw_tbl = true;
- action->dest_tbl.fw_tbl.dmn = dmn;
- action->dest_tbl.fw_tbl.type = FS_FT_FDB;
- action->dest_tbl.fw_tbl.ref_actions = ref_actions;
- action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref;
+ action->dest_tbl->is_fw_tbl = true;
+ action->dest_tbl->fw_tbl.dmn = dmn;
+ action->dest_tbl->fw_tbl.type = FS_FT_FDB;
+ action->dest_tbl->fw_tbl.ref_actions = ref_actions;
+ action->dest_tbl->fw_tbl.num_of_ref_actions = num_of_ref;
kfree(hw_dests);
@@ -1092,10 +1049,10 @@ mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn,
if (!action)
return NULL;
- action->dest_tbl.is_fw_tbl = 1;
- action->dest_tbl.fw_tbl.type = ft->type;
- action->dest_tbl.fw_tbl.id = ft->id;
- action->dest_tbl.fw_tbl.dmn = dmn;
+ action->dest_tbl->is_fw_tbl = 1;
+ action->dest_tbl->fw_tbl.type = ft->type;
+ action->dest_tbl->fw_tbl.id = ft->id;
+ action->dest_tbl->fw_tbl.dmn = dmn;
refcount_inc(&dmn->refcount);
@@ -1111,7 +1068,7 @@ mlx5dr_action_create_flow_counter(u32 counter_id)
if (!action)
return NULL;
- action->ctr.ctr_id = counter_id;
+ action->ctr->ctr_id = counter_id;
return action;
}
@@ -1124,20 +1081,61 @@ struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value)
if (!action)
return NULL;
- action->flow_tag = tag_value & 0xffffff;
+ action->flow_tag->flow_tag = tag_value & 0xffffff;
+
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id)
+{
+ struct mlx5dr_action *action;
+ u64 icm_rx, icm_tx;
+ int ret;
+
+ ret = mlx5dr_cmd_query_flow_sampler(dmn->mdev, sampler_id,
+ &icm_rx, &icm_tx);
+ if (ret)
+ return NULL;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_SAMPLER);
+ if (!action)
+ return NULL;
+
+ action->sampler->dmn = dmn;
+ action->sampler->sampler_id = sampler_id;
+ action->sampler->rx_icm_addr = icm_rx;
+ action->sampler->tx_icm_addr = icm_tx;
+ refcount_inc(&dmn->refcount);
return action;
}
static int
dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
struct mlx5dr_domain *dmn,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
size_t data_sz,
void *data)
{
- if ((!data && data_sz) || (data && !data_sz) || reformat_type >
- DR_ACTION_TYP_L2_TO_TNL_L3) {
- mlx5dr_dbg(dmn, "Invalid reformat parameter!\n");
+ if (reformat_type == DR_ACTION_TYP_INSERT_HDR) {
+ if ((!data && data_sz) || (data && !data_sz) ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_size) < data_sz ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_offset) < reformat_param_1) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters for INSERT_HDR\n");
+ goto out_err;
+ }
+ } else if (reformat_type == DR_ACTION_TYP_REMOVE_HDR) {
+ if (data ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_size) < data_sz ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_offset) < reformat_param_1) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters for REMOVE_HDR\n");
+ goto out_err;
+ }
+ } else if (reformat_param_0 || reformat_param_1 ||
+ reformat_type > DR_ACTION_TYP_REMOVE_HDR) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters\n");
goto out_err;
}
@@ -1168,6 +1166,7 @@ out_err:
static int
dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
+ u8 reformat_param_0, u8 reformat_param_1,
size_t data_sz, void *data,
struct mlx5dr_action *action)
{
@@ -1185,13 +1184,14 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
else
rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
- ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, data_sz, data,
+ ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, 0, 0,
+ data_sz, data,
&reformat_id);
if (ret)
return ret;
- action->reformat.reformat_id = reformat_id;
- action->reformat.reformat_size = data_sz;
+ action->reformat->id = reformat_id;
+ action->reformat->size = data_sz;
return 0;
}
case DR_ACTION_TYP_TNL_L2_TO_L2:
@@ -1200,32 +1200,70 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
}
case DR_ACTION_TYP_TNL_L3_TO_L2:
{
- /* Only Ethernet frame is supported, with VLAN (18) or without (14) */
- if (data_sz != HDR_LEN_L2_ONLY && data_sz != HDR_LEN_L2_VLAN)
- return -EINVAL;
+ u8 hw_actions[ACTION_CACHE_LINE_SIZE] = {};
+ int ret;
+
+ ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx,
+ data, data_sz,
+ hw_actions,
+ ACTION_CACHE_LINE_SIZE,
+ &action->rewrite->num_of_actions);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n");
+ return ret;
+ }
- action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
- DR_CHUNK_SIZE_8);
- if (!action->rewrite.chunk)
+ action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+ DR_CHUNK_SIZE_8);
+ if (!action->rewrite->chunk) {
+ mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n");
return -ENOMEM;
+ }
- action->rewrite.index = (action->rewrite.chunk->icm_addr -
+ action->rewrite->data = (void *)hw_actions;
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr
+ (action->rewrite->chunk) -
dmn->info.caps.hdr_modify_icm_addr) /
ACTION_CACHE_LINE_SIZE;
- ret = dr_actions_l2_rewrite(dmn, action, data, data_sz);
+ ret = mlx5dr_send_postsend_action(dmn, action);
if (ret) {
- mlx5dr_icm_free_chunk(action->rewrite.chunk);
+ mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n");
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
return ret;
}
return 0;
}
+ case DR_ACTION_TYP_INSERT_HDR:
+ ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev,
+ MLX5_REFORMAT_TYPE_INSERT_HDR,
+ reformat_param_0,
+ reformat_param_1,
+ data_sz, data,
+ &reformat_id);
+ if (ret)
+ return ret;
+
+ action->reformat->id = reformat_id;
+ action->reformat->size = data_sz;
+ action->reformat->param_0 = reformat_param_0;
+ action->reformat->param_1 = reformat_param_1;
+ return 0;
+ case DR_ACTION_TYP_REMOVE_HDR:
+ action->reformat->id = 0;
+ action->reformat->size = data_sz;
+ action->reformat->param_0 = reformat_param_0;
+ action->reformat->param_1 = reformat_param_1;
+ return 0;
default:
mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
return -EINVAL;
}
}
+#define CVLAN_ETHERTYPE 0x8100
+#define SVLAN_ETHERTYPE 0x88a8
+
struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void)
{
return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN);
@@ -1247,13 +1285,15 @@ struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn,
if (!action)
return NULL;
- action->push_vlan.vlan_hdr = vlan_hdr_h;
+ action->push_vlan->vlan_hdr = vlan_hdr_h;
return action;
}
struct mlx5dr_action *
mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
enum mlx5dr_action_reformat_type reformat_type,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
size_t data_sz,
void *data)
{
@@ -1270,7 +1310,9 @@ mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
goto dec_ref;
}
- ret = dr_action_verify_reformat_params(action_type, dmn, data_sz, data);
+ ret = dr_action_verify_reformat_params(action_type, dmn,
+ reformat_param_0, reformat_param_1,
+ data_sz, data);
if (ret)
goto dec_ref;
@@ -1278,9 +1320,11 @@ mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
if (!action)
goto dec_ref;
- action->reformat.dmn = dmn;
+ action->reformat->dmn = dmn;
ret = dr_action_create_reformat_action(dmn,
+ reformat_param_0,
+ reformat_param_1,
data_sz,
data,
action);
@@ -1298,31 +1342,13 @@ dec_ref:
return NULL;
}
-static const struct dr_action_modify_field_conv *
-dr_action_modify_get_hw_info(u16 sw_field)
-{
- const struct dr_action_modify_field_conv *hw_action_info;
-
- if (sw_field >= ARRAY_SIZE(dr_action_conv_arr))
- goto not_found;
-
- hw_action_info = &dr_action_conv_arr[sw_field];
- if (!hw_action_info->end && !hw_action_info->start)
- goto not_found;
-
- return hw_action_info;
-
-not_found:
- return NULL;
-}
-
static int
dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn,
__be64 *sw_action,
__be64 *hw_action,
- const struct dr_action_modify_field_conv **ret_hw_info)
+ const struct mlx5dr_ste_action_modify_field **ret_hw_info)
{
- const struct dr_action_modify_field_conv *hw_action_info;
+ const struct mlx5dr_ste_action_modify_field *hw_action_info;
u8 max_length;
u16 sw_field;
u32 data;
@@ -1332,7 +1358,7 @@ dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn,
data = MLX5_GET(set_action_in, sw_action, data);
/* Convert SW data to HW modify action format */
- hw_action_info = dr_action_modify_get_hw_info(sw_field);
+ hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field);
if (!hw_action_info) {
mlx5dr_dbg(dmn, "Modify add action invalid field given\n");
return -EINVAL;
@@ -1340,20 +1366,12 @@ dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn,
max_length = hw_action_info->end - hw_action_info->start + 1;
- MLX5_SET(dr_action_hw_set, hw_action,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_ADD);
-
- MLX5_SET(dr_action_hw_set, hw_action, destination_field_code,
- hw_action_info->hw_field);
-
- MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter,
- hw_action_info->start);
-
- /* PRM defines that length zero specific length of 32bits */
- MLX5_SET(dr_action_hw_set, hw_action, destination_length,
- max_length == 32 ? 0 : max_length);
-
- MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+ mlx5dr_ste_set_action_add(dmn->ste_ctx,
+ hw_action,
+ hw_action_info->hw_field,
+ hw_action_info->start,
+ max_length,
+ data);
*ret_hw_info = hw_action_info;
@@ -1364,9 +1382,9 @@ static int
dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn,
__be64 *sw_action,
__be64 *hw_action,
- const struct dr_action_modify_field_conv **ret_hw_info)
+ const struct mlx5dr_ste_action_modify_field **ret_hw_info)
{
- const struct dr_action_modify_field_conv *hw_action_info;
+ const struct mlx5dr_ste_action_modify_field *hw_action_info;
u8 offset, length, max_length;
u16 sw_field;
u32 data;
@@ -1378,7 +1396,7 @@ dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn,
data = MLX5_GET(set_action_in, sw_action, data);
/* Convert SW data to HW modify action format */
- hw_action_info = dr_action_modify_get_hw_info(sw_field);
+ hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field);
if (!hw_action_info) {
mlx5dr_dbg(dmn, "Modify set action invalid field given\n");
return -EINVAL;
@@ -1394,19 +1412,12 @@ dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn,
return -EINVAL;
}
- MLX5_SET(dr_action_hw_set, hw_action,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
-
- MLX5_SET(dr_action_hw_set, hw_action, destination_field_code,
- hw_action_info->hw_field);
-
- MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter,
- hw_action_info->start + offset);
-
- MLX5_SET(dr_action_hw_set, hw_action, destination_length,
- length == 32 ? 0 : length);
-
- MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+ mlx5dr_ste_set_action_set(dmn->ste_ctx,
+ hw_action,
+ hw_action_info->hw_field,
+ hw_action_info->start + offset,
+ length,
+ data);
*ret_hw_info = hw_action_info;
@@ -1417,12 +1428,12 @@ static int
dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn,
__be64 *sw_action,
__be64 *hw_action,
- const struct dr_action_modify_field_conv **ret_dst_hw_info,
- const struct dr_action_modify_field_conv **ret_src_hw_info)
+ const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info,
+ const struct mlx5dr_ste_action_modify_field **ret_src_hw_info)
{
u8 src_offset, dst_offset, src_max_length, dst_max_length, length;
- const struct dr_action_modify_field_conv *hw_dst_action_info;
- const struct dr_action_modify_field_conv *hw_src_action_info;
+ const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
+ const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
u16 src_field, dst_field;
/* Get SW modify action data */
@@ -1433,8 +1444,8 @@ dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn,
length = MLX5_GET(copy_action_in, sw_action, length);
/* Convert SW data to HW modify action format */
- hw_src_action_info = dr_action_modify_get_hw_info(src_field);
- hw_dst_action_info = dr_action_modify_get_hw_info(dst_field);
+ hw_src_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, src_field);
+ hw_dst_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, dst_field);
if (!hw_src_action_info || !hw_dst_action_info) {
mlx5dr_dbg(dmn, "Modify copy action invalid field given\n");
return -EINVAL;
@@ -1454,23 +1465,13 @@ dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn,
return -EINVAL;
}
- MLX5_SET(dr_action_hw_copy, hw_action,
- opcode, MLX5DR_ACTION_MDFY_HW_OP_COPY);
-
- MLX5_SET(dr_action_hw_copy, hw_action, destination_field_code,
- hw_dst_action_info->hw_field);
-
- MLX5_SET(dr_action_hw_copy, hw_action, destination_left_shifter,
- hw_dst_action_info->start + dst_offset);
-
- MLX5_SET(dr_action_hw_copy, hw_action, destination_length,
- length == 32 ? 0 : length);
-
- MLX5_SET(dr_action_hw_copy, hw_action, source_field_code,
- hw_src_action_info->hw_field);
-
- MLX5_SET(dr_action_hw_copy, hw_action, source_left_shifter,
- hw_src_action_info->start + dst_offset);
+ mlx5dr_ste_set_action_copy(dmn->ste_ctx,
+ hw_action,
+ hw_dst_action_info->hw_field,
+ hw_dst_action_info->start + dst_offset,
+ length,
+ hw_src_action_info->hw_field,
+ hw_src_action_info->start + src_offset);
*ret_dst_hw_info = hw_dst_action_info;
*ret_src_hw_info = hw_src_action_info;
@@ -1482,8 +1483,8 @@ static int
dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn,
__be64 *sw_action,
__be64 *hw_action,
- const struct dr_action_modify_field_conv **ret_dst_hw_info,
- const struct dr_action_modify_field_conv **ret_src_hw_info)
+ const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info,
+ const struct mlx5dr_ste_action_modify_field **ret_src_hw_info)
{
u8 action;
int ret;
@@ -1527,17 +1528,17 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action,
const __be64 *sw_action)
{
u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
- struct mlx5dr_domain *dmn = action->rewrite.dmn;
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
- action->rewrite.allow_rx = 0;
+ action->rewrite->allow_rx = 0;
if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
sw_field);
return -EINVAL;
}
} else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
- action->rewrite.allow_tx = 0;
+ action->rewrite->allow_tx = 0;
if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
sw_field);
@@ -1545,7 +1546,7 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action,
}
}
- if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) {
+ if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) {
mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n");
return -EINVAL;
}
@@ -1558,7 +1559,7 @@ dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action,
const __be64 *sw_action)
{
u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
- struct mlx5dr_domain *dmn = action->rewrite.dmn;
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL &&
sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT &&
@@ -1576,7 +1577,7 @@ static int
dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action,
const __be64 *sw_action)
{
- struct mlx5dr_domain *dmn = action->rewrite.dmn;
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
u16 sw_fields[2];
int i;
@@ -1585,14 +1586,14 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action,
for (i = 0; i < 2; i++) {
if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
- action->rewrite.allow_rx = 0;
+ action->rewrite->allow_rx = 0;
if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
sw_fields[i]);
return -EINVAL;
}
} else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
- action->rewrite.allow_tx = 0;
+ action->rewrite->allow_tx = 0;
if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
sw_fields[i]);
@@ -1601,7 +1602,7 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action,
}
}
- if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) {
+ if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) {
mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n");
return -EINVAL;
}
@@ -1613,7 +1614,7 @@ static int
dr_action_modify_check_field_limitation(struct mlx5dr_action *action,
const __be64 *sw_action)
{
- struct mlx5dr_domain *dmn = action->rewrite.dmn;
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
u8 action_type;
int ret;
@@ -1645,7 +1646,7 @@ dr_action_modify_check_field_limitation(struct mlx5dr_action *action,
}
static bool
-dr_action_modify_check_is_ttl_modify(const u64 *sw_action)
+dr_action_modify_check_is_ttl_modify(const void *sw_action)
{
u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
@@ -1660,31 +1661,42 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
u32 *num_hw_actions,
bool *modify_ttl)
{
- const struct dr_action_modify_field_conv *hw_dst_action_info;
- const struct dr_action_modify_field_conv *hw_src_action_info;
- u16 hw_field = MLX5DR_ACTION_MDFY_HW_FLD_RESERVED;
- u32 l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE;
- u32 l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE;
- struct mlx5dr_domain *dmn = action->rewrite.dmn;
+ const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
+ const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ __be64 *modify_ttl_sw_action = NULL;
int ret, i, hw_idx = 0;
__be64 *sw_action;
__be64 hw_action;
+ u16 hw_field = 0;
+ u32 l3_type = 0;
+ u32 l4_type = 0;
*modify_ttl = false;
- action->rewrite.allow_rx = 1;
- action->rewrite.allow_tx = 1;
+ action->rewrite->allow_rx = 1;
+ action->rewrite->allow_tx = 1;
- for (i = 0; i < num_sw_actions; i++) {
- sw_action = &sw_actions[i];
+ for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) {
+ /* modify TTL is handled separately, as a last action */
+ if (i == num_sw_actions) {
+ sw_action = modify_ttl_sw_action;
+ modify_ttl_sw_action = NULL;
+ } else {
+ sw_action = &sw_actions[i];
+ }
ret = dr_action_modify_check_field_limitation(action,
sw_action);
if (ret)
return ret;
- if (!(*modify_ttl))
- *modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action);
+ if (!(*modify_ttl) &&
+ dr_action_modify_check_is_ttl_modify(sw_action)) {
+ modify_ttl_sw_action = sw_action;
+ *modify_ttl = true;
+ continue;
+ }
/* Convert SW action to HW action */
ret = dr_action_modify_sw_to_hw(dmn,
@@ -1723,7 +1735,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
* modify actions doesn't exceeds the limit
*/
hw_idx++;
- if ((num_sw_actions + hw_idx - i) >= max_hw_actions) {
+ if (hw_idx >= max_hw_actions) {
mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n");
return -EINVAL;
}
@@ -1734,6 +1746,10 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
hw_idx++;
}
+ /* if the resulting HW actions list is empty, add NOP action */
+ if (!hw_idx)
+ hw_idx++;
+
*num_hw_actions = hw_idx;
return 0;
@@ -1781,13 +1797,13 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
if (ret)
goto free_hw_actions;
- action->rewrite.chunk = chunk;
- action->rewrite.modify_ttl = modify_ttl;
- action->rewrite.data = (u8 *)hw_actions;
- action->rewrite.num_of_actions = num_hw_actions;
- action->rewrite.index = (chunk->icm_addr -
- dmn->info.caps.hdr_modify_icm_addr) /
- ACTION_CACHE_LINE_SIZE;
+ action->rewrite->chunk = chunk;
+ action->rewrite->modify_ttl = modify_ttl;
+ action->rewrite->data = (u8 *)hw_actions;
+ action->rewrite->num_of_actions = num_hw_actions;
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) -
+ dmn->info.caps.hdr_modify_icm_addr) /
+ ACTION_CACHE_LINE_SIZE;
ret = mlx5dr_send_postsend_action(dmn, action);
if (ret)
@@ -1822,7 +1838,7 @@ mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn,
if (!action)
goto dec_ref;
- action->rewrite.dmn = dmn;
+ action->rewrite->dmn = dmn;
ret = dr_action_create_modify_action(dmn,
actions_sz,
@@ -1844,7 +1860,7 @@ dec_ref:
struct mlx5dr_action *
mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
- u32 vport, u8 vhca_id_valid,
+ u16 vport, u8 vhca_id_valid,
u16 vhca_id)
{
struct mlx5dr_cmd_vport_cap *vport_cap;
@@ -1864,9 +1880,11 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
return NULL;
}
- vport_cap = mlx5dr_get_vport_cap(&vport_dmn->info.caps, vport);
+ vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, vport);
if (!vport_cap) {
- mlx5dr_dbg(dmn, "Failed to get vport %d caps\n", vport);
+ mlx5dr_err(dmn,
+ "Failed to get vport 0x%x caps - vport is disabled or invalid\n",
+ vport);
return NULL;
}
@@ -1874,57 +1892,93 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
if (!action)
return NULL;
- action->vport.dmn = vport_dmn;
- action->vport.caps = vport_cap;
+ action->vport->dmn = vport_dmn;
+ action->vport->caps = vport_cap;
+
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id,
+ u8 dest_reg_id, u8 aso_type,
+ u8 init_color, u8 meter_id)
+{
+ struct mlx5dr_action *action;
+
+ if (aso_type != MLX5_EXE_ASO_FLOW_METER)
+ return NULL;
+
+ if (init_color > MLX5_FLOW_METER_COLOR_UNDEFINED)
+ return NULL;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_ASO_FLOW_METER);
+ if (!action)
+ return NULL;
+
+ action->aso->obj_id = obj_id;
+ action->aso->offset = meter_id;
+ action->aso->dest_reg_id = dest_reg_id;
+ action->aso->init_color = init_color;
+ action->aso->dmn = dmn;
+
+ refcount_inc(&dmn->refcount);
return action;
}
int mlx5dr_action_destroy(struct mlx5dr_action *action)
{
- if (refcount_read(&action->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1))
return -EBUSY;
switch (action->action_type) {
case DR_ACTION_TYP_FT:
- if (action->dest_tbl.is_fw_tbl)
- refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount);
+ if (action->dest_tbl->is_fw_tbl)
+ refcount_dec(&action->dest_tbl->fw_tbl.dmn->refcount);
else
- refcount_dec(&action->dest_tbl.tbl->refcount);
+ refcount_dec(&action->dest_tbl->tbl->refcount);
- if (action->dest_tbl.is_fw_tbl &&
- action->dest_tbl.fw_tbl.num_of_ref_actions) {
+ if (action->dest_tbl->is_fw_tbl &&
+ action->dest_tbl->fw_tbl.num_of_ref_actions) {
struct mlx5dr_action **ref_actions;
int i;
- ref_actions = action->dest_tbl.fw_tbl.ref_actions;
- for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++)
+ ref_actions = action->dest_tbl->fw_tbl.ref_actions;
+ for (i = 0; i < action->dest_tbl->fw_tbl.num_of_ref_actions; i++)
refcount_dec(&ref_actions[i]->refcount);
kfree(ref_actions);
- mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn,
- action->dest_tbl.fw_tbl.id,
- action->dest_tbl.fw_tbl.group_id);
+ mlx5dr_fw_destroy_md_tbl(action->dest_tbl->fw_tbl.dmn,
+ action->dest_tbl->fw_tbl.id,
+ action->dest_tbl->fw_tbl.group_id);
}
break;
case DR_ACTION_TYP_TNL_L2_TO_L2:
- refcount_dec(&action->reformat.dmn->refcount);
+ case DR_ACTION_TYP_REMOVE_HDR:
+ refcount_dec(&action->reformat->dmn->refcount);
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
- mlx5dr_icm_free_chunk(action->rewrite.chunk);
- refcount_dec(&action->reformat.dmn->refcount);
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ refcount_dec(&action->rewrite->dmn->refcount);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
case DR_ACTION_TYP_L2_TO_TNL_L3:
- mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev,
- action->reformat.reformat_id);
- refcount_dec(&action->reformat.dmn->refcount);
+ case DR_ACTION_TYP_INSERT_HDR:
+ mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev,
+ action->reformat->id);
+ refcount_dec(&action->reformat->dmn->refcount);
break;
case DR_ACTION_TYP_MODIFY_HDR:
- mlx5dr_icm_free_chunk(action->rewrite.chunk);
- kfree(action->rewrite.data);
- refcount_dec(&action->rewrite.dmn->refcount);
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ kfree(action->rewrite->data);
+ refcount_dec(&action->rewrite->dmn->refcount);
+ break;
+ case DR_ACTION_TYP_SAMPLER:
+ refcount_dec(&action->sampler->dmn->refcount);
+ break;
+ case DR_ACTION_TYP_ASO_FLOW_METER:
+ refcount_dec(&action->aso->dmn->refcount);
break;
default:
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c
new file mode 100644
index 000000000000..7df11a019df9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 - 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006 - 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
+ */
+
+#include "dr_types.h"
+
+int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int max_order)
+{
+ int i;
+
+ buddy->max_order = max_order;
+
+ INIT_LIST_HEAD(&buddy->list_node);
+ INIT_LIST_HEAD(&buddy->used_list);
+ INIT_LIST_HEAD(&buddy->hot_list);
+
+ buddy->bitmap = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->bitmap),
+ GFP_KERNEL);
+ buddy->num_free = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->num_free),
+ GFP_KERNEL);
+
+ if (!buddy->bitmap || !buddy->num_free)
+ goto err_free_all;
+
+ /* Allocating max_order bitmaps, one for each order */
+
+ for (i = 0; i <= buddy->max_order; ++i) {
+ unsigned int size = 1 << (buddy->max_order - i);
+
+ buddy->bitmap[i] = bitmap_zalloc(size, GFP_KERNEL);
+ if (!buddy->bitmap[i])
+ goto err_out_free_each_bit_per_order;
+ }
+
+ /* In the beginning, we have only one order that is available for
+ * use (the biggest one), so mark the first bit in both bitmaps.
+ */
+
+ bitmap_set(buddy->bitmap[buddy->max_order], 0, 1);
+
+ buddy->num_free[buddy->max_order] = 1;
+
+ return 0;
+
+err_out_free_each_bit_per_order:
+ for (i = 0; i <= buddy->max_order; ++i)
+ bitmap_free(buddy->bitmap[i]);
+
+err_free_all:
+ kfree(buddy->num_free);
+ kfree(buddy->bitmap);
+ return -ENOMEM;
+}
+
+void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ int i;
+
+ list_del(&buddy->list_node);
+
+ for (i = 0; i <= buddy->max_order; ++i)
+ bitmap_free(buddy->bitmap[i]);
+
+ kfree(buddy->num_free);
+ kfree(buddy->bitmap);
+}
+
+static int dr_buddy_find_free_seg(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int start_order,
+ unsigned int *segment,
+ unsigned int *order)
+{
+ unsigned int seg, order_iter, m;
+
+ for (order_iter = start_order;
+ order_iter <= buddy->max_order; ++order_iter) {
+ if (!buddy->num_free[order_iter])
+ continue;
+
+ m = 1 << (buddy->max_order - order_iter);
+ seg = find_first_bit(buddy->bitmap[order_iter], m);
+
+ if (WARN(seg >= m,
+ "ICM Buddy: failed finding free mem for order %d\n",
+ order_iter))
+ return -ENOMEM;
+
+ break;
+ }
+
+ if (order_iter > buddy->max_order)
+ return -ENOMEM;
+
+ *segment = seg;
+ *order = order_iter;
+ return 0;
+}
+
+/**
+ * mlx5dr_buddy_alloc_mem() - Update second level bitmap.
+ * @buddy: Buddy to update.
+ * @order: Order of the buddy to update.
+ * @segment: Segment number.
+ *
+ * This function finds the first area of the ICM memory managed by this buddy.
+ * It uses the data structures of the buddy system in order to find the first
+ * area of free place, starting from the current order till the maximum order
+ * in the system.
+ *
+ * Return: 0 when segment is set, non-zero error status otherwise.
+ *
+ * The function returns the location (segment) in the whole buddy ICM memory
+ * area - the index of the memory segment that is available for use.
+ */
+int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int order,
+ unsigned int *segment)
+{
+ unsigned int seg, order_iter;
+ int err;
+
+ err = dr_buddy_find_free_seg(buddy, order, &seg, &order_iter);
+ if (err)
+ return err;
+
+ bitmap_clear(buddy->bitmap[order_iter], seg, 1);
+ --buddy->num_free[order_iter];
+
+ /* If we found free memory in some order that is bigger than the
+ * required order, we need to split every order between the required
+ * order and the order that we found into two parts, and mark accordingly.
+ */
+ while (order_iter > order) {
+ --order_iter;
+ seg <<= 1;
+ bitmap_set(buddy->bitmap[order_iter], seg ^ 1, 1);
+ ++buddy->num_free[order_iter];
+ }
+
+ seg <<= order;
+ *segment = seg;
+
+ return 0;
+}
+
+void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int seg, unsigned int order)
+{
+ seg >>= order;
+
+ /* Whenever a segment is free,
+ * the mem is added to the buddy that gave it.
+ */
+ while (test_bit(seg ^ 1, buddy->bitmap[order])) {
+ bitmap_clear(buddy->bitmap[order], seg ^ 1, 1);
+ --buddy->num_free[order];
+ seg >>= 1;
+ ++order;
+ }
+ bitmap_set(buddy->bitmap[order], seg, 1);
+
+ ++buddy->num_free[order];
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 461b39376daf..16d65fe4f654 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -18,7 +18,7 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport);
MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(mdev, query_esw_vport_context, in, out);
if (err)
return err;
@@ -51,7 +51,7 @@ int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
HCA_CAP_OPMOD_GET_CUR);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size);
+ err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
if (err) {
kfree(out);
return err;
@@ -78,33 +78,107 @@ int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
caps->uplink_icm_address_tx =
MLX5_CAP64_ESW_FLOWTABLE(mdev,
sw_steering_uplink_icm_address_tx);
- caps->sw_owner =
- MLX5_CAP_ESW_FLOWTABLE_FDB(mdev,
- sw_owner);
+ caps->sw_owner_v2 = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner_v2);
+ if (!caps->sw_owner_v2)
+ caps->sw_owner = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner);
return 0;
}
+static int dr_cmd_query_nic_vport_roce_en(struct mlx5_core_dev *mdev,
+ u16 vport, bool *roce_en)
+{
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
+ int err;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, !!vport);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *roce_en = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.roce_en);
+ return 0;
+}
+
int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
struct mlx5dr_cmd_caps *caps)
{
+ bool roce_en;
+ int err;
+
caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required);
caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager);
caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id);
caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols);
+ caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version);
+
+ if (MLX5_CAP_GEN(mdev, roce)) {
+ err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
+ if (err)
+ return err;
+
+ caps->roce_caps.roce_en = roce_en;
+ caps->roce_caps.fl_rc_qp_when_roce_disabled =
+ MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
+ caps->roce_caps.fl_rc_qp_when_roce_enabled =
+ MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
+ }
+
+ caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new);
- if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) {
+ /* geneve_tlv_option_0_exist is the indication of
+ * STE support for lookup type flex_parser_ok
+ */
+ caps->flex_parser_ok_bits_supp =
+ MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) {
caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1);
}
- if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) {
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED) {
caps->flex_parser_id_icmpv6_dw0 =
MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0);
caps->flex_parser_id_icmpv6_dw1 =
MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1);
}
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED)
+ caps->flex_parser_id_geneve_tlv_option_0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED)
+ caps->flex_parser_id_mpls_over_gre =
+ MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED)
+ caps->flex_parser_id_mpls_over_udp =
+ MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED)
+ caps->flex_parser_id_gtpu_dw_0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_0);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED)
+ caps->flex_parser_id_gtpu_teid =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_teid);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED)
+ caps->flex_parser_id_gtpu_dw_2 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_2);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED)
+ caps->flex_parser_id_gtpu_first_ext_dw_0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_first_ext_dw_0);
+
caps->nic_rx_drop_address =
MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address);
caps->nic_tx_drop_address =
@@ -112,10 +186,15 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->nic_tx_allow_address =
MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address);
- caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner);
- caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level);
+ caps->rx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner_v2);
+ caps->tx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner_v2);
- caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner);
+ if (!caps->rx_sw_owner_v2)
+ caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner);
+ if (!caps->tx_sw_owner_v2)
+ caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner);
+
+ caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level);
caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size);
caps->hdr_modify_icm_addr =
@@ -123,6 +202,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port);
+ caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev);
+
return 0;
}
@@ -141,7 +222,7 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
MLX5_SET(query_flow_table_in, in, table_type, type);
MLX5_SET(query_flow_table_in, in, table_id, table_id);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, query_flow_table, in, out);
if (err)
return err;
@@ -156,14 +237,43 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
return 0;
}
+int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev,
+ u32 sampler_id,
+ u64 *rx_icm_addr,
+ u64 *tx_icm_addr)
+{
+ u32 out[MLX5_ST_SZ_DW(query_sampler_obj_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ void *attr;
+ int ret;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ attr = MLX5_ADDR_OF(query_sampler_obj_out, out, sampler_object);
+
+ *rx_icm_addr = MLX5_GET64(sampler_obj, attr,
+ sw_steering_icm_address_rx);
+ *tx_icm_addr = MLX5_GET64(sampler_obj, attr,
+ sw_steering_icm_address_tx);
+
+ return 0;
+}
+
int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev)
{
- u32 out[MLX5_ST_SZ_DW(sync_steering_out)] = {};
u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {};
MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, sync_steering, in);
}
int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
@@ -171,7 +281,7 @@ int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
u32 table_id,
u32 group_id,
u32 modify_header_id,
- u32 vport_id)
+ u16 vport)
{
u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {};
void *in_flow_context;
@@ -201,8 +311,8 @@ int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
MLX5_SET(dest_format_struct, in_dests, destination_type,
- MLX5_FLOW_DESTINATION_TYPE_VPORT);
- MLX5_SET(dest_format_struct, in_dests, destination_id, vport_id);
+ MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, vport);
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
kvfree(in);
@@ -214,14 +324,13 @@ int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
u32 table_type,
u32 table_id)
{
- u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {};
u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
MLX5_SET(delete_fte_in, in, table_type, table_type);
MLX5_SET(delete_fte_in, in, table_id, table_id);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, delete_fte, in);
}
int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
@@ -263,7 +372,6 @@ out:
int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
u32 modify_header_id)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)] = {};
u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
MLX5_SET(dealloc_modify_header_context_in, in, opcode,
@@ -271,7 +379,7 @@ int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
modify_header_id);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, dealloc_modify_header_context, in);
}
int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
@@ -284,7 +392,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
u32 *in;
int err;
- in = kzalloc(inlen, GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -292,14 +400,14 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
MLX5_SET(create_flow_group_in, in, table_type, table_type);
MLX5_SET(create_flow_group_in, in, table_id, table_id);
- err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ err = mlx5_cmd_exec_inout(mdev, create_flow_group, in, out);
if (err)
goto out;
*group_id = MLX5_GET(create_flow_group_out, out, group_id);
out:
- kfree(in);
+ kvfree(in);
return err;
}
@@ -309,14 +417,14 @@ int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
u32 group_id)
{
u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {};
- MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ MLX5_SET(destroy_flow_group_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
MLX5_SET(destroy_flow_group_in, in, table_type, table_type);
MLX5_SET(destroy_flow_group_in, in, table_id, table_id);
MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, destroy_flow_group, in);
}
int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
@@ -331,6 +439,7 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
MLX5_SET(create_flow_table_in, in, table_type, attr->table_type);
+ MLX5_SET(create_flow_table_in, in, uid, attr->uid);
ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl);
@@ -360,7 +469,7 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
attr->reformat_en);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(mdev, create_flow_table, in, out);
if (err)
return err;
@@ -379,7 +488,6 @@ int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
u32 table_id,
u32 table_type)
{
- u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {};
u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
MLX5_SET(destroy_flow_table_in, in, opcode,
@@ -387,11 +495,13 @@ int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
MLX5_SET(destroy_flow_table_in, in, table_type, table_type);
MLX5_SET(destroy_flow_table_in, in, table_id, table_id);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, destroy_flow_table, in);
}
int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
enum mlx5_reformat_ctx_type rt,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
size_t reformat_size,
void *reformat_data,
u32 *reformat_id)
@@ -418,8 +528,11 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data);
MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt);
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_param_0, reformat_param_0);
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_param_1, reformat_param_1);
MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size);
- memcpy(pdata, reformat_data, reformat_size);
+ if (reformat_data && reformat_size)
+ memcpy(pdata, reformat_data, reformat_size);
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
if (err)
@@ -434,7 +547,6 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
u32 reformat_id)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {};
u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
@@ -442,7 +554,7 @@ void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
reformat_id);
- mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(mdev, dealloc_packet_reformat_context, in);
}
int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
@@ -458,7 +570,7 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
MLX5_SET(query_roce_address_in, in, roce_address_index, index);
MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(mdev, query_roce_address, in, out);
if (err)
return err;
@@ -493,9 +605,11 @@ static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev,
if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return 0;
for (i = 0; i < fte->dests_size; i++) {
- if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ||
+ fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_NONE)
continue;
- if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ if ((fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
num_encap++;
num_fwd_destinations++;
@@ -553,6 +667,7 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(set_fte_in, in, table_type, ft->type);
MLX5_SET(set_fte_in, in, table_id, ft->id);
MLX5_SET(set_fte_in, in, flow_index, fte->index);
+ MLX5_SET(set_fte_in, in, ignore_flow_level, fte->ignore_flow_level);
if (ft->vport) {
MLX5_SET(set_fte_in, in, vport_number, ft->vport);
MLX5_SET(set_fte_in, in, other_vport, 1);
@@ -606,25 +721,40 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
int list_size = 0;
for (i = 0; i < fte->dests_size; i++) {
- unsigned int id, type = fte->dest_arr[i].type;
+ enum mlx5_flow_destination_type type = fte->dest_arr[i].type;
+ enum mlx5_ifc_flow_destination_type ifc_type;
+ unsigned int id;
if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
continue;
switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ continue;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
id = fte->dest_arr[i].ft_num;
- type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
id = fte->dest_arr[i].ft_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
- id = fte->dest_arr[i].vport.num;
- MLX5_SET(dest_format_struct, in_dests,
- destination_eswitch_owner_vhca_id_valid,
- !!(fte->dest_arr[i].vport.flags &
- MLX5_FLOW_DEST_VPORT_VHCA_ID));
+ if (type == MLX5_FLOW_DESTINATION_TYPE_VPORT) {
+ id = fte->dest_arr[i].vport.num;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid,
+ !!(fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT;
+ } else {
+ id = 0;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid, 1);
+ }
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
fte->dest_arr[i].vport.vhca_id);
@@ -639,12 +769,17 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
fte->dest_arr[i].vport.reformat_id);
}
break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ id = fte->dest_arr[i].sampler_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+ break;
default:
id = fte->dest_arr[i].tir_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
}
MLX5_SET(dest_format_struct, in_dests, destination_type,
- type);
+ ifc_type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
in_dests += dst_cnt_size;
list_size++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
new file mode 100644
index 000000000000..7adcf0eec13b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include "dr_types.h"
+
+#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL)
+
+enum dr_dump_rec_type {
+ DR_DUMP_REC_TYPE_DOMAIN = 3000,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER = 3001,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_DEV_ATTR = 3002,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT = 3003,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS = 3004,
+ DR_DUMP_REC_TYPE_DOMAIN_SEND_RING = 3005,
+
+ DR_DUMP_REC_TYPE_TABLE = 3100,
+ DR_DUMP_REC_TYPE_TABLE_RX = 3101,
+ DR_DUMP_REC_TYPE_TABLE_TX = 3102,
+
+ DR_DUMP_REC_TYPE_MATCHER = 3200,
+ DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201,
+ DR_DUMP_REC_TYPE_MATCHER_RX = 3202,
+ DR_DUMP_REC_TYPE_MATCHER_TX = 3203,
+ DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204,
+ DR_DUMP_REC_TYPE_MATCHER_MASK = 3205,
+
+ DR_DUMP_REC_TYPE_RULE = 3300,
+ DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301,
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0 = 3302,
+ DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 = 3303,
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1 = 3304,
+
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L2 = 3400,
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L3 = 3401,
+ DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR = 3402,
+ DR_DUMP_REC_TYPE_ACTION_DROP = 3403,
+ DR_DUMP_REC_TYPE_ACTION_QP = 3404,
+ DR_DUMP_REC_TYPE_ACTION_FT = 3405,
+ DR_DUMP_REC_TYPE_ACTION_CTR = 3406,
+ DR_DUMP_REC_TYPE_ACTION_TAG = 3407,
+ DR_DUMP_REC_TYPE_ACTION_VPORT = 3408,
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L2 = 3409,
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L3 = 3410,
+ DR_DUMP_REC_TYPE_ACTION_DEVX_TIR = 3411,
+ DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN = 3412,
+ DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413,
+ DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415,
+ DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420,
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421
+};
+
+void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl)
+{
+ mutex_lock(&tbl->dmn->dump_info.dbg_mutex);
+ list_add_tail(&tbl->dbg_node, &tbl->dmn->dbg_tbl_list);
+ mutex_unlock(&tbl->dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl)
+{
+ mutex_lock(&tbl->dmn->dump_info.dbg_mutex);
+ list_del(&tbl->dbg_node);
+ mutex_unlock(&tbl->dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ list_add_tail(&rule->dbg_node, &rule->matcher->dbg_rule_list);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ list_del(&rule->dbg_node);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+}
+
+static u64 dr_dump_icm_to_idx(u64 icm_addr)
+{
+ return (icm_addr >> 6) & 0xffffffff;
+}
+
+#define DR_HEX_SIZE 256
+
+static void
+dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
+{
+ if (WARN_ON_ONCE(DR_HEX_SIZE < 2 * size + 1))
+ size = DR_HEX_SIZE / 2 - 1; /* truncate */
+
+ bin2hex(hex, src, size);
+ hex[2 * size] = 0; /* NULL-terminate */
+}
+
+static int
+dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+ struct mlx5dr_rule_action_member *action_mem)
+{
+ struct mlx5dr_action *action = action_mem->action;
+ const u64 action_id = DR_DBG_PTR_TO_ID(action);
+
+ switch (action->action_type) {
+ case DR_ACTION_TYP_DROP:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id);
+ break;
+ case DR_ACTION_TYP_FT:
+ if (action->dest_tbl->is_fw_tbl)
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->fw_tbl.id,
+ -1);
+ else
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->tbl->table_id,
+ DR_DBG_PTR_TO_ID(action->dest_tbl->tbl));
+
+ break;
+ case DR_ACTION_TYP_CTR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id,
+ action->ctr->ctr_id + action->ctr->offset);
+ break;
+ case DR_ACTION_TYP_TAG:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id,
+ action->flow_tag->flow_tag);
+ break;
+ case DR_ACTION_TYP_MODIFY_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id,
+ rule_id, action->rewrite->index);
+ break;
+ case DR_ACTION_TYP_VPORT:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id,
+ action->vport->caps->num);
+ break;
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id,
+ rule_id);
+ break;
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id,
+ rule_id, action->rewrite->index);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id,
+ rule_id, action->reformat->id);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id,
+ rule_id, action->reformat->id);
+ break;
+ case DR_ACTION_TYP_POP_VLAN:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id,
+ rule_id);
+ break;
+ case DR_ACTION_TYP_PUSH_VLAN:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id,
+ rule_id, action->push_vlan->vlan_hdr);
+ break;
+ case DR_ACTION_TYP_INSERT_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ break;
+ case DR_ACTION_TYP_REMOVE_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ break;
+ case DR_ACTION_TYP_SAMPLER:
+ seq_printf(file,
+ "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id,
+ 0, 0, action->sampler->sampler_id,
+ action->sampler->rx_icm_addr,
+ action->sampler->tx_icm_addr);
+ break;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+ bool is_rx, const u64 rule_id, u8 format_ver)
+{
+ char hw_ste_dump[DR_HEX_SIZE];
+ u32 mem_rec_type;
+
+ if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) {
+ mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 :
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0;
+ } else {
+ mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 :
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1;
+ }
+
+ dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste),
+ DR_STE_SIZE_REDUCED);
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type,
+ dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id,
+ hw_ste_dump);
+
+ return 0;
+}
+
+static int
+dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+ bool is_rx, const u64 rule_id, u8 format_ver)
+{
+ struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
+ struct mlx5dr_ste *curr_ste = rule_rx_tx->last_rule_ste;
+ int ret, i;
+
+ if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i))
+ return 0;
+
+ while (i--) {
+ ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+ format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_rule_action_member *action_mem;
+ const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
+ struct mlx5dr_rule_rx_tx *rx = &rule->rx;
+ struct mlx5dr_rule_rx_tx *tx = &rule->tx;
+ u8 format_ver;
+ int ret;
+
+ format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver;
+
+ seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id,
+ DR_DBG_PTR_TO_ID(rule->matcher));
+
+ if (rx->nic_matcher) {
+ ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_matcher) {
+ ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
+ ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+ u8 criteria, const u64 matcher_id)
+{
+ char dump[DR_HEX_SIZE];
+
+ seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK,
+ matcher_id);
+
+ if (criteria & DR_MATCHER_CRITERIA_OUTER) {
+ dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_INNER) {
+ dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC) {
+ dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC2) {
+ dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC3) {
+ dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3));
+ seq_printf(file, "%s\n", dump);
+ } else {
+ seq_puts(file, ",\n");
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+ u32 index, bool is_rx, const u64 matcher_id)
+{
+ seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n",
+ DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx,
+ builder->lu_type);
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+ struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
+ const u64 matcher_id)
+{
+ enum dr_dump_rec_type rec_type;
+ u64 s_icm_addr, e_icm_addr;
+ int i, ret;
+
+ rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX :
+ DR_DUMP_REC_TYPE_MATCHER_TX;
+
+ s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk);
+ e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk);
+ seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n",
+ rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx),
+ matcher_id, matcher_rx_tx->num_of_builders,
+ dr_dump_icm_to_idx(s_icm_addr),
+ dr_dump_icm_to_idx(e_icm_addr));
+
+ for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
+ ret = dr_dump_matcher_builder(file,
+ &matcher_rx_tx->ste_builder[i],
+ i, is_rx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
+ struct mlx5dr_matcher_rx_tx *tx = &matcher->tx;
+ u64 matcher_id;
+ int ret;
+
+ matcher_id = DR_DBG_PTR_TO_ID(matcher);
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER,
+ matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio);
+
+ ret = dr_dump_matcher_mask(file, &matcher->mask,
+ matcher->match_criteria, matcher_id);
+ if (ret < 0)
+ return ret;
+
+ if (rx->nic_tbl) {
+ ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_tbl) {
+ ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_rule *rule;
+ int ret;
+
+ ret = dr_dump_matcher(file, matcher);
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(rule, &matcher->dbg_rule_list, dbg_node) {
+ ret = dr_dump_rule(file, rule);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+ struct mlx5dr_table_rx_tx *table_rx_tx,
+ const u64 table_id)
+{
+ enum dr_dump_rec_type rec_type;
+ u64 s_icm_addr;
+
+ rec_type = is_rx ? DR_DUMP_REC_TYPE_TABLE_RX :
+ DR_DUMP_REC_TYPE_TABLE_TX;
+
+ s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk);
+ seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id,
+ dr_dump_icm_to_idx(s_icm_addr));
+
+ return 0;
+}
+
+static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+{
+ struct mlx5dr_table_rx_tx *rx = &table->rx;
+ struct mlx5dr_table_rx_tx *tx = &table->tx;
+ int ret;
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE,
+ DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn),
+ table->table_type, table->level);
+
+ if (rx->nic_dmn) {
+ ret = dr_dump_table_rx_tx(file, true, rx,
+ DR_DBG_PTR_TO_ID(table));
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_dmn) {
+ ret = dr_dump_table_rx_tx(file, false, tx,
+ DR_DBG_PTR_TO_ID(table));
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
+{
+ struct mlx5dr_matcher *matcher;
+ int ret;
+
+ ret = dr_dump_table(file, tbl);
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(matcher, &tbl->matcher_list, list_node) {
+ ret = dr_dump_matcher_all(file, matcher);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+ const u64 domain_id)
+{
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring),
+ domain_id, ring->cq->mcq.cqn, ring->qp->qpn);
+ return 0;
+}
+
+static int
+dr_dump_domain_info_flex_parser(struct seq_file *file,
+ const char *flex_parser_name,
+ const u8 flex_parser_value,
+ const u64 domain_id)
+{
+ seq_printf(file, "%d,0x%llx,%s,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id,
+ flex_parser_name, flex_parser_value);
+ return 0;
+}
+
+static int
+dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+ const u64 domain_id)
+{
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+ unsigned long i, vports_num;
+
+ xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps)
+ ; /* count the number of vports in xarray */
+
+ seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi,
+ caps->nic_rx_drop_address, caps->nic_tx_drop_address,
+ caps->flex_protocols, vports_num, caps->eswitch_manager);
+
+ xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) {
+ vport_caps = xa_load(&caps->vports.vports_caps_xa, i);
+
+ seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i,
+ vport_caps->vport_gvmi, vport_caps->icm_address_rx,
+ vport_caps->icm_address_tx);
+ }
+ return 0;
+}
+
+static int
+dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+ const u64 domain_id)
+{
+ int ret;
+
+ ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+ info->caps.flex_parser_id_icmp_dw0,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+ info->caps.flex_parser_id_icmp_dw1,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+ info->caps.flex_parser_id_icmpv6_dw0,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+ info->caps.flex_parser_id_icmpv6_dw1,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int
+dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
+{
+ u64 domain_id = DR_DBG_PTR_TO_ID(dmn);
+ int ret;
+
+ seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN,
+ domain_id, dmn->type, dmn->info.caps.gvmi,
+ dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev));
+
+ ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+ if (ret < 0)
+ return ret;
+
+ if (dmn->info.supp_sw_steering) {
+ ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_table *tbl;
+ int ret;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ mlx5dr_domain_lock(dmn);
+
+ ret = dr_dump_domain(file, dmn);
+ if (ret < 0)
+ goto unlock_mutex;
+
+ list_for_each_entry(tbl, &dmn->dbg_tbl_list, dbg_node) {
+ ret = dr_dump_table_all(file, tbl);
+ if (ret < 0)
+ break;
+ }
+
+unlock_mutex:
+ mlx5dr_domain_unlock(dmn);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+ return ret;
+}
+
+static int dr_dump_show(struct seq_file *file, void *priv)
+{
+ return dr_dump_domain_all(file, file->private);
+}
+DEFINE_SHOW_ATTRIBUTE(dr_dump);
+
+void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn)
+{
+ struct mlx5_core_dev *dev = dmn->mdev;
+ char file_name[128];
+
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+ mlx5_core_warn(dev,
+ "Steering dump is not supported for NIC RX/TX domains\n");
+ return;
+ }
+
+ dmn->dump_info.steering_debugfs =
+ debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev));
+ dmn->dump_info.fdb_debugfs =
+ debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs);
+
+ sprintf(file_name, "dmn_%p", dmn);
+ debugfs_create_file(file_name, 0444, dmn->dump_info.fdb_debugfs,
+ dmn, &dr_dump_fops);
+
+ INIT_LIST_HEAD(&dmn->dbg_tbl_list);
+ mutex_init(&dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn)
+{
+ debugfs_remove_recursive(dmn->dump_info.steering_debugfs);
+ mutex_destroy(&dmn->dump_info.dbg_mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
new file mode 100644
index 000000000000..def6cf853eea
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+struct mlx5dr_dbg_dump_info {
+ struct mutex dbg_mutex; /* protect dbg lists */
+ struct dentry *steering_debugfs;
+ struct dentry *fdb_debugfs;
+};
+
+void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn);
+void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn);
+void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl);
+void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl);
+void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule);
+void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index a9da961d4d2f..fc6ae49b5ecc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -2,50 +2,53 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/mlx5/eswitch.h>
+#include <linux/err.h>
#include "dr_types.h"
-static int dr_domain_init_cache(struct mlx5dr_domain *dmn)
+#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \
+ ((dmn)->info.caps.dmn_type##_sw_owner || \
+ ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \
+ (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7))
+
+static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn)
{
/* Per vport cached FW FT for checksum recalculation, this
- * recalculation is needed due to a HW bug.
+ * recalculation is needed due to a HW bug in STEv0.
*/
- dmn->cache.recalc_cs_ft = kcalloc(dmn->info.caps.num_vports,
- sizeof(dmn->cache.recalc_cs_ft[0]),
- GFP_KERNEL);
- if (!dmn->cache.recalc_cs_ft)
- return -ENOMEM;
-
- return 0;
+ xa_init(&dmn->csum_fts_xa);
}
-static void dr_domain_uninit_cache(struct mlx5dr_domain *dmn)
+static void dr_domain_uninit_csum_recalc_fts(struct mlx5dr_domain *dmn)
{
- int i;
-
- for (i = 0; i < dmn->info.caps.num_vports; i++) {
- if (!dmn->cache.recalc_cs_ft[i])
- continue;
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+ unsigned long i;
- mlx5dr_fw_destroy_recalc_cs_ft(dmn, dmn->cache.recalc_cs_ft[i]);
+ xa_for_each(&dmn->csum_fts_xa, i, recalc_cs_ft) {
+ if (recalc_cs_ft)
+ mlx5dr_fw_destroy_recalc_cs_ft(dmn, recalc_cs_ft);
}
- kfree(dmn->cache.recalc_cs_ft);
+ xa_destroy(&dmn->csum_fts_xa);
}
-int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
- u32 vport_num,
- u64 *rx_icm_addr)
+int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+ u16 vport_num,
+ u64 *rx_icm_addr)
{
struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+ int ret;
- recalc_cs_ft = dmn->cache.recalc_cs_ft[vport_num];
+ recalc_cs_ft = xa_load(&dmn->csum_fts_xa, vport_num);
if (!recalc_cs_ft) {
- /* Table not in cache, need to allocate a new one */
+ /* Table hasn't been created yet */
recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num);
if (!recalc_cs_ft)
return -EINVAL;
- dmn->cache.recalc_cs_ft[vport_num] = recalc_cs_ft;
+ ret = xa_err(xa_store(&dmn->csum_fts_xa, vport_num,
+ recalc_cs_ft, GFP_KERNEL));
+ if (ret)
+ return ret;
}
*rx_icm_addr = recalc_cs_ft->rx_icm_addr;
@@ -57,16 +60,22 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
{
int ret;
+ dmn->ste_ctx = mlx5dr_ste_get_ctx(dmn->info.caps.sw_format_ver);
+ if (!dmn->ste_ctx) {
+ mlx5dr_err(dmn, "SW Steering on this device is unsupported\n");
+ return -EOPNOTSUPP;
+ }
+
ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn);
if (ret) {
- mlx5dr_dbg(dmn, "Couldn't allocate PD\n");
+ mlx5dr_err(dmn, "Couldn't allocate PD, ret: %d", ret);
return ret;
}
dmn->uar = mlx5_get_uars_page(dmn->mdev);
- if (!dmn->uar) {
+ if (IS_ERR(dmn->uar)) {
mlx5dr_err(dmn, "Couldn't allocate UAR\n");
- ret = -ENOMEM;
+ ret = PTR_ERR(dmn->uar);
goto clean_pd;
}
@@ -113,15 +122,25 @@ static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn)
mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
}
+static void dr_domain_fill_uplink_caps(struct mlx5dr_domain *dmn,
+ struct mlx5dr_cmd_vport_cap *uplink_vport)
+{
+ struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps;
+
+ uplink_vport->num = MLX5_VPORT_UPLINK;
+ uplink_vport->icm_address_rx = esw_caps->uplink_icm_address_rx;
+ uplink_vport->icm_address_tx = esw_caps->uplink_icm_address_tx;
+ uplink_vport->vport_gvmi = 0;
+ uplink_vport->vhca_gvmi = dmn->info.caps.gvmi;
+}
+
static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
+ u16 vport_number,
bool other_vport,
- u16 vport_number)
+ struct mlx5dr_cmd_vport_cap *vport_caps)
{
- struct mlx5dr_cmd_vport_cap *vport_caps;
int ret;
- vport_caps = &dmn->info.caps.vports_caps[vport_number];
-
ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev,
other_vport,
vport_number,
@@ -143,29 +162,87 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
return 0;
}
-static int dr_domain_query_vports(struct mlx5dr_domain *dmn)
+static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn)
{
- struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps;
- struct mlx5dr_cmd_vport_cap *wire_vport;
- int vport;
+ return dr_domain_query_vport(dmn, 0, false,
+ &dmn->info.caps.vports.esw_manager_caps);
+}
+
+static void dr_domain_query_uplink(struct mlx5dr_domain *dmn)
+{
+ dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps);
+}
+
+static struct mlx5dr_cmd_vport_cap *
+dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+ struct mlx5dr_cmd_vport_cap *vport_caps;
int ret;
- /* Query vports (except wire vport) */
- for (vport = 0; vport < dmn->info.caps.num_esw_ports - 1; vport++) {
- ret = dr_domain_query_vport(dmn, !!vport, vport);
- if (ret)
- return ret;
+ vport_caps = kvzalloc(sizeof(*vport_caps), GFP_KERNEL);
+ if (!vport_caps)
+ return NULL;
+
+ ret = dr_domain_query_vport(dmn, vport, true, vport_caps);
+ if (ret) {
+ kvfree(vport_caps);
+ return NULL;
}
- /* Last vport is the wire port */
- wire_vport = &dmn->info.caps.vports_caps[vport];
- wire_vport->num = WIRE_PORT;
- wire_vport->icm_address_rx = esw_caps->uplink_icm_address_rx;
- wire_vport->icm_address_tx = esw_caps->uplink_icm_address_tx;
- wire_vport->vport_gvmi = 0;
- wire_vport->vhca_gvmi = dmn->info.caps.gvmi;
+ ret = xa_insert(&caps->vports.vports_caps_xa, vport,
+ vport_caps, GFP_KERNEL);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Couldn't insert new vport into xarray (%d)\n", ret);
+ kvfree(vport_caps);
+ return ERR_PTR(ret);
+ }
- return 0;
+ return vport_caps;
+}
+
+static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+ return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) ||
+ (!caps->is_ecpf && vport == 0);
+}
+
+struct mlx5dr_cmd_vport_cap *
+mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+
+ if (dr_domain_is_esw_mgr_vport(dmn, vport))
+ return &caps->vports.esw_manager_caps;
+
+ if (vport == MLX5_VPORT_UPLINK)
+ return &caps->vports.uplink_caps;
+
+vport_load:
+ vport_caps = xa_load(&caps->vports.vports_caps_xa, vport);
+ if (vport_caps)
+ return vport_caps;
+
+ vport_caps = dr_domain_add_vport_cap(dmn, vport);
+ if (PTR_ERR(vport_caps) == -EBUSY)
+ /* caps were already stored by another thread */
+ goto vport_load;
+
+ return vport_caps;
+}
+
+static void dr_domain_clear_vports(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+ unsigned long i;
+
+ xa_for_each(&dmn->info.caps.vports.vports_caps_xa, i, vport_caps) {
+ vport_caps = xa_erase(&dmn->info.caps.vports.vports_caps_xa, i);
+ kvfree(vport_caps);
+ }
}
static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
@@ -181,28 +258,29 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
return ret;
dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner;
+ dmn->info.caps.fdb_sw_owner_v2 = dmn->info.caps.esw_caps.sw_owner_v2;
dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx;
dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx;
- dmn->info.caps.vports_caps = kcalloc(dmn->info.caps.num_esw_ports,
- sizeof(dmn->info.caps.vports_caps[0]),
- GFP_KERNEL);
- if (!dmn->info.caps.vports_caps)
- return -ENOMEM;
+ xa_init(&dmn->info.caps.vports.vports_caps_xa);
+
+ /* Query eswitch manager and uplink vports only. Rest of the
+ * vports (vport 0, VFs and SFs) will be queried dynamically.
+ */
- ret = dr_domain_query_vports(dmn);
+ ret = dr_domain_query_esw_mngr(dmn);
if (ret) {
- mlx5dr_dbg(dmn, "Failed to query vports caps\n");
- goto free_vports_caps;
+ mlx5dr_err(dmn, "Failed to query eswitch manager vport caps (err: %d)", ret);
+ goto free_vports_caps_xa;
}
- dmn->info.caps.num_vports = dmn->info.caps.num_esw_ports - 1;
+ dr_domain_query_uplink(dmn);
return 0;
-free_vports_caps:
- kfree(dmn->info.caps.vports_caps);
- dmn->info.caps.vports_caps = NULL;
+free_vports_caps_xa:
+ xa_destroy(&dmn->info.caps.vports.vports_caps_xa);
+
return ret;
}
@@ -213,12 +291,10 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
int ret;
if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) {
- mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n");
+ mlx5dr_err(dmn, "Failed to allocate domain, bad link type\n");
return -EOPNOTSUPP;
}
- dmn->info.caps.num_esw_ports = mlx5_eswitch_get_total_vports(mdev);
-
ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps);
if (ret)
return ret;
@@ -229,20 +305,20 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
switch (dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
- if (!dmn->info.caps.rx_sw_owner)
+ if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, rx))
return -ENOTSUPP;
dmn->info.supp_sw_steering = true;
- dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
+ dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address;
dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address;
break;
case MLX5DR_DOMAIN_TYPE_NIC_TX:
- if (!dmn->info.caps.tx_sw_owner)
+ if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, tx))
return -ENOTSUPP;
dmn->info.supp_sw_steering = true;
- dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+ dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address;
dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address;
break;
@@ -250,16 +326,12 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
if (!dmn->info.caps.eswitch_manager)
return -ENOTSUPP;
- if (!dmn->info.caps.fdb_sw_owner)
+ if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, fdb))
return -ENOTSUPP;
- dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
- dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
- vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0);
- if (!vport_cap) {
- mlx5dr_dbg(dmn, "Failed to get esw manager vport\n");
- return -ENOENT;
- }
+ dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
+ dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
+ vport_cap = &dmn->info.caps.vports.esw_manager_caps;
dmn->info.supp_sw_steering = true;
dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx;
@@ -268,7 +340,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address;
break;
default:
- mlx5dr_dbg(dmn, "Invalid domain\n");
+ mlx5dr_err(dmn, "Invalid domain\n");
ret = -EINVAL;
break;
}
@@ -278,7 +350,8 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn)
{
- kfree(dmn->info.caps.vports_caps);
+ dr_domain_clear_vports(dmn);
+ xa_destroy(&dmn->info.caps.vports.vports_caps_xa);
}
struct mlx5dr_domain *
@@ -297,10 +370,11 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
dmn->mdev = mdev;
dmn->type = type;
refcount_set(&dmn->refcount, 1);
- mutex_init(&dmn->mutex);
+ mutex_init(&dmn->info.rx.mutex);
+ mutex_init(&dmn->info.tx.mutex);
if (dr_domain_caps_init(mdev, dmn)) {
- mlx5dr_dbg(dmn, "Failed init domain, no caps\n");
+ mlx5dr_err(dmn, "Failed init domain, no caps\n");
goto free_domain;
}
@@ -320,16 +394,10 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
goto uninit_caps;
}
- ret = dr_domain_init_cache(dmn);
- if (ret) {
- mlx5dr_err(dmn, "Failed initialize domain cache\n");
- goto uninit_resourses;
- }
-
+ dr_domain_init_csum_recalc_fts(dmn);
+ mlx5dr_dbg_init_dump(dmn);
return dmn;
-uninit_resourses:
- dr_domain_uninit_resources(dmn);
uninit_caps:
dr_domain_caps_uninit(dmn);
free_domain:
@@ -345,11 +413,14 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
int ret = 0;
if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
- mutex_lock(&dmn->mutex);
+ mlx5dr_domain_lock(dmn);
ret = mlx5dr_send_ring_force_drain(dmn);
- mutex_unlock(&dmn->mutex);
- if (ret)
+ mlx5dr_domain_unlock(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n",
+ flags, ret);
return ret;
+ }
}
if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
@@ -360,15 +431,17 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
{
- if (refcount_read(&dmn->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1))
return -EBUSY;
/* make sure resources are not used by the hardware */
mlx5dr_cmd_sync_steering(dmn->mdev);
- dr_domain_uninit_cache(dmn);
+ mlx5dr_dbg_uninit_dump(dmn);
+ dr_domain_uninit_csum_recalc_fts(dmn);
dr_domain_uninit_resources(dmn);
dr_domain_caps_uninit(dmn);
- mutex_destroy(&dmn->mutex);
+ mutex_destroy(&dmn->info.tx.mutex);
+ mutex_destroy(&dmn->info.rx.mutex);
kfree(dmn);
return 0;
}
@@ -376,7 +449,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn)
{
- mutex_lock(&dmn->mutex);
+ mlx5dr_domain_lock(dmn);
if (dmn->peer_dmn)
refcount_dec(&dmn->peer_dmn->refcount);
@@ -386,5 +459,5 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
if (dmn->peer_dmn)
refcount_inc(&dmn->peer_dmn->refcount);
- mutex_unlock(&dmn->mutex);
+ mlx5dr_domain_unlock(dmn);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
index 1fbcd012bb85..f05ef0cd54ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -5,7 +5,7 @@
#include "dr_types.h"
struct mlx5dr_fw_recalc_cs_ft *
-mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num)
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num)
{
struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
@@ -103,7 +103,9 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
int num_dest,
bool reformat_req,
u32 *tbl_id,
- u32 *group_id)
+ u32 *group_id,
+ bool ignore_flow_level,
+ u32 flow_source)
{
struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
struct mlx5dr_cmd_fte_info fte_info = {};
@@ -112,7 +114,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
int ret;
ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB;
- ft_attr.level = dmn->info.caps.max_ft_level - 2;
+ ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2,
+ MLX5_FT_MAX_MULTIPATH_LEVEL);
ft_attr.reformat_en = reformat_req;
ft_attr.decap_en = reformat_req;
@@ -136,6 +139,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
fte_info.dests_size = num_dest;
fte_info.val = val;
fte_info.dest_arr = dest;
+ fte_info.ignore_flow_level = ignore_flow_level;
+ fte_info.flow_context.flow_source = flow_source;
ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
if (ret) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index d7c7467e2d53..4ca67fa24cc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -4,50 +4,15 @@
#include "dr_types.h"
#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
-#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024)
-
-struct mlx5dr_icm_pool;
-
-struct mlx5dr_icm_bucket {
- struct mlx5dr_icm_pool *pool;
-
- /* Chunks that aren't visible to HW not directly and not in cache */
- struct list_head free_list;
- unsigned int free_list_count;
-
- /* Used chunks, HW may be accessing this memory */
- struct list_head used_list;
- unsigned int used_list_count;
-
- /* HW may be accessing this memory but at some future,
- * undetermined time, it might cease to do so. Before deciding to call
- * sync_ste, this list is moved to sync_list
- */
- struct list_head hot_list;
- unsigned int hot_list_count;
-
- /* Pending sync list, entries from the hot list are moved to this list.
- * sync_ste is executed and then sync_list is concatenated to the free list
- */
- struct list_head sync_list;
- unsigned int sync_list_count;
-
- u32 total_chunks;
- u32 num_of_entries;
- u32 entry_size;
- /* protect the ICM bucket */
- struct mutex mutex;
-};
struct mlx5dr_icm_pool {
- struct mlx5dr_icm_bucket *buckets;
enum mlx5dr_icm_type icm_type;
enum mlx5dr_icm_chunk_size max_log_chunk_sz;
- enum mlx5dr_icm_chunk_size num_of_buckets;
- struct list_head icm_mr_list;
- /* protect the ICM MR list */
- struct mutex mr_mutex;
struct mlx5dr_domain *dmn;
+ /* memory management */
+ struct mutex mutex; /* protect the ICM pool and ICM buddy */
+ struct list_head buddy_mem_list;
+ u64 hot_memory_size;
};
struct mlx5dr_icm_dm {
@@ -58,18 +23,16 @@ struct mlx5dr_icm_dm {
};
struct mlx5dr_icm_mr {
- struct mlx5dr_icm_pool *pool;
- struct mlx5_core_mkey mkey;
+ u32 mkey;
struct mlx5dr_icm_dm dm;
- size_t used_length;
+ struct mlx5dr_domain *dmn;
size_t length;
u64 icm_start_addr;
- struct list_head mr_list;
};
static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
u32 pd, u64 length, u64 start_addr, int mode,
- struct mlx5_core_mkey *mkey)
+ u32 *mkey)
{
u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
@@ -94,31 +57,67 @@ static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
return mlx5_core_create_mkey(mdev, mkey, in, inlen);
}
+u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk)
+{
+ u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);
+
+ return (u64)offset * chunk->seg;
+}
+
+u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk)
+{
+ return chunk->buddy_mem->icm_mr->mkey;
+}
+
+u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk)
+{
+ u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);
+
+ return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg;
+}
+
+u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk)
+{
+ return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size,
+ chunk->buddy_mem->pool->icm_type);
+}
+
+u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk)
+{
+ return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size);
+}
+
static struct mlx5dr_icm_mr *
-dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
- enum mlx5_sw_icm_type type,
- size_t align_base)
+dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
{
struct mlx5_core_dev *mdev = pool->dmn->mdev;
+ enum mlx5_sw_icm_type dm_type;
struct mlx5dr_icm_mr *icm_mr;
- size_t align_diff;
+ size_t log_align_base;
int err;
icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
if (!icm_mr)
return NULL;
- icm_mr->pool = pool;
- INIT_LIST_HEAD(&icm_mr->mr_list);
-
- icm_mr->dm.type = type;
+ icm_mr->dmn = pool->dmn;
- /* 2^log_biggest_table * entry-size * double-for-alignment */
icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
- pool->icm_type) * 2;
+ pool->icm_type);
- err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
- &icm_mr->dm.addr, &icm_mr->dm.obj_id);
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ dm_type = MLX5_SW_ICM_TYPE_STEERING;
+ log_align_base = ilog2(icm_mr->dm.length);
+ } else {
+ dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ /* Align base is 64B */
+ log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
+ }
+ icm_mr->dm.type = dm_type;
+
+ err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
+ log_align_base, 0, &icm_mr->dm.addr,
+ &icm_mr->dm.obj_id);
if (err) {
mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
goto free_icm_mr;
@@ -137,15 +136,16 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
icm_mr->icm_start_addr = icm_mr->dm.addr;
- /* align_base is always a power of 2 */
- align_diff = icm_mr->icm_start_addr & (align_base - 1);
- if (align_diff)
- icm_mr->used_length = align_base - align_diff;
-
- list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list);
+ if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) {
+ mlx5dr_err(pool->dmn, "Failed to get Aligned ICM mem (asked: %zu)\n",
+ log_align_base);
+ goto free_mkey;
+ }
return icm_mr;
+free_mkey:
+ mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
free_dm:
mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
icm_mr->dm.addr, icm_mr->dm.obj_id);
@@ -156,290 +156,279 @@ free_icm_mr:
static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
{
- struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev;
+ struct mlx5_core_dev *mdev = icm_mr->dmn->mdev;
struct mlx5dr_icm_dm *dm = &icm_mr->dm;
- list_del(&icm_mr->mr_list);
- mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
+ mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
dm->addr, dm->obj_id);
kvfree(icm_mr);
}
-static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk)
+static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
{
- struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+ /* We support only one type of STE size, both for ConnectX-5 and later
+ * devices. Once the support for match STE which has a larger tag is
+ * added (32B instead of 16B), the STE size for devices later than
+ * ConnectX-5 needs to account for that.
+ */
+ return DR_STE_SIZE_REDUCED;
+}
- chunk->ste_arr = kvzalloc(bucket->num_of_entries *
- sizeof(chunk->ste_arr[0]), GFP_KERNEL);
- if (!chunk->ste_arr)
- return -ENOMEM;
+static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
+{
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+ int index = offset / DR_STE_SIZE;
- chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries *
- DR_STE_SIZE_REDUCED, GFP_KERNEL);
- if (!chunk->hw_ste_arr)
- goto out_free_ste_arr;
+ chunk->ste_arr = &buddy->ste_arr[index];
+ chunk->miss_list = &buddy->miss_list[index];
+ chunk->hw_ste_arr = buddy->hw_ste_arr +
+ index * dr_icm_buddy_get_ste_size(buddy);
+}
- chunk->miss_list = kvmalloc(bucket->num_of_entries *
- sizeof(chunk->miss_list[0]), GFP_KERNEL);
- if (!chunk->miss_list)
- goto out_free_hw_ste_arr;
+static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
+{
+ int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
- return 0;
+ memset(chunk->hw_ste_arr, 0,
+ num_of_entries * dr_icm_buddy_get_ste_size(buddy));
+ memset(chunk->ste_arr, 0,
+ num_of_entries * sizeof(chunk->ste_arr[0]));
+}
-out_free_hw_ste_arr:
- kvfree(chunk->hw_ste_arr);
-out_free_ste_arr:
- kvfree(chunk->ste_arr);
- return -ENOMEM;
+static enum mlx5dr_icm_type
+get_chunk_icm_type(struct mlx5dr_icm_chunk *chunk)
+{
+ return chunk->buddy_mem->pool->icm_type;
}
-static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket)
+static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
+ struct mlx5dr_icm_buddy_mem *buddy)
{
- size_t mr_free_size, mr_req_size, mr_row_size;
- struct mlx5dr_icm_pool *pool = bucket->pool;
- struct mlx5dr_icm_mr *icm_mr = NULL;
- struct mlx5dr_icm_chunk *chunk;
- enum mlx5_sw_icm_type dm_type;
- size_t align_base;
- int i, err = 0;
+ enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk);
- mr_req_size = bucket->num_of_entries * bucket->entry_size;
- mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
- pool->icm_type);
+ buddy->used_memory -= mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+ list_del(&chunk->chunk_list);
- if (pool->icm_type == DR_ICM_TYPE_STE) {
- dm_type = MLX5_SW_ICM_TYPE_STEERING;
- /* Align base is the biggest chunk size / row size */
- align_base = mr_row_size;
- } else {
- dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
- /* Align base is 64B */
- align_base = DR_ICM_MODIFY_HDR_ALIGN_BASE;
- }
+ if (icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_cleanup(chunk);
- mutex_lock(&pool->mr_mutex);
- if (!list_empty(&pool->icm_mr_list)) {
- icm_mr = list_last_entry(&pool->icm_mr_list,
- struct mlx5dr_icm_mr, mr_list);
+ kvfree(chunk);
+}
- if (icm_mr)
- mr_free_size = icm_mr->dm.length - icm_mr->used_length;
- }
+static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);
- if (!icm_mr || mr_free_size < mr_row_size) {
- icm_mr = dr_icm_pool_mr_create(pool, dm_type, align_base);
- if (!icm_mr) {
- err = -ENOMEM;
- goto out_err;
- }
- }
+ buddy->ste_arr = kvcalloc(num_of_entries,
+ sizeof(struct mlx5dr_ste), GFP_KERNEL);
+ if (!buddy->ste_arr)
+ return -ENOMEM;
- /* Create memory aligned chunks */
- for (i = 0; i < mr_row_size / mr_req_size; i++) {
- chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL);
- if (!chunk) {
- err = -ENOMEM;
- goto out_err;
- }
+ /* Preallocate full STE size on non-ConnectX-5 devices since
+ * we need to support both full and reduced with the same cache.
+ */
+ buddy->hw_ste_arr = kvcalloc(num_of_entries,
+ dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
+ if (!buddy->hw_ste_arr)
+ goto free_ste_arr;
- chunk->bucket = bucket;
- chunk->rkey = icm_mr->mkey.key;
- /* mr start addr is zero based */
- chunk->mr_addr = icm_mr->used_length;
- chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length;
- icm_mr->used_length += mr_req_size;
- chunk->num_of_entries = bucket->num_of_entries;
- chunk->byte_size = chunk->num_of_entries * bucket->entry_size;
-
- if (pool->icm_type == DR_ICM_TYPE_STE) {
- err = dr_icm_chunk_ste_init(chunk);
- if (err)
- goto out_free_chunk;
- }
+ buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
+ if (!buddy->miss_list)
+ goto free_hw_ste_arr;
- INIT_LIST_HEAD(&chunk->chunk_list);
- list_add(&chunk->chunk_list, &bucket->free_list);
- bucket->free_list_count++;
- bucket->total_chunks++;
- }
- mutex_unlock(&pool->mr_mutex);
return 0;
-out_free_chunk:
- kvfree(chunk);
-out_err:
- mutex_unlock(&pool->mr_mutex);
- return err;
+free_hw_ste_arr:
+ kvfree(buddy->hw_ste_arr);
+free_ste_arr:
+ kvfree(buddy->ste_arr);
+ return -ENOMEM;
}
-static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
+static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
{
- kvfree(chunk->miss_list);
- kvfree(chunk->hw_ste_arr);
- kvfree(chunk->ste_arr);
+ kvfree(buddy->ste_arr);
+ kvfree(buddy->hw_ste_arr);
+ kvfree(buddy->miss_list);
}
-static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk)
+static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
{
- struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+ struct mlx5dr_icm_buddy_mem *buddy;
+ struct mlx5dr_icm_mr *icm_mr;
- list_del(&chunk->chunk_list);
- bucket->total_chunks--;
+ icm_mr = dr_icm_pool_mr_create(pool);
+ if (!icm_mr)
+ return -ENOMEM;
- if (bucket->pool->icm_type == DR_ICM_TYPE_STE)
- dr_icm_chunk_ste_cleanup(chunk);
+ buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL);
+ if (!buddy)
+ goto free_mr;
- kvfree(chunk);
-}
+ if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz))
+ goto err_free_buddy;
-static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool,
- struct mlx5dr_icm_bucket *bucket,
- enum mlx5dr_icm_chunk_size chunk_size)
-{
- if (pool->icm_type == DR_ICM_TYPE_STE)
- bucket->entry_size = DR_STE_SIZE;
- else
- bucket->entry_size = DR_MODIFY_ACTION_SIZE;
-
- bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
- bucket->pool = pool;
- mutex_init(&bucket->mutex);
- INIT_LIST_HEAD(&bucket->free_list);
- INIT_LIST_HEAD(&bucket->used_list);
- INIT_LIST_HEAD(&bucket->hot_list);
- INIT_LIST_HEAD(&bucket->sync_list);
+ buddy->icm_mr = icm_mr;
+ buddy->pool = pool;
+
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ /* Reduce allocations by preallocating and reusing the STE structures */
+ if (dr_icm_buddy_init_ste_cache(buddy))
+ goto err_cleanup_buddy;
+ }
+
+ /* add it to the -start- of the list in order to search in it first */
+ list_add(&buddy->list_node, &pool->buddy_mem_list);
+
+ return 0;
+
+err_cleanup_buddy:
+ mlx5dr_buddy_cleanup(buddy);
+err_free_buddy:
+ kvfree(buddy);
+free_mr:
+ dr_icm_pool_mr_destroy(icm_mr);
+ return -ENOMEM;
}
-static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket)
+static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
{
struct mlx5dr_icm_chunk *chunk, *next;
- mutex_destroy(&bucket->mutex);
- list_splice_tail_init(&bucket->sync_list, &bucket->free_list);
- list_splice_tail_init(&bucket->hot_list, &bucket->free_list);
+ list_for_each_entry_safe(chunk, next, &buddy->hot_list, chunk_list)
+ dr_icm_chunk_destroy(chunk, buddy);
- list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list)
- dr_icm_chunk_destroy(chunk);
+ list_for_each_entry_safe(chunk, next, &buddy->used_list, chunk_list)
+ dr_icm_chunk_destroy(chunk, buddy);
- WARN_ON(bucket->total_chunks != 0);
+ dr_icm_pool_mr_destroy(buddy->icm_mr);
- /* Cleanup of unreturned chunks */
- list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list)
- dr_icm_chunk_destroy(chunk);
+ mlx5dr_buddy_cleanup(buddy);
+
+ if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_buddy_cleanup_ste_cache(buddy);
+
+ kvfree(buddy);
}
-static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool)
+static struct mlx5dr_icm_chunk *
+dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ struct mlx5dr_icm_buddy_mem *buddy_mem_pool,
+ unsigned int seg)
{
- u64 hot_size = 0;
- int chunk_order;
+ struct mlx5dr_icm_chunk *chunk;
+ int offset;
+
+ chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL);
+ if (!chunk)
+ return NULL;
- for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++)
- hot_size += pool->buckets[chunk_order].hot_list_count *
- mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type);
+ offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;
- return hot_size;
-}
+ chunk->seg = seg;
+ chunk->size = chunk_size;
+ chunk->buddy_mem = buddy_mem_pool;
-static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool,
- struct mlx5dr_icm_bucket *bucket)
-{
- u64 bytes_for_sync;
+ if (pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_init(chunk, offset);
- bytes_for_sync = dr_icm_hot_mem_size(pool);
- if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count)
- return false;
+ buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+ INIT_LIST_HEAD(&chunk->chunk_list);
- return true;
-}
+ /* chunk now is part of the used_list */
+ list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);
-static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket)
-{
- list_splice_tail_init(&bucket->hot_list, &bucket->sync_list);
- bucket->sync_list_count += bucket->hot_list_count;
- bucket->hot_list_count = 0;
+ return chunk;
}
-static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket)
+static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
{
- list_splice_tail_init(&bucket->sync_list, &bucket->free_list);
- bucket->free_list_count += bucket->sync_list_count;
- bucket->sync_list_count = 0;
-}
+ int allow_hot_size;
-static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket)
-{
- list_splice_tail_init(&bucket->sync_list, &bucket->hot_list);
- bucket->hot_list_count += bucket->sync_list_count;
- bucket->sync_list_count = 0;
+ /* sync when hot memory reaches half of the pool size */
+ allow_hot_size =
+ mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) / 2;
+
+ return pool->hot_memory_size > allow_hot_size;
}
-static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool,
- struct mlx5dr_icm_bucket *cb,
- bool buckets[DR_CHUNK_SIZE_MAX])
+static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
{
- struct mlx5dr_icm_bucket *bucket;
- int i;
-
- for (i = 0; i < pool->num_of_buckets; i++) {
- bucket = &pool->buckets[i];
- if (bucket == cb) {
- dr_icm_chill_bucket_start(bucket);
- continue;
- }
+ struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
+ u32 num_entries;
+ int err;
- /* Freeing the mutex is done at the end of that process, after
- * sync_ste was executed at dr_icm_chill_buckets_end func.
- */
- if (mutex_trylock(&bucket->mutex)) {
- dr_icm_chill_bucket_start(bucket);
- buckets[i] = true;
- }
+ err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
+ if (err) {
+ mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err);
+ return err;
}
-}
-static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool,
- struct mlx5dr_icm_bucket *cb,
- bool buckets[DR_CHUNK_SIZE_MAX])
-{
- struct mlx5dr_icm_bucket *bucket;
- int i;
-
- for (i = 0; i < pool->num_of_buckets; i++) {
- bucket = &pool->buckets[i];
- if (bucket == cb) {
- dr_icm_chill_bucket_end(bucket);
- continue;
- }
+ list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) {
+ struct mlx5dr_icm_chunk *chunk, *tmp_chunk;
- if (!buckets[i])
- continue;
+ list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) {
+ num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
+ mlx5dr_buddy_free_mem(buddy, chunk->seg, ilog2(num_entries));
+ pool->hot_memory_size -= mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+ dr_icm_chunk_destroy(chunk, buddy);
+ }
- dr_icm_chill_bucket_end(bucket);
- mutex_unlock(&bucket->mutex);
+ if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_buddy_destroy(buddy);
}
+
+ return 0;
}
-static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool,
- struct mlx5dr_icm_bucket *cb,
- bool buckets[DR_CHUNK_SIZE_MAX])
+static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ struct mlx5dr_icm_buddy_mem **buddy,
+ unsigned int *seg)
{
- struct mlx5dr_icm_bucket *bucket;
- int i;
-
- for (i = 0; i < pool->num_of_buckets; i++) {
- bucket = &pool->buckets[i];
- if (bucket == cb) {
- dr_icm_chill_bucket_abort(bucket);
- continue;
- }
+ struct mlx5dr_icm_buddy_mem *buddy_mem_pool;
+ bool new_mem = false;
+ int err;
- if (!buckets[i])
- continue;
+alloc_buddy_mem:
+ /* find the next free place from the buddy list */
+ list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) {
+ err = mlx5dr_buddy_alloc_mem(buddy_mem_pool,
+ chunk_size, seg);
+ if (!err)
+ goto found;
+
+ if (WARN_ON(new_mem)) {
+ /* We have new memory pool, first in the list */
+ mlx5dr_err(pool->dmn,
+ "No memory for order: %d\n",
+ chunk_size);
+ goto out;
+ }
+ }
- dr_icm_chill_bucket_abort(bucket);
- mutex_unlock(&bucket->mutex);
+ /* no more available allocators in that pool, create new */
+ err = dr_icm_buddy_create(pool);
+ if (err) {
+ mlx5dr_err(pool->dmn,
+ "Failed creating buddy for order %d\n",
+ chunk_size);
+ goto out;
}
+
+ /* mark we have new memory, first in list */
+ new_mem = true;
+ goto alloc_buddy_mem;
+
+found:
+ *buddy = buddy_mem_pool;
+out:
+ return err;
}
/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and
@@ -449,68 +438,48 @@ struct mlx5dr_icm_chunk *
mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
enum mlx5dr_icm_chunk_size chunk_size)
{
- struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */
- bool buckets[DR_CHUNK_SIZE_MAX] = {};
- struct mlx5dr_icm_bucket *bucket;
- int err;
+ struct mlx5dr_icm_chunk *chunk = NULL;
+ struct mlx5dr_icm_buddy_mem *buddy;
+ unsigned int seg;
+ int ret;
if (chunk_size > pool->max_log_chunk_sz)
return NULL;
- bucket = &pool->buckets[chunk_size];
-
- mutex_lock(&bucket->mutex);
-
- /* Take chunk from pool if available, otherwise allocate new chunks */
- if (list_empty(&bucket->free_list)) {
- if (dr_icm_reuse_hot_entries(pool, bucket)) {
- dr_icm_chill_buckets_start(pool, bucket, buckets);
- err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
- if (err) {
- dr_icm_chill_buckets_abort(pool, bucket, buckets);
- mlx5dr_dbg(pool->dmn, "Sync_steering failed\n");
- chunk = NULL;
- goto out;
- }
- dr_icm_chill_buckets_end(pool, bucket, buckets);
- } else {
- dr_icm_chunks_create(bucket);
- }
- }
+ mutex_lock(&pool->mutex);
+ /* find mem, get back the relevant buddy pool and seg in that mem */
+ ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg);
+ if (ret)
+ goto out;
- if (!list_empty(&bucket->free_list)) {
- chunk = list_last_entry(&bucket->free_list,
- struct mlx5dr_icm_chunk,
- chunk_list);
- if (chunk) {
- list_del_init(&chunk->chunk_list);
- list_add_tail(&chunk->chunk_list, &bucket->used_list);
- bucket->free_list_count--;
- bucket->used_list_count++;
- }
- }
+ chunk = dr_icm_chunk_create(pool, chunk_size, buddy, seg);
+ if (!chunk)
+ goto out_err;
+
+ goto out;
+
+out_err:
+ mlx5dr_buddy_free_mem(buddy, seg, chunk_size);
out:
- mutex_unlock(&bucket->mutex);
+ mutex_unlock(&pool->mutex);
return chunk;
}
void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
{
- struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+ struct mlx5dr_icm_pool *pool = buddy->pool;
- if (bucket->pool->icm_type == DR_ICM_TYPE_STE) {
- memset(chunk->ste_arr, 0,
- bucket->num_of_entries * sizeof(chunk->ste_arr[0]));
- memset(chunk->hw_ste_arr, 0,
- bucket->num_of_entries * DR_STE_SIZE_REDUCED);
- }
+ /* move the memory to the waiting list AKA "hot" */
+ mutex_lock(&pool->mutex);
+ list_move_tail(&chunk->chunk_list, &buddy->hot_list);
+ pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+
+ /* Check if we have chunks that are waiting for sync-ste */
+ if (dr_icm_pool_is_sync_required(pool))
+ dr_icm_pool_sync_all_buddy_pools(pool);
- mutex_lock(&bucket->mutex);
- list_del_init(&chunk->chunk_list);
- list_add_tail(&chunk->chunk_list, &bucket->hot_list);
- bucket->hot_list_count++;
- bucket->used_list_count--;
- mutex_unlock(&bucket->mutex);
+ mutex_unlock(&pool->mutex);
}
struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
@@ -518,7 +487,6 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
{
enum mlx5dr_icm_chunk_size max_log_chunk_sz;
struct mlx5dr_icm_pool *pool;
- int i;
if (icm_type == DR_ICM_TYPE_STE)
max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
@@ -529,43 +497,24 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
if (!pool)
return NULL;
- pool->buckets = kcalloc(max_log_chunk_sz + 1,
- sizeof(pool->buckets[0]),
- GFP_KERNEL);
- if (!pool->buckets)
- goto free_pool;
-
pool->dmn = dmn;
pool->icm_type = icm_type;
pool->max_log_chunk_sz = max_log_chunk_sz;
- pool->num_of_buckets = max_log_chunk_sz + 1;
- INIT_LIST_HEAD(&pool->icm_mr_list);
- for (i = 0; i < pool->num_of_buckets; i++)
- dr_icm_bucket_init(pool, &pool->buckets[i], i);
+ INIT_LIST_HEAD(&pool->buddy_mem_list);
- mutex_init(&pool->mr_mutex);
+ mutex_init(&pool->mutex);
return pool;
-
-free_pool:
- kvfree(pool);
- return NULL;
}
void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool)
{
- struct mlx5dr_icm_mr *icm_mr, *next;
- int i;
-
- mutex_destroy(&pool->mr_mutex);
-
- list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list)
- dr_icm_pool_mr_destroy(icm_mr);
+ struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
- for (i = 0; i < pool->num_of_buckets; i++)
- dr_icm_bucket_cleanup(&pool->buckets[i]);
+ list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node)
+ dr_icm_buddy_destroy(buddy);
- kfree(pool->buckets);
+ mutex_destroy(&pool->mutex);
kvfree(pool);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index c6dbd856df94..0726848eb3ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec)
return (spec->dmac_47_16 || spec->dmac_15_0);
}
-static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec)
-{
- return (spec->src_ip_127_96 || spec->src_ip_95_64 ||
- spec->src_ip_63_32 || spec->src_ip_31_0);
-}
-
-static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec)
-{
- return (spec->dst_ip_127_96 || spec->dst_ip_95_64 ||
- spec->dst_ip_63_32 || spec->dst_ip_31_0);
-}
-
static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec)
{
return (spec->ip_protocol || spec->frag || spec->tcp_flags ||
@@ -59,6 +47,11 @@ static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec)
return spec->ttl_hoplimit;
}
+static bool dr_mask_is_ipv4_ihl_set(struct mlx5dr_match_spec *spec)
+{
+ return spec->ipv4_ihl;
+}
+
#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \
(_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \
(_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \
@@ -85,25 +78,27 @@ static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec)
(_misc2)._inner_outer##_first_mpls_s_bos || \
(_misc2)._inner_outer##_first_mpls_ttl)
-static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc)
+static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc)
{
return (misc->gre_key_h || misc->gre_key_l ||
misc->gre_protocol || misc->gre_c_present ||
misc->gre_k_present || misc->gre_s_present);
}
-#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \
- (_misc2).outer_first_mpls_over_##gre_udp##_label || \
- (_misc2).outer_first_mpls_over_##gre_udp##_exp || \
- (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \
- (_misc2).outer_first_mpls_over_##gre_udp##_ttl)
+#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_gre_label || \
+ (_misc)->outer_first_mpls_over_gre_exp || \
+ (_misc)->outer_first_mpls_over_gre_s_bos || \
+ (_misc)->outer_first_mpls_over_gre_ttl)
-#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \
- DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \
- DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp))
+#define DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_udp_label || \
+ (_misc)->outer_first_mpls_over_udp_exp || \
+ (_misc)->outer_first_mpls_over_udp_s_bos || \
+ (_misc)->outer_first_mpls_over_udp_ttl)
static bool
-dr_mask_is_misc3_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3)
+dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3)
{
return (misc3->outer_vxlan_gpe_vni ||
misc3->outer_vxlan_gpe_next_protocol ||
@@ -111,21 +106,21 @@ dr_mask_is_misc3_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3)
}
static bool
-dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_cmd_caps *caps)
+dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps)
{
- return caps->flex_protocols &
- MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED;
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED);
}
static bool
-dr_mask_is_flex_parser_tnl_vxlan_gpe_set(struct mlx5dr_match_param *mask,
- struct mlx5dr_domain *dmn)
+dr_mask_is_tnl_vxlan_gpe(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
{
- return dr_mask_is_misc3_vxlan_gpe_set(&mask->misc3) &&
- dr_matcher_supp_flex_parser_vxlan_gpe(&dmn->info.caps);
+ return dr_mask_is_vxlan_gpe_set(&mask->misc3) &&
+ dr_matcher_supp_vxlan_gpe(&dmn->info.caps);
}
-static bool dr_mask_is_misc_geneve_set(struct mlx5dr_match_misc *misc)
+static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc)
{
return misc->geneve_vni ||
misc->geneve_oam ||
@@ -133,27 +128,171 @@ static bool dr_mask_is_misc_geneve_set(struct mlx5dr_match_misc *misc)
misc->geneve_opt_len;
}
+static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3)
+{
+ return misc3->geneve_tlv_option_0_data;
+}
+
+static bool
+dr_matcher_supp_flex_parser_ok(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_parser_ok_bits_supp;
+}
+
+static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *misc,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_matcher_supp_flex_parser_ok(&dmn->info.caps) &&
+ misc->geneve_tlv_option_0_exist;
+}
+
static bool
-dr_matcher_supp_flex_parser_geneve(struct mlx5dr_cmd_caps *caps)
+dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps)
{
- return caps->flex_protocols &
- MLX5_FLEX_PARSER_GENEVE_ENABLED;
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED);
}
static bool
-dr_mask_is_flex_parser_tnl_geneve_set(struct mlx5dr_match_param *mask,
- struct mlx5dr_domain *dmn)
+dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_mask_is_tnl_geneve_set(&mask->misc) &&
+ dr_matcher_supp_tnl_geneve(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_gtpu_set(struct mlx5dr_match_misc3 *misc3)
+{
+ return misc3->gtpu_msg_flags || misc3->gtpu_msg_type || misc3->gtpu_teid;
+}
+
+static bool dr_matcher_supp_tnl_gtpu(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_mask_is_tnl_gtpu_set(&mask->misc3) &&
+ dr_matcher_supp_tnl_gtpu(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_dw_0(struct mlx5dr_cmd_caps *caps)
{
- return dr_mask_is_misc_geneve_set(&mask->misc) &&
- dr_matcher_supp_flex_parser_geneve(&dmn->info.caps);
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED;
}
-static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3)
+static bool dr_mask_is_tnl_gtpu_dw_0(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_dw_0 &&
+ dr_matcher_supp_tnl_gtpu_dw_0(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_teid(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_teid(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_teid &&
+ dr_matcher_supp_tnl_gtpu_teid(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_dw_2(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_dw_2(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_dw_2 &&
+ dr_matcher_supp_tnl_gtpu_dw_2(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_first_ext(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_first_ext(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_first_ext_dw_0 &&
+ dr_matcher_supp_tnl_gtpu_first_ext(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_gtpu_flex_parser_0(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+ return (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_0) &&
+ dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) ||
+ (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_teid) &&
+ dr_mask_is_tnl_gtpu_teid(mask, dmn)) ||
+ (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_2) &&
+ dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) ||
+ (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_first_ext_dw_0) &&
+ dr_mask_is_tnl_gtpu_first_ext(mask, dmn));
+}
+
+static bool dr_mask_is_tnl_gtpu_flex_parser_1(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+ return (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_0) &&
+ dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) ||
+ (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_teid) &&
+ dr_mask_is_tnl_gtpu_teid(mask, dmn)) ||
+ (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_2) &&
+ dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) ||
+ (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_first_ext_dw_0) &&
+ dr_mask_is_tnl_gtpu_first_ext(mask, dmn));
+}
+
+static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_mask_is_tnl_gtpu_flex_parser_0(mask, dmn) ||
+ dr_mask_is_tnl_gtpu_flex_parser_1(mask, dmn) ||
+ dr_mask_is_tnl_gtpu(mask, dmn);
+}
+
+static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps)
+{
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED);
+}
+
+static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps)
+{
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED);
+}
+
+static bool dr_mask_is_icmpv6_set(struct mlx5dr_match_misc3 *misc3)
{
return (misc3->icmpv6_type || misc3->icmpv6_code ||
misc3->icmpv6_header_data);
}
+static bool dr_mask_is_icmp(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ if (DR_MASK_IS_ICMPV4_SET(&mask->misc3))
+ return dr_matcher_supp_icmp_v4(&dmn->info.caps);
+ else if (dr_mask_is_icmpv6_set(&mask->misc3))
+ return dr_matcher_supp_icmp_v6(&dmn->info.caps);
+
+ return false;
+}
+
static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2)
{
return misc2->metadata_reg_a;
@@ -176,6 +315,71 @@ static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc)
return (misc->source_sqn || misc->source_port);
}
+static bool dr_mask_is_flex_parser_id_0_3_set(u32 flex_parser_id,
+ u32 flex_parser_value)
+{
+ if (flex_parser_id)
+ return flex_parser_id <= DR_STE_MAX_FLEX_0_ID;
+
+ /* Using flex_parser 0 means that id is zero, thus value must be set. */
+ return flex_parser_value;
+}
+
+static bool dr_mask_is_flex_parser_0_3_set(struct mlx5dr_match_misc4 *misc4)
+{
+ return (dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_0,
+ misc4->prog_sample_field_value_0) ||
+ dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_1,
+ misc4->prog_sample_field_value_1) ||
+ dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_2,
+ misc4->prog_sample_field_value_2) ||
+ dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_3,
+ misc4->prog_sample_field_value_3));
+}
+
+static bool dr_mask_is_flex_parser_id_4_7_set(u32 flex_parser_id)
+{
+ return flex_parser_id > DR_STE_MAX_FLEX_0_ID &&
+ flex_parser_id <= DR_STE_MAX_FLEX_1_ID;
+}
+
+static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4)
+{
+ return (dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_0) ||
+ dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_1) ||
+ dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_2) ||
+ dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3));
+}
+
+static int dr_matcher_supp_tnl_mpls_over_gre(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED;
+}
+
+static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(&mask->misc2) &&
+ dr_matcher_supp_tnl_mpls_over_gre(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED;
+}
+
+static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) &&
+ dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_header_0_1_set(struct mlx5dr_match_misc5 *misc5)
+{
+ return misc5->tunnel_header_0 || misc5->tunnel_header_1;
+}
+
int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
enum mlx5dr_ipv outer_ipv,
@@ -202,15 +406,16 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
{
struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_match_param mask = {};
- struct mlx5dr_match_misc3 *misc3;
+ bool allow_empty_match = false;
struct mlx5dr_ste_build *sb;
bool inner, rx;
int idx = 0;
int ret, i;
sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
- rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+ rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
/* Create a temporary mask to track and clear used mask fields */
if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER)
@@ -228,104 +433,149 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3)
mask.misc3 = matcher->mask.misc3;
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4)
+ mask.misc4 = matcher->mask.misc4;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC5)
+ mask.misc5 = matcher->mask.misc5;
+
ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
&matcher->mask, NULL);
if (ret)
return ret;
+ /* Optimize RX pipe by reducing source port match, since
+ * the FDB RX part is connected only to the wire.
+ */
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
+ rx && mask.misc.source_port) {
+ mask.misc.source_port = 0;
+ mask.misc.source_eswitch_owner_vhca_id = 0;
+ allow_empty_match = true;
+ }
+
/* Outer */
if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER |
DR_MATCHER_CRITERIA_MISC |
DR_MATCHER_CRITERIA_MISC2 |
- DR_MATCHER_CRITERIA_MISC3)) {
+ DR_MATCHER_CRITERIA_MISC3 |
+ DR_MATCHER_CRITERIA_MISC5)) {
inner = false;
if (dr_mask_is_wqe_metadata_set(&mask.misc2))
- mlx5dr_ste_build_general_purpose(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_general_purpose(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (dr_mask_is_reg_c_0_3_set(&mask.misc2))
- mlx5dr_ste_build_register_0(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_register_0(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (dr_mask_is_reg_c_4_7_set(&mask.misc2))
- mlx5dr_ste_build_register_1(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_register_1(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) &&
(dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) {
- ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask,
- dmn, inner, rx);
- if (ret)
- return ret;
+ mlx5dr_ste_build_src_gvmi_qpn(ste_ctx, &sb[idx++],
+ &mask, dmn, inner, rx);
}
if (dr_mask_is_smac_set(&mask.outer) &&
dr_mask_is_dmac_set(&mask.outer)) {
- ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask,
- inner, rx);
- if (ret)
- return ret;
+ mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
}
if (dr_mask_is_smac_set(&mask.outer))
- mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer))
- mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (outer_ipv == DR_RULE_IPV6) {
- if (dr_mask_is_dst_addr_set(&mask.outer))
- mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
- inner, rx);
+ if (DR_MASK_IS_DST_IP_SET(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
- if (dr_mask_is_src_addr_set(&mask.outer))
- mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask,
- inner, rx);
+ if (DR_MASK_IS_SRC_IP_SET(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer))
- mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask,
- inner, rx);
+ mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
} else {
if (dr_mask_is_ipv4_5_tuple_set(&mask.outer))
- mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask,
- inner, rx);
+ mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
- if (dr_mask_is_ttl_set(&mask.outer))
- mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask,
- inner, rx);
+ if (dr_mask_is_ttl_set(&mask.outer) ||
+ dr_mask_is_ipv4_ihl_set(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
}
- if (dr_mask_is_flex_parser_tnl_vxlan_gpe_set(&mask, dmn))
- mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(&sb[idx++],
- &mask,
- inner, rx);
- else if (dr_mask_is_flex_parser_tnl_geneve_set(&mask, dmn))
- mlx5dr_ste_build_flex_parser_tnl_geneve(&sb[idx++],
- &mask,
- inner, rx);
+ if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn))
+ mlx5dr_ste_build_tnl_vxlan_gpe(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ else if (dr_mask_is_tnl_geneve(&mask, dmn)) {
+ mlx5dr_ste_build_tnl_geneve(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ if (dr_mask_is_tnl_geneve_tlv_opt(&mask.misc3))
+ mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ if (dr_mask_is_tnl_geneve_tlv_opt_exist_set(&mask.misc, dmn))
+ mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) {
+ if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn))
+ mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_tnl_gtpu_flex_parser_1(&mask, dmn))
+ mlx5dr_ste_build_tnl_gtpu_flex_parser_1(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_tnl_gtpu(&mask, dmn))
+ mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ } else if (dr_mask_is_tnl_header_0_1_set(&mask.misc5)) {
+ mlx5dr_ste_build_tnl_header_0_1(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ }
if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer))
- mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer))
- mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx);
-
- if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2))
- mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask,
- inner, rx);
-
- misc3 = &mask.misc3;
- if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) &&
- mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) ||
- (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) &&
- mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) {
- ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++],
- &mask, &dmn->info.caps,
- inner, rx);
- if (ret)
- return ret;
- }
- if (dr_mask_is_gre_set(&mask.misc))
- mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_icmp(&mask, dmn))
+ mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_tnl_gre_set(&mask.misc))
+ mlx5dr_ste_build_tnl_gre(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
}
/* Inner */
@@ -336,66 +586,88 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
inner = true;
if (dr_mask_is_eth_l2_tnl_set(&mask.misc))
- mlx5dr_ste_build_eth_l2_tnl(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_eth_l2_tnl(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (dr_mask_is_smac_set(&mask.inner) &&
dr_mask_is_dmac_set(&mask.inner)) {
- ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++],
- &mask, inner, rx);
- if (ret)
- return ret;
+ mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
}
if (dr_mask_is_smac_set(&mask.inner))
- mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner))
- mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (inner_ipv == DR_RULE_IPV6) {
- if (dr_mask_is_dst_addr_set(&mask.inner))
- mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
- inner, rx);
+ if (DR_MASK_IS_DST_IP_SET(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
- if (dr_mask_is_src_addr_set(&mask.inner))
- mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask,
- inner, rx);
+ if (DR_MASK_IS_SRC_IP_SET(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner))
- mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask,
- inner, rx);
+ mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
} else {
if (dr_mask_is_ipv4_5_tuple_set(&mask.inner))
- mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask,
- inner, rx);
+ mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
- if (dr_mask_is_ttl_set(&mask.inner))
- mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask,
- inner, rx);
+ if (dr_mask_is_ttl_set(&mask.inner) ||
+ dr_mask_is_ipv4_ihl_set(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
}
if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner))
- mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner))
- mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx);
+ mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ }
- if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2))
- mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx);
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+ if (dr_mask_is_flex_parser_0_3_set(&mask.misc4))
+ mlx5dr_ste_build_flex_parser_0(ste_ctx, &sb[idx++],
+ &mask, false, rx);
+
+ if (dr_mask_is_flex_parser_4_7_set(&mask.misc4))
+ mlx5dr_ste_build_flex_parser_1(ste_ctx, &sb[idx++],
+ &mask, false, rx);
}
+
/* Empty matcher, takes all */
- if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
+ if ((!idx && allow_empty_match) ||
+ matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx);
if (idx == 0) {
- mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n");
+ mlx5dr_err(dmn, "Cannot generate any valid rules from mask\n");
return -EINVAL;
}
/* Check that all mask fields were consumed */
for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) {
if (((u8 *)&mask)[i] != 0) {
- mlx5dr_info(dmn, "Mask contains unsupported parameters\n");
+ mlx5dr_dbg(dmn, "Mask contains unsupported parameters\n");
return -EOPNOTSUPP;
}
}
@@ -406,10 +678,10 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
return 0;
}
-static int dr_matcher_connect(struct mlx5dr_domain *dmn,
- struct mlx5dr_matcher_rx_tx *curr_nic_matcher,
- struct mlx5dr_matcher_rx_tx *next_nic_matcher,
- struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *curr_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
{
struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl;
struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
@@ -433,7 +705,7 @@ static int dr_matcher_connect(struct mlx5dr_domain *dmn,
/* Connect start hash table to end anchor */
info.type = CONNECT_MISS;
- info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr;
+ info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(curr_nic_matcher->e_anchor->chunk);
ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
curr_nic_matcher->s_htbl,
&info, false);
@@ -454,70 +726,63 @@ static int dr_matcher_connect(struct mlx5dr_domain *dmn,
return ret;
/* Update the pointing ste and next hash table */
- curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr;
- prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl;
+ curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->chunk->ste_arr;
+ prev_htbl->chunk->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl;
if (next_nic_matcher) {
- next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr;
- curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+ next_nic_matcher->s_htbl->pointing_ste =
+ curr_nic_matcher->e_anchor->chunk->ste_arr;
+ curr_nic_matcher->e_anchor->chunk->ste_arr[0].next_htbl =
+ next_nic_matcher->s_htbl;
}
return 0;
}
-static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
+int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
{
- struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher;
- struct mlx5dr_table *tbl = matcher->tbl;
- struct mlx5dr_domain *dmn = tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher, *prev_nic_matcher, *tmp_nic_matcher;
+ struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl;
bool first = true;
int ret;
- next_matcher = NULL;
- if (!list_empty(&tbl->matcher_list))
- list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) {
- if (tmp_matcher->prio >= matcher->prio) {
- next_matcher = tmp_matcher;
- break;
- }
- first = false;
+ /* If the nic matcher is already on its parent nic table list,
+ * then it is already connected to the chain of nic matchers.
+ */
+ if (!list_empty(&nic_matcher->list_node))
+ return 0;
+
+ next_nic_matcher = NULL;
+ list_for_each_entry(tmp_nic_matcher, &nic_tbl->nic_matcher_list, list_node) {
+ if (tmp_nic_matcher->prio >= nic_matcher->prio) {
+ next_nic_matcher = tmp_nic_matcher;
+ break;
}
+ first = false;
+ }
- prev_matcher = NULL;
- if (next_matcher && !first)
- prev_matcher = list_prev_entry(next_matcher, matcher_list);
+ prev_nic_matcher = NULL;
+ if (next_nic_matcher && !first)
+ prev_nic_matcher = list_prev_entry(next_nic_matcher, list_node);
else if (!first)
- prev_matcher = list_last_entry(&tbl->matcher_list,
- struct mlx5dr_matcher,
- matcher_list);
-
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
- ret = dr_matcher_connect(dmn, &matcher->rx,
- next_matcher ? &next_matcher->rx : NULL,
- prev_matcher ? &prev_matcher->rx : NULL);
- if (ret)
- return ret;
- }
+ prev_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list,
+ struct mlx5dr_matcher_rx_tx,
+ list_node);
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
- ret = dr_matcher_connect(dmn, &matcher->tx,
- next_matcher ? &next_matcher->tx : NULL,
- prev_matcher ? &prev_matcher->tx : NULL);
- if (ret)
- return ret;
- }
+ ret = dr_nic_matcher_connect(dmn, nic_matcher,
+ next_nic_matcher, prev_nic_matcher);
+ if (ret)
+ return ret;
- if (prev_matcher)
- list_add(&matcher->matcher_list, &prev_matcher->matcher_list);
- else if (next_matcher)
- list_add_tail(&matcher->matcher_list,
- &next_matcher->matcher_list);
+ if (prev_nic_matcher)
+ list_add(&nic_matcher->list_node, &prev_nic_matcher->list_node);
+ else if (next_nic_matcher)
+ list_add_tail(&nic_matcher->list_node, &next_nic_matcher->list_node);
else
- list_add(&matcher->matcher_list, &tbl->matcher_list);
+ list_add(&nic_matcher->list_node, &nic_matcher->nic_tbl->nic_matcher_list);
- return 0;
+ return ret;
}
static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher)
@@ -563,7 +828,7 @@ static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher,
dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6);
if (!nic_matcher->ste_builder) {
- mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n");
+ mlx5dr_err(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n");
return -EINVAL;
}
@@ -576,6 +841,9 @@ static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
int ret;
+ nic_matcher->prio = matcher->prio;
+ INIT_LIST_HEAD(&nic_matcher->list_node);
+
ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher);
if (ret)
return ret;
@@ -626,27 +894,61 @@ uninit_nic_rx:
return ret;
}
-static int dr_matcher_init(struct mlx5dr_matcher *matcher,
- struct mlx5dr_match_parameters *mask)
+static int dr_matcher_copy_param(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *mask)
{
- struct mlx5dr_table *tbl = matcher->tbl;
- struct mlx5dr_domain *dmn = tbl->dmn;
- int ret;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_match_parameters consumed_mask;
+ int i, ret = 0;
if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) {
- mlx5dr_info(dmn, "Invalid match criteria attribute\n");
+ mlx5dr_err(dmn, "Invalid match criteria attribute\n");
return -EINVAL;
}
if (mask) {
- if (mask->match_sz > sizeof(struct mlx5dr_match_param)) {
- mlx5dr_info(dmn, "Invalid match size attribute\n");
+ if (mask->match_sz > DR_SZ_MATCH_PARAM) {
+ mlx5dr_err(dmn, "Invalid match size attribute\n");
return -EINVAL;
}
+
+ consumed_mask.match_buf = kzalloc(mask->match_sz, GFP_KERNEL);
+ if (!consumed_mask.match_buf)
+ return -ENOMEM;
+
+ consumed_mask.match_sz = mask->match_sz;
+ memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz);
mlx5dr_ste_copy_param(matcher->match_criteria,
- &matcher->mask, mask);
+ &matcher->mask, &consumed_mask, true);
+
+ /* Check that all mask data was consumed */
+ for (i = 0; i < consumed_mask.match_sz; i++) {
+ if (!((u8 *)consumed_mask.match_buf)[i])
+ continue;
+
+ mlx5dr_dbg(dmn,
+ "Match param mask contains unsupported parameters\n");
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ kfree(consumed_mask.match_buf);
}
+ return ret;
+}
+
+static int dr_matcher_init(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *mask)
+{
+ struct mlx5dr_table *tbl = matcher->tbl;
+ struct mlx5dr_domain *dmn = tbl->dmn;
+ int ret;
+
+ ret = dr_matcher_copy_param(matcher, mask);
+ if (ret)
+ return ret;
+
switch (dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
matcher->rx.nic_tbl = &tbl->rx;
@@ -663,15 +965,29 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher,
break;
default:
WARN_ON(true);
- return -EINVAL;
+ ret = -EINVAL;
}
return ret;
}
+static void dr_matcher_add_to_dbg_list(struct mlx5dr_matcher *matcher)
+{
+ mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+ list_add(&matcher->list_node, &matcher->tbl->matcher_list);
+ mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+}
+
+static void dr_matcher_remove_from_dbg_list(struct mlx5dr_matcher *matcher)
+{
+ mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+ list_del(&matcher->list_node);
+ mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+}
+
struct mlx5dr_matcher *
mlx5dr_matcher_create(struct mlx5dr_table *tbl,
- u16 priority,
+ u32 priority,
u8 match_criteria_enable,
struct mlx5dr_match_parameters *mask)
{
@@ -688,36 +1004,33 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
matcher->prio = priority;
matcher->match_criteria = match_criteria_enable;
refcount_set(&matcher->refcount, 1);
- INIT_LIST_HEAD(&matcher->matcher_list);
+ INIT_LIST_HEAD(&matcher->list_node);
+ INIT_LIST_HEAD(&matcher->dbg_rule_list);
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
ret = dr_matcher_init(matcher, mask);
if (ret)
goto free_matcher;
- ret = dr_matcher_add_to_tbl(matcher);
- if (ret)
- goto matcher_uninit;
+ dr_matcher_add_to_dbg_list(matcher);
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
return matcher;
-matcher_uninit:
- dr_matcher_uninit(matcher);
free_matcher:
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
kfree(matcher);
dec_ref:
refcount_dec(&tbl->refcount);
return NULL;
}
-static int dr_matcher_disconnect(struct mlx5dr_domain *dmn,
- struct mlx5dr_table_rx_tx *nic_tbl,
- struct mlx5dr_matcher_rx_tx *next_nic_matcher,
- struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
{
struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
struct mlx5dr_htbl_connect_info info;
@@ -732,55 +1045,46 @@ static int dr_matcher_disconnect(struct mlx5dr_domain *dmn,
if (next_nic_matcher) {
info.type = CONNECT_HIT;
info.hit_next_htbl = next_nic_matcher->s_htbl;
- next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr;
- prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+ next_nic_matcher->s_htbl->pointing_ste = prev_anchor->chunk->ste_arr;
+ prev_anchor->chunk->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
} else {
info.type = CONNECT_MISS;
info.miss_icm_addr = nic_tbl->default_icm_addr;
- prev_anchor->ste_arr[0].next_htbl = NULL;
+ prev_anchor->chunk->ste_arr[0].next_htbl = NULL;
}
return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor,
&info, true);
}
-static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher)
+int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
{
- struct mlx5dr_matcher *prev_matcher, *next_matcher;
- struct mlx5dr_table *tbl = matcher->tbl;
- struct mlx5dr_domain *dmn = tbl->dmn;
- int ret = 0;
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher, *next_nic_matcher;
+ struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl;
+ int ret;
- if (list_is_last(&matcher->matcher_list, &tbl->matcher_list))
- next_matcher = NULL;
- else
- next_matcher = list_next_entry(matcher, matcher_list);
+ /* If the nic matcher is not on its parent nic table list,
+ * then it is detached - no need to disconnect it.
+ */
+ if (list_empty(&nic_matcher->list_node))
+ return 0;
- if (matcher->matcher_list.prev == &tbl->matcher_list)
- prev_matcher = NULL;
+ if (list_is_last(&nic_matcher->list_node, &nic_tbl->nic_matcher_list))
+ next_nic_matcher = NULL;
else
- prev_matcher = list_prev_entry(matcher, matcher_list);
-
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
- ret = dr_matcher_disconnect(dmn, &tbl->rx,
- next_matcher ? &next_matcher->rx : NULL,
- prev_matcher ? &prev_matcher->rx : NULL);
- if (ret)
- return ret;
- }
+ next_nic_matcher = list_next_entry(nic_matcher, list_node);
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
- dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
- ret = dr_matcher_disconnect(dmn, &tbl->tx,
- next_matcher ? &next_matcher->tx : NULL,
- prev_matcher ? &prev_matcher->tx : NULL);
- if (ret)
- return ret;
- }
+ if (nic_matcher->list_node.prev == &nic_tbl->nic_matcher_list)
+ prev_nic_matcher = NULL;
+ else
+ prev_nic_matcher = list_prev_entry(nic_matcher, list_node);
- list_del(&matcher->matcher_list);
+ ret = dr_matcher_disconnect_nic(dmn, nic_tbl, next_nic_matcher, prev_nic_matcher);
+ if (ret)
+ return ret;
+ list_del_init(&nic_matcher->list_node);
return 0;
}
@@ -788,16 +1092,16 @@ int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher)
{
struct mlx5dr_table *tbl = matcher->tbl;
- if (refcount_read(&matcher->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&matcher->refcount) > 1))
return -EBUSY;
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
- dr_matcher_remove_from_tbl(matcher);
+ dr_matcher_remove_from_dbg_list(matcher);
dr_matcher_uninit(matcher);
refcount_dec(&matcher->tbl->refcount);
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
kfree(matcher);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index e4cff7abb348..91ff19f67695 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -5,12 +5,8 @@
#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES)
-struct mlx5dr_rule_action_member {
- struct mlx5dr_action *action;
- struct list_head list;
-};
-
-static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste,
+static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste *new_last_ste,
struct list_head *miss_list,
struct list_head *send_list)
{
@@ -25,12 +21,12 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste,
if (!ste_info_last)
return -ENOMEM;
- mlx5dr_ste_set_miss_addr(last_ste->hw_ste,
+ mlx5dr_ste_set_miss_addr(ste_ctx, mlx5dr_ste_get_hw_ste(last_ste),
mlx5dr_ste_get_icm_addr(new_last_ste));
list_add_tail(&new_last_ste->miss_list_node, miss_list);
- mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_REDUCED,
- 0, last_ste->hw_ste,
+ mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL,
+ 0, mlx5dr_ste_get_hw_ste(last_ste),
ste_info_last, send_list, true);
return 0;
@@ -42,8 +38,10 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
u8 *hw_ste)
{
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_ste_htbl *new_htbl;
struct mlx5dr_ste *ste;
+ u64 icm_addr;
/* Create new table for miss entry */
new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
@@ -56,8 +54,9 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
}
/* One and only entry, never grows */
- ste = new_htbl->ste_arr;
- mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+ ste = new_htbl->chunk->ste_arr;
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr);
mlx5dr_htbl_get(new_htbl);
return ste;
@@ -78,9 +77,10 @@ dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher,
}
ste->ste_chain_location = orig_ste->ste_chain_location;
+ ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste;
/* In collision entry, all members share the same miss_list_head */
- ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
+ ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
/* Next table */
if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste,
@@ -103,14 +103,21 @@ dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info,
int ret;
list_del(&ste_info->send_list);
+
+ /* Copy data to ste, only reduced size or control, the last 16B (mask)
+ * is already written to the hw.
+ */
+ if (ste_info->size == DR_STE_SIZE_CTRL)
+ memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste),
+ ste_info->data, DR_STE_SIZE_CTRL);
+ else
+ memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste),
+ ste_info->data, DR_STE_SIZE_REDUCED);
+
ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data,
ste_info->size, ste_info->offset);
if (ret)
goto out;
- /* Copy data to ste, only reduced size, the last 16B (mask)
- * is already written to the hw.
- */
- memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED);
out:
kfree(ste_info);
@@ -155,7 +162,7 @@ dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste)
/* Check if hw_ste is present in the list */
list_for_each_entry(ste, miss_list, miss_list_node) {
- if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste))
+ if (mlx5dr_ste_equal_tag(mlx5dr_ste_get_hw_ste(ste), hw_ste))
return ste;
}
@@ -169,6 +176,7 @@ dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
struct mlx5dr_ste *col_ste,
u8 *hw_ste)
{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_ste *new_ste;
int ret;
@@ -176,15 +184,18 @@ dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
if (!new_ste)
return NULL;
+ /* Update collision pointing STE */
+ new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste;
+
/* In collision entry, all members share the same miss_list_head */
- new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste);
+ new_ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(col_ste);
/* Update the previous from the list */
- ret = dr_rule_append_to_miss_list(new_ste,
+ ret = dr_rule_append_to_miss_list(dmn->ste_ctx, new_ste,
mlx5dr_ste_get_miss_list(col_ste),
update_list);
if (ret) {
- mlx5dr_dbg(matcher->tbl->dmn, "Failed update dup entry\n");
+ mlx5dr_dbg(dmn, "Failed update dup entry\n");
goto err_exit;
}
@@ -203,7 +214,7 @@ static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
new_ste->next_htbl = cur_ste->next_htbl;
new_ste->ste_chain_location = cur_ste->ste_chain_location;
- if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location))
+ if (new_ste->next_htbl)
new_ste->next_htbl->pointing_ste = new_ste;
/* We need to copy the refcount since this ste
@@ -211,10 +222,8 @@ static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
*/
new_ste->refcount = cur_ste->refcount;
- /* Link old STEs rule_mem list to the new ste */
- mlx5dr_rule_update_rule_member(cur_ste, new_ste);
- INIT_LIST_HEAD(&new_ste->rule_list);
- list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list);
+ /* Link old STEs rule to the new ste */
+ mlx5dr_rule_set_last_member(cur_ste->rule_rx_tx, new_ste, false);
}
static struct mlx5dr_ste *
@@ -224,10 +233,12 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
struct mlx5dr_ste_htbl *new_htbl,
struct list_head *update_list)
{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_ste_send_info *ste_info;
bool use_update_list = false;
u8 hw_ste[DR_STE_SIZE] = {};
struct mlx5dr_ste *new_ste;
+ u64 icm_addr;
int new_idx;
u8 sb_idx;
@@ -236,13 +247,14 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask);
/* Copy STE control and tag */
- memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED);
- mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED);
+ mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl);
- new_ste = &new_htbl->ste_arr[new_idx];
+ new_ste = &new_htbl->chunk->ste_arr[new_idx];
- if (mlx5dr_ste_not_used_ste(new_ste)) {
+ if (mlx5dr_ste_is_not_used(new_ste)) {
mlx5dr_htbl_get(new_htbl);
list_add_tail(&new_ste->miss_list_node,
mlx5dr_ste_get_miss_list(new_ste));
@@ -253,7 +265,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
new_ste,
hw_ste);
if (!new_ste) {
- mlx5dr_dbg(matcher->tbl->dmn, "Failed adding collision entry, index: %d\n",
+ mlx5dr_dbg(dmn, "Failed adding collision entry, index: %d\n",
new_idx);
return NULL;
}
@@ -261,7 +273,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
use_update_list = true;
}
- memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED);
+ memcpy(mlx5dr_ste_get_hw_ste(new_ste), hw_ste, DR_STE_SIZE_REDUCED);
new_htbl->ctrl.num_of_valid_entries++;
@@ -326,7 +338,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
int err = 0;
int i;
- cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size);
+ cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk->size);
if (cur_entries < 1) {
mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n");
@@ -334,8 +346,8 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
}
for (i = 0; i < cur_entries; i++) {
- cur_ste = &cur_htbl->ste_arr[i];
- if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */
+ cur_ste = &cur_htbl->chunk->ste_arr[i];
+ if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */
continue;
err = dr_rule_rehash_copy_miss_list(matcher,
@@ -390,9 +402,10 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
/* Write new table to HW */
info.type = CONNECT_MISS;
- info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
- mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi,
- nic_dmn,
+ info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
+ dmn->info.caps.gvmi,
+ nic_dmn->type,
new_htbl,
formatted_ste,
&info);
@@ -436,20 +449,22 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
/* It is safe to operate dr_ste_set_hit_addr on the hw_ste here
* (48B len) which works only on first 32B
*/
- mlx5dr_ste_set_hit_addr(prev_htbl->ste_arr[0].hw_ste,
- new_htbl->chunk->icm_addr,
- new_htbl->chunk->num_of_entries);
+ mlx5dr_ste_set_hit_addr(dmn->ste_ctx,
+ prev_htbl->chunk->hw_ste_arr,
+ mlx5dr_icm_pool_get_chunk_icm_addr(new_htbl->chunk),
+ mlx5dr_icm_pool_get_chunk_num_of_entries(new_htbl->chunk));
- ste_to_update = &prev_htbl->ste_arr[0];
+ ste_to_update = &prev_htbl->chunk->ste_arr[0];
} else {
- mlx5dr_ste_set_hit_addr_by_next_htbl(cur_htbl->pointing_ste->hw_ste,
+ mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx,
+ mlx5dr_ste_get_hw_ste(cur_htbl->pointing_ste),
new_htbl);
ste_to_update = cur_htbl->pointing_ste;
}
- mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_REDUCED,
- 0, ste_to_update->hw_ste, ste_info,
- update_list, false);
+ mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL,
+ 0, mlx5dr_ste_get_hw_ste(ste_to_update),
+ ste_info, update_list, false);
return new_htbl;
@@ -478,10 +493,10 @@ static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule,
struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
enum mlx5dr_icm_chunk_size new_size;
- new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size);
+ new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk->size);
new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz);
- if (new_size == cur_htbl->chunk_size)
+ if (new_size == cur_htbl->chunk->size)
return NULL; /* Skip rehash, we already at the max size */
return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location,
@@ -496,6 +511,8 @@ dr_rule_handle_collision(struct mlx5dr_matcher *matcher,
struct list_head *miss_list,
struct list_head *send_list)
{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_ste_send_info *ste_info;
struct mlx5dr_ste *new_ste;
@@ -507,8 +524,9 @@ dr_rule_handle_collision(struct mlx5dr_matcher *matcher,
if (!new_ste)
goto free_send_info;
- if (dr_rule_append_to_miss_list(new_ste, miss_list, send_list)) {
- mlx5dr_dbg(matcher->tbl->dmn, "Failed to update prev miss_list\n");
+ if (dr_rule_append_to_miss_list(ste_ctx, new_ste,
+ miss_list, send_list)) {
+ mlx5dr_dbg(dmn, "Failed to update prev miss_list\n");
goto err_exit;
}
@@ -564,35 +582,66 @@ free_action_members:
return -ENOMEM;
}
-/* While the pointer of ste is no longer valid, like while moving ste to be
- * the first in the miss_list, and to be in the origin table,
- * all rule-members that are attached to this ste should update their ste member
- * to the new pointer
- */
-void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste,
- struct mlx5dr_ste *new_ste)
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste *ste,
+ bool force)
{
- struct mlx5dr_rule_member *rule_mem;
+ /* Update rule member is usually done for the last STE or during rule
+ * creation to recover from mid-creation failure (for this peruse the
+ * force flag is used)
+ */
+ if (ste->next_htbl && !force)
+ return;
- if (!list_empty(&ste->rule_list))
- list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list)
- rule_mem->ste = new_ste;
+ /* Update is required since each rule keeps track of its last STE */
+ ste->rule_rx_tx = nic_rule;
+ nic_rule->last_rule_ste = ste;
+}
+
+static struct mlx5dr_ste *dr_rule_get_pointed_ste(struct mlx5dr_ste *curr_ste)
+{
+ struct mlx5dr_ste *first_ste;
+
+ first_ste = list_first_entry(mlx5dr_ste_get_miss_list(curr_ste),
+ struct mlx5dr_ste, miss_list_node);
+
+ return first_ste->htbl->pointing_ste;
+}
+
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+ struct mlx5dr_ste *curr_ste,
+ int *num_of_stes)
+{
+ bool first = false;
+
+ *num_of_stes = 0;
+
+ if (!curr_ste)
+ return -ENOENT;
+
+ /* Iterate from last to first */
+ while (!first) {
+ first = curr_ste->ste_chain_location == 1;
+ ste_arr[*num_of_stes] = curr_ste;
+ *num_of_stes += 1;
+ curr_ste = dr_rule_get_pointed_ste(curr_ste);
+ }
+
+ return 0;
}
static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule,
struct mlx5dr_rule_rx_tx *nic_rule)
{
- struct mlx5dr_rule_member *rule_mem;
- struct mlx5dr_rule_member *tmp_mem;
+ struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
+ struct mlx5dr_ste *curr_ste = nic_rule->last_rule_ste;
+ int i;
- if (list_empty(&nic_rule->rule_members_list))
+ if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i))
return;
- list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) {
- list_del(&rule_mem->list);
- list_del(&rule_mem->use_ste_list);
- mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher);
- kvfree(rule_mem);
- }
+
+ while (i--)
+ mlx5dr_ste_put(ste_arr[i], rule->matcher, nic_rule->nic_matcher);
}
static u16 dr_get_bits_per_mask(u16 byte_mask)
@@ -612,43 +661,25 @@ static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
struct mlx5dr_domain_rx_tx *nic_dmn)
{
struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+ int threshold;
- if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size)
+ if (dmn->info.max_log_sw_icm_sz <= htbl->chunk->size)
return false;
- if (!ctrl->may_grow)
+ if (!mlx5dr_ste_htbl_may_grow(htbl))
return false;
- if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size)
+ if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk->size)
return false;
- if (ctrl->num_of_collisions >= ctrl->increase_threshold &&
- (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold)
+ threshold = mlx5dr_ste_htbl_increase_threshold(htbl);
+ if (ctrl->num_of_collisions >= threshold &&
+ (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= threshold)
return true;
return false;
}
-static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule,
- struct mlx5dr_ste *ste)
-{
- struct mlx5dr_rule_member *rule_mem;
-
- rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL);
- if (!rule_mem)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&rule_mem->list);
- INIT_LIST_HEAD(&rule_mem->use_ste_list);
-
- rule_mem->ste = ste;
- list_add_tail(&rule_mem->list, &nic_rule->rule_members_list);
-
- list_add_tail(&rule_mem->use_ste_list, &ste->rule_list);
-
- return 0;
-}
-
static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
struct mlx5dr_rule_rx_tx *nic_rule,
struct list_head *send_ste_list,
@@ -660,17 +691,16 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES];
u8 num_of_builders = nic_matcher->num_of_builders;
struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
u8 *curr_hw_ste, *prev_hw_ste;
struct mlx5dr_ste *action_ste;
- int i, k, ret;
+ int i, k;
/* Two cases:
* 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste
* 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added
* to support the action.
*/
- if (num_of_builders == new_hw_ste_arr_sz)
- return 0;
for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) {
curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE;
@@ -683,6 +713,10 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
mlx5dr_ste_get(action_ste);
+ action_ste->htbl->pointing_ste = last_ste;
+ last_ste->next_htbl = action_ste->htbl;
+ last_ste = action_ste;
+
/* While free ste we go over the miss list, so add this ste to the list */
list_add_tail(&action_ste->miss_list_node,
mlx5dr_ste_get_miss_list(action_ste));
@@ -693,22 +727,22 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
goto err_exit;
/* Point current ste to the new action */
- mlx5dr_ste_set_hit_addr_by_next_htbl(prev_hw_ste, action_ste->htbl);
- ret = dr_rule_add_member(nic_rule, action_ste);
- if (ret) {
- mlx5dr_dbg(matcher->tbl->dmn, "Failed adding rule member\n");
- goto free_ste_info;
- }
+ mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx,
+ prev_hw_ste,
+ action_ste->htbl);
+
+ mlx5dr_rule_set_last_member(nic_rule, action_ste, true);
+
mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0,
curr_hw_ste,
ste_info_arr[k],
send_ste_list, false);
}
+ last_ste->next_htbl = NULL;
+
return 0;
-free_ste_info:
- kfree(ste_info_arr[k]);
err_exit:
mlx5dr_ste_put(action_ste, matcher, nic_matcher);
return -ENOMEM;
@@ -723,7 +757,9 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
struct list_head *miss_list,
struct list_head *send_list)
{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_ste_send_info *ste_info;
+ u64 icm_addr;
/* Take ref on table, only on first time this ste is used */
mlx5dr_htbl_get(cur_htbl);
@@ -731,7 +767,8 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
/* new entry -> new branch */
list_add_tail(&ste->miss_list_node, miss_list);
- mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
ste->ste_chain_location = ste_location;
@@ -744,7 +781,7 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
ste,
hw_ste,
DR_CHUNK_SIZE_1)) {
- mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n");
+ mlx5dr_dbg(dmn, "Failed allocating table\n");
goto clean_ste_info;
}
@@ -790,9 +827,9 @@ dr_rule_handle_ste_branch(struct mlx5dr_rule *rule,
again:
index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl);
miss_list = &cur_htbl->chunk->miss_list[index];
- ste = &cur_htbl->ste_arr[index];
+ ste = &cur_htbl->chunk->ste_arr[index];
- if (mlx5dr_ste_not_used_ste(ste)) {
+ if (mlx5dr_ste_is_not_used(ste)) {
if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl,
ste, ste_location,
hw_ste, miss_list,
@@ -825,9 +862,9 @@ again:
new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl,
ste_location, send_ste_list);
if (!new_htbl) {
+ mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n",
+ cur_htbl->chunk->size);
mlx5dr_htbl_put(cur_htbl);
- mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n",
- cur_htbl->chunk_size);
} else {
cur_htbl = new_htbl;
}
@@ -875,20 +912,19 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
u32 s_idx, e_idx;
if (!value_size ||
- (value_size > sizeof(struct mlx5dr_match_param) ||
- (value_size % sizeof(u32)))) {
- mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n");
+ (value_size > DR_SZ_MATCH_PARAM || (value_size % sizeof(u32)))) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule parameters length is incorrect\n");
return false;
}
- mlx5dr_ste_copy_param(matcher->match_criteria, param, value);
+ mlx5dr_ste_copy_param(matcher->match_criteria, param, value, false);
if (match_criteria & DR_MATCHER_CRITERIA_OUTER) {
s_idx = offsetof(struct mlx5dr_match_param, outer);
e_idx = min(s_idx + sizeof(param->outer), value_size);
if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
- mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n");
+ mlx5dr_err(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n");
return false;
}
}
@@ -898,7 +934,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
e_idx = min(s_idx + sizeof(param->misc), value_size);
if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
- mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n");
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n");
return false;
}
}
@@ -908,7 +944,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
e_idx = min(s_idx + sizeof(param->inner), value_size);
if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
- mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n");
+ mlx5dr_err(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n");
return false;
}
}
@@ -918,7 +954,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
e_idx = min(s_idx + sizeof(param->misc2), value_size);
if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
- mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n");
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n");
return false;
}
}
@@ -928,7 +964,28 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
e_idx = min(s_idx + sizeof(param->misc3), value_size);
if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
- mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n");
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc4);
+ e_idx = min(s_idx + sizeof(param->misc4), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn,
+ "Rule misc4 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC5) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc5);
+ e_idx = min(s_idx + sizeof(param->misc5), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc5 parameters contains a value not specified by mask\n");
return false;
}
}
@@ -938,7 +995,22 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule,
struct mlx5dr_rule_rx_tx *nic_rule)
{
+ /* Check if this nic rule was actually created, or was it skipped
+ * and only the other type of the RX/TX nic rule was created.
+ */
+ if (!nic_rule->last_rule_ste)
+ return 0;
+
+ mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
dr_rule_clean_rule_members(rule, nic_rule);
+
+ nic_rule->nic_matcher->rules--;
+ if (!nic_rule->nic_matcher->rules)
+ mlx5dr_matcher_remove_from_tbl_nic(rule->matcher->tbl->dmn,
+ nic_rule->nic_matcher);
+
+ mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
+
return 0;
}
@@ -953,6 +1025,8 @@ static int dr_rule_destroy_rule(struct mlx5dr_rule *rule)
{
struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+ mlx5dr_dbg_rule_del(rule);
+
switch (dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
dr_rule_destroy_rule_nic(rule, &rule->rx);
@@ -981,33 +1055,30 @@ static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec)
}
static bool dr_rule_skip(enum mlx5dr_domain_type domain,
- enum mlx5dr_ste_entry_type ste_type,
+ enum mlx5dr_domain_nic_type nic_type,
struct mlx5dr_match_param *mask,
- struct mlx5dr_match_param *value)
+ struct mlx5dr_match_param *value,
+ u32 flow_source)
{
+ bool rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
+
if (domain != MLX5DR_DOMAIN_TYPE_FDB)
return false;
if (mask->misc.source_port) {
- if (ste_type == MLX5DR_STE_TYPE_RX)
- if (value->misc.source_port != WIRE_PORT)
- return true;
+ if (rx && value->misc.source_port != MLX5_VPORT_UPLINK)
+ return true;
- if (ste_type == MLX5DR_STE_TYPE_TX)
- if (value->misc.source_port == WIRE_PORT)
- return true;
+ if (!rx && value->misc.source_port == MLX5_VPORT_UPLINK)
+ return true;
}
- /* Metadata C can be used to describe the source vport */
- if (mask->misc2.metadata_reg_c_0) {
- if (ste_type == MLX5DR_STE_TYPE_RX)
- if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT)
- return true;
+ if (rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT)
+ return true;
+
+ if (!rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK)
+ return true;
- if (ste_type == MLX5DR_STE_TYPE_TX)
- if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT)
- return true;
- }
return false;
}
@@ -1034,35 +1105,38 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
nic_matcher = nic_rule->nic_matcher;
nic_dmn = nic_matcher->nic_tbl->nic_dmn;
- INIT_LIST_HEAD(&nic_rule->rule_members_list);
-
- if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param))
+ if (dr_rule_skip(dmn->type, nic_dmn->type, &matcher->mask, param,
+ rule->flow_source))
return 0;
+ hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
+ if (!hw_ste_arr)
+ return -ENOMEM;
+
+ mlx5dr_domain_nic_lock(nic_dmn);
+
+ ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher);
+ if (ret)
+ goto free_hw_ste;
+
ret = mlx5dr_matcher_select_builders(matcher,
nic_matcher,
dr_rule_get_ipv(&param->outer),
dr_rule_get_ipv(&param->inner));
if (ret)
- goto out_err;
-
- hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
- if (!hw_ste_arr) {
- ret = -ENOMEM;
- goto out_err;
- }
+ goto remove_from_nic_tbl;
/* Set the tag values inside the ste array */
ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr);
if (ret)
- goto free_hw_ste;
+ goto remove_from_nic_tbl;
/* Set the actions values/addresses inside the ste array */
ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions,
num_actions, hw_ste_arr,
&new_hw_ste_arr_sz);
if (ret)
- goto free_hw_ste;
+ goto remove_from_nic_tbl;
cur_htbl = nic_matcher->s_htbl;
@@ -1089,14 +1163,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
cur_htbl = ste->next_htbl;
- /* Keep all STEs in the rule struct */
- ret = dr_rule_add_member(nic_rule, ste);
- if (ret) {
- mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i);
- goto free_ste;
- }
-
mlx5dr_ste_get(ste);
+ mlx5dr_rule_set_last_member(nic_rule, ste, true);
}
/* Connect actions */
@@ -1115,12 +1183,14 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
if (htbl)
mlx5dr_htbl_put(htbl);
+ nic_matcher->rules++;
+
+ mlx5dr_domain_nic_unlock(nic_dmn);
+
kfree(hw_ste_arr);
return 0;
-free_ste:
- mlx5dr_ste_put(ste, matcher, nic_matcher);
free_rule:
dr_rule_clean_rule_members(rule, nic_rule);
/* Clean all ste_info's */
@@ -1128,9 +1198,14 @@ free_rule:
list_del(&ste_info->send_list);
kfree(ste_info);
}
+
+remove_from_nic_tbl:
+ if (!nic_matcher->rules)
+ mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher);
+
free_hw_ste:
+ mlx5dr_domain_nic_unlock(nic_dmn);
kfree(hw_ste_arr);
-out_err:
return ret;
}
@@ -1169,7 +1244,8 @@ static struct mlx5dr_rule *
dr_rule_create_rule(struct mlx5dr_matcher *matcher,
struct mlx5dr_match_parameters *value,
size_t num_actions,
- struct mlx5dr_action *actions[])
+ struct mlx5dr_action *actions[],
+ u32 flow_source)
{
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_match_param param = {};
@@ -1184,6 +1260,7 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher,
return NULL;
rule->matcher = matcher;
+ rule->flow_source = flow_source;
INIT_LIST_HEAD(&rule->rule_actions_list);
ret = dr_rule_add_action_members(rule, num_actions, actions);
@@ -1215,48 +1292,43 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher,
if (ret)
goto remove_action_members;
+ INIT_LIST_HEAD(&rule->dbg_node);
+ mlx5dr_dbg_rule_add(rule);
return rule;
remove_action_members:
dr_rule_remove_action_members(rule);
free_rule:
kfree(rule);
- mlx5dr_info(dmn, "Failed creating rule\n");
+ mlx5dr_err(dmn, "Failed creating rule\n");
return NULL;
}
struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
struct mlx5dr_match_parameters *value,
size_t num_actions,
- struct mlx5dr_action *actions[])
+ struct mlx5dr_action *actions[],
+ u32 flow_source)
{
struct mlx5dr_rule *rule;
- mutex_lock(&matcher->tbl->dmn->mutex);
refcount_inc(&matcher->refcount);
- rule = dr_rule_create_rule(matcher, value, num_actions, actions);
+ rule = dr_rule_create_rule(matcher, value, num_actions, actions, flow_source);
if (!rule)
refcount_dec(&matcher->refcount);
- mutex_unlock(&matcher->tbl->dmn->mutex);
-
return rule;
}
int mlx5dr_rule_destroy(struct mlx5dr_rule *rule)
{
struct mlx5dr_matcher *matcher = rule->matcher;
- struct mlx5dr_table *tbl = rule->matcher->tbl;
int ret;
- mutex_lock(&tbl->dmn->mutex);
-
ret = dr_rule_destroy_rule(rule);
-
- mutex_unlock(&tbl->dmn->mutex);
-
if (!ret)
refcount_dec(&matcher->refcount);
+
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index c7f10d4f8f8d..ef19a66f5233 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -32,6 +32,7 @@ struct dr_qp_rtr_attr {
u8 min_rnr_timer;
u8 sgid_index;
u16 udp_src_port;
+ u8 fl:1;
};
struct dr_qp_rts_attr {
@@ -45,6 +46,7 @@ struct dr_qp_init_attr {
u32 pdn;
u32 max_send_wr;
struct mlx5_uars_page *uar;
+ u8 isolate_vl_tc:1;
};
static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
@@ -100,14 +102,10 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
return err == CQ_POLL_ERR ? err : npolled;
}
-static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
-{
- pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
-}
-
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
struct dr_qp_init_attr *attr)
{
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
struct mlx5_wq_param wqp;
struct mlx5dr_qp *dr_qp;
@@ -136,7 +134,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
&dr_qp->wq_ctrl);
if (err) {
- mlx5_core_info(mdev, "Can't create QP WQ\n");
+ mlx5_core_warn(mdev, "Can't create QP WQ\n");
goto err_wq;
}
@@ -161,6 +159,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
MLX5_SET(qpc, qpc, pd, attr->pdn);
MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
MLX5_SET(qpc, qpc, log_page_size,
@@ -173,6 +172,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
@@ -180,14 +180,12 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
(__be64 *)MLX5_ADDR_OF(create_qp_in,
in, pas));
- err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
- kfree(in);
-
- if (err) {
- mlx5_core_warn(mdev, " Can't create QP\n");
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+ kvfree(in);
+ if (err)
goto err_in;
- }
- dr_qp->mqp.event = dr_qp_event;
dr_qp->uar = attr->uar;
return dr_qp;
@@ -204,7 +202,12 @@ err_wq:
static void dr_destroy_qp(struct mlx5_core_dev *mdev,
struct mlx5dr_qp *dr_qp)
{
- mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
+ mlx5_cmd_exec_in(mdev, destroy_qp, in);
+
kfree(dr_qp->sq.wqe_head);
mlx5_wq_destroy(&dr_qp->wq_ctrl);
kfree(dr_qp);
@@ -213,7 +216,7 @@ static void dr_destroy_qp(struct mlx5_core_dev *mdev,
static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
dma_wmb();
- *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
+ *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);
/* After wmb() the hw aware of new work */
wmb();
@@ -223,7 +226,7 @@ static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
u32 rkey, struct dr_data_seg *data_seg,
- u32 opcode, int nreq)
+ u32 opcode, bool notify_hw)
{
struct mlx5_wqe_raddr_seg *wq_raddr;
struct mlx5_wqe_ctrl_seg *wq_ctrl;
@@ -242,7 +245,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
MLX5_WQE_CTRL_CQ_UPDATE : 0;
wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
opcode);
- wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
+ wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
wq_raddr = (void *)(wq_ctrl + 1);
wq_raddr->raddr = cpu_to_be64(remote_addr);
wq_raddr->rkey = cpu_to_be32(rkey);
@@ -255,16 +258,16 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
- if (nreq)
+ if (notify_hw)
dr_cmd_notify_hw(dr_qp, wq_ctrl);
}
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
- &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
+ &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
- &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
+ &send_info->read, MLX5_OPCODE_RDMA_READ, true);
}
/**
@@ -322,10 +325,14 @@ static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
do {
ne = dr_poll_cq(send_ring->cq, 1);
- if (ne < 0)
+ if (unlikely(ne < 0)) {
+ mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
+ send_ring->qp->qpn);
+ send_ring->err_state = true;
return ne;
- else if (ne == 1)
+ } else if (ne == 1) {
send_ring->pending_wqe -= send_ring->signal_th;
+ }
} while (is_drain && send_ring->pending_wqe);
return 0;
@@ -343,7 +350,7 @@ static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
send_info->read.length = send_info->write.length;
/* Read into the same write area */
send_info->read.addr = (uintptr_t)send_info->write.addr;
- send_info->read.lkey = send_ring->mr->mkey.key;
+ send_info->read.lkey = send_ring->mr->mkey;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->read.send_flags = IB_SEND_SIGNALED;
@@ -358,9 +365,19 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
u32 buff_offset;
int ret;
+ if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+ send_ring->err_state)) {
+ mlx5_core_dbg_once(dmn->mdev,
+ "Skipping post send: QP err state: %d, device state: %d\n",
+ send_ring->err_state, dmn->mdev->state);
+ return 0;
+ }
+
+ spin_lock(&send_ring->lock);
+
ret = dr_handle_pending_wc(dmn, send_ring);
if (ret)
- return ret;
+ goto out_unlock;
if (send_info->write.length > dmn->info.max_inline_size) {
buff_offset = (send_ring->tx_head &
@@ -371,14 +388,16 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
(void *)(uintptr_t)send_info->write.addr,
send_info->write.length);
send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
- send_info->write.lkey = send_ring->mr->mkey.key;
+ send_info->write.lkey = send_ring->mr->mkey;
}
send_ring->tx_head++;
dr_fill_data_segs(send_ring, send_info);
dr_post_send(send_ring->qp, send_info);
- return 0;
+out_unlock:
+ spin_unlock(&send_ring->lock);
+ return ret;
}
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
@@ -388,21 +407,21 @@ static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
int *iterations,
int *num_stes)
{
+ u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
int alloc_size;
- if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
- *iterations = htbl->chunk->byte_size /
- dmn->send_ring->max_post_send_size;
+ if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
+ *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
*byte_size = dmn->send_ring->max_post_send_size;
alloc_size = *byte_size;
*num_stes = *byte_size / DR_STE_SIZE;
} else {
*iterations = 1;
- *num_stes = htbl->chunk->num_of_entries;
+ *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
alloc_size = *num_stes * DR_STE_SIZE;
}
- *data = kzalloc(alloc_size, GFP_KERNEL);
+ *data = kvzalloc(alloc_size, GFP_KERNEL);
if (!*data)
return -ENOMEM;
@@ -428,11 +447,13 @@ int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
{
struct postsend_info send_info = {};
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
+
send_info.write.addr = (uintptr_t)data;
send_info.write.length = size;
send_info.write.lkey = 0;
send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
- send_info.rkey = ste->htbl->chunk->rkey;
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
return dr_postsend_icm_data(dmn, &send_info);
}
@@ -441,7 +462,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
struct mlx5dr_ste_htbl *htbl,
u8 *formatted_ste, u8 *mask)
{
- u32 byte_size = htbl->chunk->byte_size;
+ u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
int num_stes_per_iter;
int iterations;
u8 *data;
@@ -454,6 +475,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
if (ret)
return ret;
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);
+
/* Send the data iteration times */
for (i = 0; i < iterations; i++) {
u32 ste_index = i * (byte_size / DR_STE_SIZE);
@@ -463,20 +486,25 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
* need to add the bit_mask
*/
for (j = 0; j < num_stes_per_iter; j++) {
- u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
+ struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
u32 ste_off = j * DR_STE_SIZE;
- if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
+ if (mlx5dr_ste_is_not_used(ste)) {
memcpy(data + ste_off,
formatted_ste, DR_STE_SIZE);
} else {
/* Copy data */
memcpy(data + ste_off,
- htbl->ste_arr[ste_index + j].hw_ste,
+ htbl->chunk->hw_ste_arr +
+ DR_STE_SIZE_REDUCED * (ste_index + j),
DR_STE_SIZE_REDUCED);
/* Copy bit_mask */
memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
mask, DR_STE_SIZE_MASK);
+ /* Only when we have mask we need to re-arrange the STE */
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
+ data + (j * DR_STE_SIZE),
+ DR_STE_SIZE);
}
}
@@ -484,8 +512,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
send_info.write.length = byte_size;
send_info.write.lkey = 0;
send_info.remote_addr =
- mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
- send_info.rkey = htbl->chunk->rkey;
+ mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
ret = dr_postsend_icm_data(dmn, &send_info);
if (ret)
@@ -493,7 +521,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
}
out_free:
- kfree(data);
+ kvfree(data);
return ret;
}
@@ -503,9 +531,10 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
u8 *ste_init_data,
bool update_hw_ste)
{
- u32 byte_size = htbl->chunk->byte_size;
+ u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
int iterations;
int num_stes;
+ u8 *copy_dst;
u8 *data;
int ret;
int i;
@@ -515,18 +544,20 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
if (ret)
return ret;
- for (i = 0; i < num_stes; i++) {
- u8 *copy_dst;
+ if (update_hw_ste) {
+ /* Copy the reduced STE to hash table ste_arr */
+ for (i = 0; i < num_stes; i++) {
+ copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
+ memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
+ }
+ }
- /* Copy the same ste on the data buffer */
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
+
+ /* Copy the same STE on the data buffer */
+ for (i = 0; i < num_stes; i++) {
copy_dst = data + i * DR_STE_SIZE;
memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
-
- if (update_hw_ste) {
- /* Copy the reduced ste to hash table ste_arr */
- copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
- memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
- }
}
/* Send the data iteration times */
@@ -538,8 +569,8 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
send_info.write.length = byte_size;
send_info.write.lkey = 0;
send_info.remote_addr =
- mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
- send_info.rkey = htbl->chunk->rkey;
+ mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
ret = dr_postsend_icm_data(dmn, &send_info);
if (ret)
@@ -547,7 +578,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
}
out_free:
- kfree(data);
+ kvfree(data);
return ret;
}
@@ -557,15 +588,15 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
struct postsend_info send_info = {};
int ret;
- send_info.write.addr = (uintptr_t)action->rewrite.data;
- send_info.write.length = action->rewrite.chunk->byte_size;
+ send_info.write.addr = (uintptr_t)action->rewrite->data;
+ send_info.write.length = action->rewrite->num_of_actions *
+ DR_MODIFY_ACTION_SIZE;
send_info.write.lkey = 0;
- send_info.remote_addr = action->rewrite.chunk->mr_addr;
- send_info.rkey = action->rewrite.chunk->rkey;
+ send_info.remote_addr =
+ mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
- mutex_lock(&dmn->mutex);
ret = dr_postsend_icm_data(dmn, &send_info);
- mutex_unlock(&dmn->mutex);
return ret;
}
@@ -584,8 +615,10 @@ static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
MLX5_SET(qpc, qpc, rre, 1);
MLX5_SET(qpc, qpc, rwe, 1);
- return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
- &dr_qp->mqp);
+ MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+ MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
+
+ return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
}
static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
@@ -597,14 +630,16 @@ static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
- MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
+ MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
- MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
+ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
+
+ MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+ MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
- return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
- &dr_qp->mqp);
+ return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
}
static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
@@ -616,7 +651,7 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
- MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
+ MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
MLX5_SET(qpc, qpc, mtu, attr->mtu);
MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
@@ -633,10 +668,26 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
attr->udp_src_port);
MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
+ MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
MLX5_SET(qpc, qpc, min_rnr_nak, 1);
- return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
- &dr_qp->mqp);
+ MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+ MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
+
+ return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
+}
+
+static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
+{
+ /* Check whether RC RoCE QP creation with force loopback is allowed.
+ * There are two separate capability bits for this:
+ * - force loopback when RoCE is enabled
+ * - force loopback when RoCE is disabled
+ */
+ return ((caps->roce_caps.roce_en &&
+ caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
+ (!caps->roce_caps.roce_en &&
+ caps->roce_caps.fl_rc_qp_when_roce_disabled));
}
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
@@ -651,24 +702,37 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
/* Init */
ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
- if (ret)
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify QP rst2init\n");
return ret;
+ }
/* RTR */
- ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
- if (ret)
- return ret;
-
rtr_attr.mtu = mtu;
- rtr_attr.qp_num = dr_qp->mqp.qpn;
+ rtr_attr.qp_num = dr_qp->qpn;
rtr_attr.min_rnr_timer = 12;
rtr_attr.port_num = port;
- rtr_attr.sgid_index = gid_index;
rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;
+ /* If QP creation with force loopback is allowed, then there
+ * is no need for GID index when creating the QP.
+ * Otherwise we query GID attributes and use GID index.
+ */
+ rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
+ if (!rtr_attr.fl) {
+ ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
+ &rtr_attr.dgid_attr);
+ if (ret)
+ return ret;
+
+ rtr_attr.sgid_index = gid_index;
+ }
+
ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
- if (ret)
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
return ret;
+ }
/* RTS */
rts_attr.timeout = 14;
@@ -676,16 +740,18 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
rts_attr.rnr_retry = 7;
ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
- if (ret)
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
return ret;
+ }
return 0;
}
-static void dr_cq_event(struct mlx5_core_cq *mcq,
- enum mlx5_event event)
+static void dr_cq_complete(struct mlx5_core_cq *mcq,
+ struct mlx5_eqe *eqe)
{
- pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
+ pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
@@ -698,7 +764,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
struct mlx5_cqe64 *cqe;
struct mlx5dr_cq *cq;
int inlen, err, eqn;
- unsigned int irqn;
void *cqc, *in;
__be64 *pas;
int vector;
@@ -731,7 +796,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
goto err_cqwq;
vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
- err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
+ err = mlx5_vector2eqn(mdev, vector, &eqn);
if (err) {
kvfree(in);
goto err_cqwq;
@@ -739,7 +804,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -748,7 +813,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
- cq->mcq.event = dr_cq_event;
+ cq->mcq.comp = dr_cq_complete;
err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
kvfree(in);
@@ -760,9 +825,13 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
*cq->mcq.set_ci_db = 0;
- *cq->mcq.arm_db = 0;
+
+ /* set no-zero value, in order to avoid the HW to run db-recovery on
+ * CQ that used in polling mode.
+ */
+ *cq->mcq.arm_db = cpu_to_be32(2 << 28);
+
cq->mcq.vector = 0;
- cq->mcq.irqn = irqn;
cq->mcq.uar = uar;
return cq;
@@ -781,8 +850,7 @@ static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
kfree(cq);
}
-static int
-dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
+static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
{
u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
void *mkc;
@@ -813,7 +881,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
if (!mr)
return NULL;
- dma_device = &mdev->pdev->dev;
+ dma_device = mlx5_core_dma_dev(mdev);
dma_addr = dma_map_single(dma_device, buf, size,
DMA_BIDIRECTIONAL);
err = dma_mapping_error(dma_device, dma_addr);
@@ -841,8 +909,8 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
- mlx5_core_destroy_mkey(mdev, &mr->mkey);
- dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
+ mlx5_core_destroy_mkey(mdev, mr->mkey);
+ dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
DMA_BIDIRECTIONAL);
kfree(mr);
}
@@ -861,6 +929,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
cq_size = QUEUE_SIZE + 1;
dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
if (!dmn->send_ring->cq) {
+ mlx5dr_err(dmn, "Failed creating CQ\n");
ret = -ENOMEM;
goto free_send_ring;
}
@@ -870,8 +939,15 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
init_attr.uar = dmn->uar;
init_attr.max_send_wr = QUEUE_SIZE;
+ /* Isolated VL is applicable only if force loopback is supported */
+ if (dr_send_allow_fl(&dmn->info.caps))
+ init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
+
+ spin_lock_init(&dmn->send_ring->lock);
+
dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
if (!dmn->send_ring->qp) {
+ mlx5dr_err(dmn, "Failed creating QP\n");
ret = -ENOMEM;
goto clean_cq;
}
@@ -964,7 +1040,7 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
send_info.write.lkey = 0;
/* Using the sync_mr in order to write/read */
send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
- send_info.rkey = send_ring->sync_mr->mkey.key;
+ send_info.rkey = send_ring->sync_mr->mkey;
for (i = 0; i < num_of_sends_req; i++) {
ret = dr_postsend_icm_data(dmn, &send_info);
@@ -972,7 +1048,9 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
return ret;
}
+ spin_lock(&send_ring->lock);
ret = dr_handle_pending_wc(dmn, send_ring);
+ spin_unlock(&send_ring->lock);
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index aade62a9ee5c..09ebd3088857 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -3,104 +3,7 @@
#include <linux/types.h>
#include <linux/crc32.h>
-#include "dr_types.h"
-
-#define DR_STE_CRC_POLY 0xEDB88320L
-#define STE_IPV4 0x1
-#define STE_IPV6 0x2
-#define STE_TCP 0x1
-#define STE_UDP 0x2
-#define STE_SPI 0x3
-#define IP_VERSION_IPV4 0x4
-#define IP_VERSION_IPV6 0x6
-#define STE_SVLAN 0x1
-#define STE_CVLAN 0x2
-
-#define DR_STE_ENABLE_FLOW_TAG BIT(31)
-
-/* Set to STE a specific value using DR_STE_SET */
-#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \
- if ((spec)->s_fname) { \
- MLX5_SET(ste_##lookup_type, tag, t_fname, value); \
- (spec)->s_fname = 0; \
- } \
-} while (0)
-
-/* Set to STE spec->s_fname to tag->t_fname */
-#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \
- DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname)
-
-/* Set to STE -1 to bit_mask->bm_fname and set spec->s_fname as used */
-#define DR_STE_SET_MASK(lookup_type, bit_mask, bm_fname, spec, s_fname) \
- DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, -1)
-
-/* Set to STE spec->s_fname to bit_mask->bm_fname and set spec->s_fname as used */
-#define DR_STE_SET_MASK_V(lookup_type, bit_mask, bm_fname, spec, s_fname) \
- DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, (spec)->s_fname)
-
-#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \
- MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \
- MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \
-} while (0)
-
-#define DR_STE_SET_MPLS_MASK(lookup_type, mask, in_out, bit_mask) do { \
- DR_STE_SET_MASK_V(lookup_type, mask, mpls0_label, mask, \
- in_out##_first_mpls_label);\
- DR_STE_SET_MASK_V(lookup_type, mask, mpls0_s_bos, mask, \
- in_out##_first_mpls_s_bos); \
- DR_STE_SET_MASK_V(lookup_type, mask, mpls0_exp, mask, \
- in_out##_first_mpls_exp); \
- DR_STE_SET_MASK_V(lookup_type, mask, mpls0_ttl, mask, \
- in_out##_first_mpls_ttl); \
-} while (0)
-
-#define DR_STE_SET_MPLS_TAG(lookup_type, mask, in_out, tag) do { \
- DR_STE_SET_TAG(lookup_type, tag, mpls0_label, mask, \
- in_out##_first_mpls_label);\
- DR_STE_SET_TAG(lookup_type, tag, mpls0_s_bos, mask, \
- in_out##_first_mpls_s_bos); \
- DR_STE_SET_TAG(lookup_type, tag, mpls0_exp, mask, \
- in_out##_first_mpls_exp); \
- DR_STE_SET_TAG(lookup_type, tag, mpls0_ttl, mask, \
- in_out##_first_mpls_ttl); \
-} while (0)
-
-#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
- (_misc)->outer_first_mpls_over_gre_label || \
- (_misc)->outer_first_mpls_over_gre_exp || \
- (_misc)->outer_first_mpls_over_gre_s_bos || \
- (_misc)->outer_first_mpls_over_gre_ttl)
-#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
- (_misc)->outer_first_mpls_over_udp_label || \
- (_misc)->outer_first_mpls_over_udp_exp || \
- (_misc)->outer_first_mpls_over_udp_s_bos || \
- (_misc)->outer_first_mpls_over_udp_ttl)
-
-#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \
- ((inner) ? MLX5DR_STE_LU_TYPE_##lookup_type##_I : \
- (rx) ? MLX5DR_STE_LU_TYPE_##lookup_type##_D : \
- MLX5DR_STE_LU_TYPE_##lookup_type##_O)
-
-enum dr_ste_tunl_action {
- DR_STE_TUNL_ACTION_NONE = 0,
- DR_STE_TUNL_ACTION_ENABLE = 1,
- DR_STE_TUNL_ACTION_DECAP = 2,
- DR_STE_TUNL_ACTION_L3_DECAP = 3,
- DR_STE_TUNL_ACTION_POP_VLAN = 4,
-};
-
-enum dr_ste_action_type {
- DR_STE_ACTION_TYPE_PUSH_VLAN = 1,
- DR_STE_ACTION_TYPE_ENCAP_L3 = 3,
- DR_STE_ACTION_TYPE_ENCAP = 4,
-};
+#include "dr_ste.h"
struct dr_hw_ste_format {
u8 ctrl[DR_STE_SIZE_CTRL];
@@ -112,11 +15,17 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
{
u32 crc = crc32(0, input_data, length);
- return htonl(crc);
+ return (__force u32)htonl(crc);
+}
+
+bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->sw_format_ver > MLX5_STEERING_FORMAT_CONNECTX_5;
}
u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
{
+ u32 num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
u8 masked[DR_STE_SIZE_TAG] = {};
u32 crc32, index;
@@ -124,7 +33,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
int i;
/* Don't calculate CRC if the result is predicted */
- if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0)
+ if (num_entries == 1 || htbl->byte_mask == 0)
return 0;
/* Mask tag using byte mask, bit per byte */
@@ -137,12 +46,12 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
}
crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG);
- index = crc32 & (htbl->chunk->num_of_entries - 1);
+ index = crc32 & (num_entries - 1);
return index;
}
-static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask)
+u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask)
{
u16 byte_mask = 0;
int i;
@@ -155,109 +64,18 @@ static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask)
return byte_mask;
}
-void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask)
+static u8 *dr_ste_get_tag(u8 *hw_ste_p)
{
struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK);
-}
-
-void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag)
-{
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer,
- DR_STE_ENABLE_FLOW_TAG | flow_tag);
-}
-
-void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id)
-{
- /* This can be used for both rx_steering_mult and for sx_transmit */
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id);
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16);
-}
-
-void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p)
-{
- MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1);
-}
-
-void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr,
- bool go_back)
-{
- MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
- DR_STE_ACTION_TYPE_PUSH_VLAN);
- MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr);
- /* Due to HW limitation we need to set this bit, otherwise reforamt +
- * push vlan will not work.
- */
- if (go_back)
- mlx5dr_ste_set_go_back_bit(hw_ste_p);
-}
-
-void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, int size, bool encap_l3)
-{
- MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
- encap_l3 ? DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP);
- /* The hardware expects here size in words (2 byte) */
- MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2);
- MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id);
-}
-
-void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p)
-{
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
- DR_STE_TUNL_ACTION_DECAP);
-}
-
-void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p)
-{
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
- DR_STE_TUNL_ACTION_POP_VLAN);
-}
-
-void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan)
-{
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
- DR_STE_TUNL_ACTION_L3_DECAP);
- MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0);
-}
-
-void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type)
-{
- MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
-}
-
-u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p)
-{
- return MLX5_GET(ste_general, hw_ste_p, entry_type);
-}
-
-void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
- u32 re_write_index)
-{
- MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions,
- num_of_actions);
- MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer,
- re_write_index);
-}
-
-void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
-{
- MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi);
+ return hw_ste->tag;
}
-void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type,
- u16 gvmi)
+void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask)
{
- MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
- MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type);
- MLX5_SET(ste_general, hw_ste_p, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE);
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- /* Set GVMI once, this is the same for RX/TX
- * bits 63_48 of next table base / miss address encode the next GVMI
- */
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi);
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi);
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi);
+ memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK);
}
static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste)
@@ -272,55 +90,67 @@ static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste)
hw_ste->mask[0] = 0;
}
-u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste)
+void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste_p, u64 miss_addr)
{
- u64 index =
- (MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_31_6) |
- MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_39_32) << 26);
-
- return index << 6;
+ ste_ctx->set_miss_addr(hw_ste_p, miss_addr);
}
-void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size)
+static void dr_ste_always_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 miss_addr)
{
- u64 index = (icm_addr >> 5) | ht_size;
+ ste_ctx->set_next_lu_type(hw_ste, MLX5DR_STE_LU_TYPE_DONT_CARE);
+ ste_ctx->set_miss_addr(hw_ste, miss_addr);
+ dr_ste_set_always_miss((struct dr_hw_ste_format *)hw_ste);
+}
- MLX5_SET(ste_general, hw_ste, next_table_base_39_32_size, index >> 27);
- MLX5_SET(ste_general, hw_ste, next_table_base_31_5_size, index);
+void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 icm_addr, u32 ht_size)
+{
+ ste_ctx->set_hit_addr(hw_ste, icm_addr, ht_size);
}
u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste)
{
- u32 index = ste - ste->htbl->ste_arr;
+ u64 base_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(ste->htbl->chunk);
+ u32 index = ste - ste->htbl->chunk->ste_arr;
- return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index;
+ return base_icm_addr + DR_STE_SIZE * index;
}
u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste)
{
- u32 index = ste - ste->htbl->ste_arr;
+ u32 index = ste - ste->htbl->chunk->ste_arr;
- return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index;
+ return mlx5dr_icm_pool_get_chunk_mr_addr(ste->htbl->chunk) + DR_STE_SIZE * index;
+}
+
+u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste)
+{
+ u64 index = ste - ste->htbl->chunk->ste_arr;
+
+ return ste->htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * index;
}
struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste)
{
- u32 index = ste - ste->htbl->ste_arr;
+ u32 index = ste - ste->htbl->chunk->ste_arr;
- return &ste->htbl->miss_list[index];
+ return &ste->htbl->chunk->miss_list[index];
}
-static void dr_ste_always_hit_htbl(struct mlx5dr_ste *ste,
+static void dr_ste_always_hit_htbl(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste,
struct mlx5dr_ste_htbl *next_htbl)
{
struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
- u8 *hw_ste = ste->hw_ste;
- MLX5_SET(ste_general, hw_ste, byte_mask, next_htbl->byte_mask);
- MLX5_SET(ste_general, hw_ste, next_lu_type, next_htbl->lu_type);
- mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+ ste_ctx->set_byte_mask(hw_ste, next_htbl->byte_mask);
+ ste_ctx->set_next_lu_type(hw_ste, next_htbl->lu_type);
+ ste_ctx->set_hit_addr(hw_ste, mlx5dr_icm_pool_get_chunk_icm_addr(chunk),
+ mlx5dr_icm_pool_get_chunk_num_of_entries(chunk));
- dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste);
+ dr_ste_set_always_hit((struct dr_hw_ste_format *)hw_ste);
}
bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
@@ -343,38 +173,36 @@ bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
*/
static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
{
- memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED);
+ memcpy(mlx5dr_ste_get_hw_ste(dst), mlx5dr_ste_get_hw_ste(src),
+ DR_STE_SIZE_REDUCED);
dst->next_htbl = src->next_htbl;
if (dst->next_htbl)
dst->next_htbl->pointing_ste = dst;
dst->refcount = src->refcount;
-
- INIT_LIST_HEAD(&dst->rule_list);
- list_splice_tail_init(&src->rule_list, &dst->rule_list);
}
/* Free ste which is the head and the only one in miss_list */
static void
-dr_ste_remove_head_ste(struct mlx5dr_ste *ste,
+dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste *ste,
struct mlx5dr_matcher_rx_tx *nic_matcher,
struct mlx5dr_ste_send_info *ste_info_head,
struct list_head *send_ste_list,
struct mlx5dr_ste_htbl *stats_tbl)
{
u8 tmp_data_ste[DR_STE_SIZE] = {};
- struct mlx5dr_ste tmp_ste = {};
u64 miss_addr;
- tmp_ste.hw_ste = tmp_data_ste;
+ miss_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
/* Use temp ste because dr_ste_always_miss_addr
* touches bit_mask area which doesn't exist at ste->hw_ste.
+ * Need to use a full-sized (DR_STE_SIZE) hw_ste.
*/
- memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED);
- miss_addr = nic_matcher->e_anchor->chunk->icm_addr;
- mlx5dr_ste_always_miss_addr(&tmp_ste, miss_addr);
- memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED);
+ memcpy(tmp_data_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED);
+ dr_ste_always_miss_addr(ste_ctx, tmp_data_ste, miss_addr);
+ memcpy(mlx5dr_ste_get_hw_ste(ste), tmp_data_ste, DR_STE_SIZE_REDUCED);
list_del_init(&ste->miss_list_node);
@@ -392,32 +220,42 @@ dr_ste_remove_head_ste(struct mlx5dr_ste *ste,
* |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0
*/
static void
-dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste,
+dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ struct mlx5dr_ste *next_ste,
struct mlx5dr_ste_send_info *ste_info_head,
struct list_head *send_ste_list,
struct mlx5dr_ste_htbl *stats_tbl)
{
struct mlx5dr_ste_htbl *next_miss_htbl;
+ u8 hw_ste[DR_STE_SIZE] = {};
+ int sb_idx;
next_miss_htbl = next_ste->htbl;
/* Remove from the miss_list the next_ste before copy */
list_del_init(&next_ste->miss_list_node);
- /* All rule-members that use next_ste should know about that */
- mlx5dr_rule_update_rule_member(next_ste, ste);
-
/* Move data from next into ste */
dr_ste_replace(ste, next_ste);
+ /* Update the rule on STE change */
+ mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false);
+
+ /* Copy all 64 hw_ste bytes */
+ memcpy(hw_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED);
+ sb_idx = ste->ste_chain_location - 1;
+ mlx5dr_ste_set_bit_mask(hw_ste,
+ nic_matcher->ste_builder[sb_idx].bit_mask);
+
/* Del the htbl that contains the next_ste.
* The origin htbl stay with the same number of entries.
*/
mlx5dr_htbl_put(next_miss_htbl);
- mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED,
- 0, ste->hw_ste,
+ mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE,
+ 0, hw_ste,
ste_info_head,
send_ste_list,
true /* Copy data */);
@@ -429,7 +267,8 @@ dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste,
/* Free ste that is located in the middle of the miss list:
* |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_|
*/
-static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste,
+static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste *ste,
struct mlx5dr_ste_send_info *ste_info,
struct list_head *send_ste_list,
struct mlx5dr_ste_htbl *stats_tbl)
@@ -441,12 +280,13 @@ static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste,
if (WARN_ON(!prev_ste))
return;
- miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste);
- mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr);
+ miss_addr = ste_ctx->get_miss_addr(mlx5dr_ste_get_hw_ste(ste));
+ ste_ctx->set_miss_addr(mlx5dr_ste_get_hw_ste(prev_ste), miss_addr);
- mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_REDUCED, 0,
- prev_ste->hw_ste, ste_info,
- send_ste_list, true /* Copy data*/);
+ mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0,
+ mlx5dr_ste_get_hw_ste(prev_ste),
+ ste_info, send_ste_list,
+ true /* Copy data*/);
list_del_init(&ste->miss_list_node);
@@ -460,6 +300,7 @@ void mlx5dr_ste_free(struct mlx5dr_ste *ste,
{
struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_ste_send_info ste_info_head;
struct mlx5dr_ste *next_ste, *first_ste;
bool put_on_origin_table = true;
@@ -488,18 +329,22 @@ void mlx5dr_ste_free(struct mlx5dr_ste *ste,
if (!next_ste) {
/* One and only entry in the list */
- dr_ste_remove_head_ste(ste, nic_matcher,
+ dr_ste_remove_head_ste(ste_ctx, ste,
+ nic_matcher,
&ste_info_head,
&send_ste_list,
stats_tbl);
} else {
/* First but not only entry in the list */
- dr_ste_replace_head_ste(ste, next_ste, &ste_info_head,
+ dr_ste_replace_head_ste(nic_matcher, ste,
+ next_ste, &ste_info_head,
&send_ste_list, stats_tbl);
put_on_origin_table = false;
}
} else { /* Ste in the middle of the list */
- dr_ste_remove_middle_ste(ste, &ste_info_head, &send_ste_list, stats_tbl);
+ dr_ste_remove_middle_ste(ste_ctx, ste,
+ &ste_info_head, &send_ste_list,
+ stats_tbl);
}
/* Update HW */
@@ -523,67 +368,49 @@ bool mlx5dr_ste_equal_tag(void *src, void *dst)
return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG);
}
-void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
+void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste,
struct mlx5dr_ste_htbl *next_htbl)
{
- struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
+ u64 icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(next_htbl->chunk);
+ u32 num_entries =
+ mlx5dr_icm_pool_get_chunk_num_of_entries(next_htbl->chunk);
- mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+ ste_ctx->set_hit_addr(hw_ste, icm_addr, num_entries);
}
-void mlx5dr_ste_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste_p, u32 ste_size)
{
- u64 index = miss_addr >> 6;
-
- /* Miss address for TX and RX STEs located in the same offsets */
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26);
- MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index);
-}
-
-void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr)
-{
- u8 *hw_ste = ste->hw_ste;
-
- MLX5_SET(ste_rx_steering_mult, hw_ste, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE);
- mlx5dr_ste_set_miss_addr(hw_ste, miss_addr);
- dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste);
-}
-
-/* The assumption here is that we don't update the ste->hw_ste if it is not
- * used ste, so it will be all zero, checking the next_lu_type.
- */
-bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste;
-
- if (MLX5_GET(ste_general, hw_ste, next_lu_type) ==
- MLX5DR_STE_LU_TYPE_NOP)
- return true;
-
- return false;
-}
-
-bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste)
-{
- return !ste->refcount;
+ if (ste_ctx->prepare_for_postsend)
+ ste_ctx->prepare_for_postsend(hw_ste_p, ste_size);
}
/* Init one ste as a pattern for ste data array */
-void mlx5dr_ste_set_formatted_ste(u16 gvmi,
- struct mlx5dr_domain_rx_tx *nic_dmn,
+void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ u16 gvmi,
+ enum mlx5dr_domain_nic_type nic_type,
struct mlx5dr_ste_htbl *htbl,
u8 *formatted_ste,
struct mlx5dr_htbl_connect_info *connect_info)
{
- struct mlx5dr_ste ste = {};
+ bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
+ u8 tmp_hw_ste[DR_STE_SIZE] = {0};
- mlx5dr_ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi);
- ste.hw_ste = formatted_ste;
+ ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi);
+ /* Use temp ste because dr_ste_always_miss_addr/hit_htbl
+ * touches bit_mask area which doesn't exist at ste->hw_ste.
+ * Need to use a full-sized (DR_STE_SIZE) hw_ste.
+ */
+ memcpy(tmp_hw_ste, formatted_ste, DR_STE_SIZE_REDUCED);
if (connect_info->type == CONNECT_HIT)
- dr_ste_always_hit_htbl(&ste, connect_info->hit_next_htbl);
+ dr_ste_always_hit_htbl(ste_ctx, tmp_hw_ste,
+ connect_info->hit_next_htbl);
else
- mlx5dr_ste_always_miss_addr(&ste, connect_info->miss_icm_addr);
+ dr_ste_always_miss_addr(ste_ctx, tmp_hw_ste,
+ connect_info->miss_icm_addr);
+ memcpy(formatted_ste, tmp_hw_ste, DR_STE_SIZE_REDUCED);
}
int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
@@ -594,8 +421,9 @@ int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
{
u8 formatted_ste[DR_STE_SIZE] = {};
- mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi,
- nic_dmn,
+ mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
+ dmn->info.caps.gvmi,
+ nic_dmn->type,
htbl,
formatted_ste,
connect_info);
@@ -609,18 +437,18 @@ int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
u8 *cur_hw_ste,
enum mlx5dr_icm_chunk_size log_table_size)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)cur_hw_ste;
struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_htbl_connect_info info;
struct mlx5dr_ste_htbl *next_htbl;
if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) {
- u8 next_lu_type;
+ u16 next_lu_type;
u16 byte_mask;
- next_lu_type = MLX5_GET(ste_general, hw_ste, next_lu_type);
- byte_mask = MLX5_GET(ste_general, hw_ste, byte_mask);
+ next_lu_type = ste_ctx->get_next_lu_type(cur_hw_ste);
+ byte_mask = ste_ctx->get_byte_mask(cur_hw_ste);
next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
log_table_size,
@@ -633,14 +461,16 @@ int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
/* Write new table to HW */
info.type = CONNECT_MISS;
- info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
+ info.miss_icm_addr =
+ mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl,
&info, false)) {
mlx5dr_info(dmn, "Failed writing table to HW\n");
goto free_table;
}
- mlx5dr_ste_set_hit_addr_by_next_htbl(cur_hw_ste, next_htbl);
+ mlx5dr_ste_set_hit_addr_by_next_htbl(ste_ctx,
+ cur_hw_ste, next_htbl);
ste->next_htbl = next_htbl;
next_htbl->pointing_ste = ste;
}
@@ -652,27 +482,13 @@ free_table:
return -ENOENT;
}
-static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl)
-{
- struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
- int num_of_entries;
-
- htbl->ctrl.may_grow = true;
-
- if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
- htbl->ctrl.may_grow = false;
-
- /* Threshold is 50%, one is added to table of size 1 */
- num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
- ctrl->increase_threshold = (num_of_entries + 1) / 2;
-}
-
struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
enum mlx5dr_icm_chunk_size chunk_size,
- u8 lu_type, u16 byte_mask)
+ u16 lu_type, u16 byte_mask)
{
struct mlx5dr_icm_chunk *chunk;
struct mlx5dr_ste_htbl *htbl;
+ u32 num_entries;
int i;
htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
@@ -686,24 +502,18 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
htbl->chunk = chunk;
htbl->lu_type = lu_type;
htbl->byte_mask = byte_mask;
- htbl->ste_arr = chunk->ste_arr;
- htbl->hw_ste_arr = chunk->hw_ste_arr;
- htbl->miss_list = chunk->miss_list;
htbl->refcount = 0;
+ num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
- for (i = 0; i < chunk->num_of_entries; i++) {
- struct mlx5dr_ste *ste = &htbl->ste_arr[i];
+ for (i = 0; i < num_entries; i++) {
+ struct mlx5dr_ste *ste = &chunk->ste_arr[i];
- ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
ste->htbl = htbl;
ste->refcount = 0;
INIT_LIST_HEAD(&ste->miss_list_node);
- INIT_LIST_HEAD(&htbl->miss_list[i]);
- INIT_LIST_HEAD(&ste->rule_list);
+ INIT_LIST_HEAD(&chunk->miss_list[i]);
}
- htbl->chunk_size = chunk_size;
- dr_ste_set_ctrl(htbl);
return htbl;
out_free_htbl:
@@ -721,18 +531,141 @@ int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl)
return 0;
}
+void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps,
+ hw_ste_arr, attr, added_stes);
+}
+
+void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps,
+ hw_ste_arr, attr, added_stes);
+}
+
+const struct mlx5dr_ste_action_modify_field *
+mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field)
+{
+ const struct mlx5dr_ste_action_modify_field *hw_field;
+
+ if (sw_field >= ste_ctx->modify_field_arr_sz)
+ return NULL;
+
+ hw_field = &ste_ctx->modify_field_arr[sw_field];
+ if (!hw_field->end && !hw_field->start)
+ return NULL;
+
+ return hw_field;
+}
+
+void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ ste_ctx->set_action_set((u8 *)hw_action,
+ hw_field, shifter, length, data);
+}
+
+void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ ste_ctx->set_action_add((u8 *)hw_action,
+ hw_field, shifter, length, data);
+}
+
+void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter)
+{
+ ste_ctx->set_action_copy((u8 *)hw_action,
+ dst_hw_field, dst_shifter, dst_len,
+ src_hw_field, src_shifter);
+}
+
+int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
+ void *data, u32 data_sz,
+ u8 *hw_action, u32 hw_action_sz,
+ u16 *used_hw_action_num)
+{
+ /* Only Ethernet frame is supported, with VLAN (18) or without (14) */
+ if (data_sz != HDR_LEN_L2 && data_sz != HDR_LEN_L2_W_VLAN)
+ return -EINVAL;
+
+ return ste_ctx->set_action_decap_l3_list(data, data_sz,
+ hw_action, hw_action_sz,
+ used_hw_action_num);
+}
+
+static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn,
+ struct mlx5dr_match_spec *spec)
+{
+ if (spec->ip_version) {
+ if (spec->ip_version != 0xf) {
+ mlx5dr_err(dmn,
+ "Partial ip_version mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+ } else if (spec->ethertype != 0xffff &&
+ (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) {
+ mlx5dr_err(dmn,
+ "Partial/no ethertype mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
u8 match_criteria,
struct mlx5dr_match_param *mask,
struct mlx5dr_match_param *value)
{
- if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) {
+ if (value)
+ return 0;
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
- mlx5dr_dbg(dmn, "Partial mask source_port is not supported\n");
+ mlx5dr_err(dmn,
+ "Partial mask source_port is not supported\n");
+ return -EINVAL;
+ }
+ if (mask->misc.source_eswitch_owner_vhca_id &&
+ mask->misc.source_eswitch_owner_vhca_id != 0xffff) {
+ mlx5dr_err(dmn,
+ "Partial mask source_eswitch_owner_vhca_id is not supported\n");
return -EINVAL;
}
}
+ if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->outer))
+ return -EINVAL;
+
+ if ((match_criteria & DR_MATCHER_CRITERIA_INNER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->inner))
+ return -EINVAL;
+
return 0;
}
@@ -742,7 +675,9 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
u8 *ste_arr)
{
struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ bool is_rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_ste_build *sb;
int ret, i;
@@ -753,14 +688,14 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
sb = nic_matcher->ste_builder;
for (i = 0; i < nic_matcher->num_of_builders; i++) {
- mlx5dr_ste_init(ste_arr,
- sb->lu_type,
- nic_dmn->ste_type,
- dmn->info.caps.gvmi);
+ ste_ctx->ste_init(ste_arr,
+ sb->lu_type,
+ is_rx,
+ dmn->info.caps.gvmi);
mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask);
- ret = sb->ste_build_tag_func(value, sb, ste_arr);
+ ret = sb->ste_build_tag_func(value, sb, dr_ste_get_tag(ste_arr));
if (ret)
return ret;
@@ -770,147 +705,127 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
* not relevant for the last ste in the chain.
*/
sb++;
- MLX5_SET(ste_general, ste_arr, next_lu_type, sb->lu_type);
- MLX5_SET(ste_general, ste_arr, byte_mask, sb->byte_mask);
+ ste_ctx->set_next_lu_type(ste_arr, sb->lu_type);
+ ste_ctx->set_byte_mask(ste_arr, sb->byte_mask);
}
ste_arr += DR_STE_SIZE;
}
return 0;
}
-static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
- DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
-
- if (mask->smac_47_16 || mask->smac_15_0) {
- MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32,
- mask->smac_47_16 >> 16);
- MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0,
- mask->smac_47_16 << 16 | mask->smac_15_0);
- mask->smac_47_16 = 0;
- mask->smac_15_0 = 0;
- }
-
- DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid);
- DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi);
- DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio);
- DR_STE_SET_MASK(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version);
-
- if (mask->cvlan_tag) {
- MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
- mask->cvlan_tag = 0;
- } else if (mask->svlan_tag) {
- MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
- mask->svlan_tag = 0;
- }
-
- if (mask->cvlan_tag || mask->svlan_tag) {
- pr_info("Invalid c/svlan mask configuration\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec)
-{
- spec->gre_c_present = MLX5_GET(fte_match_set_misc, mask, gre_c_present);
- spec->gre_k_present = MLX5_GET(fte_match_set_misc, mask, gre_k_present);
- spec->gre_s_present = MLX5_GET(fte_match_set_misc, mask, gre_s_present);
- spec->source_vhca_port = MLX5_GET(fte_match_set_misc, mask, source_vhca_port);
- spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn);
-
- spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port);
- spec->source_eswitch_owner_vhca_id = MLX5_GET(fte_match_set_misc, mask,
- source_eswitch_owner_vhca_id);
-
- spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio);
- spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi);
- spec->outer_second_vid = MLX5_GET(fte_match_set_misc, mask, outer_second_vid);
- spec->inner_second_prio = MLX5_GET(fte_match_set_misc, mask, inner_second_prio);
- spec->inner_second_cfi = MLX5_GET(fte_match_set_misc, mask, inner_second_cfi);
- spec->inner_second_vid = MLX5_GET(fte_match_set_misc, mask, inner_second_vid);
+#define IFC_GET_CLR(typ, p, fld, clear) ({ \
+ void *__p = (p); \
+ u32 __t = MLX5_GET(typ, __p, fld); \
+ if (clear) \
+ MLX5_SET(typ, __p, fld, 0); \
+ __t; \
+})
+
+#define memcpy_and_clear(to, from, len, clear) ({ \
+ void *__to = (to), *__from = (from); \
+ size_t __len = (len); \
+ memcpy(__to, __from, __len); \
+ if (clear) \
+ memset(__from, 0, __len); \
+})
+
+static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bool clr)
+{
+ spec->gre_c_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_c_present, clr);
+ spec->gre_k_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_k_present, clr);
+ spec->gre_s_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_s_present, clr);
+ spec->source_vhca_port = IFC_GET_CLR(fte_match_set_misc, mask, source_vhca_port, clr);
+ spec->source_sqn = IFC_GET_CLR(fte_match_set_misc, mask, source_sqn, clr);
+
+ spec->source_port = IFC_GET_CLR(fte_match_set_misc, mask, source_port, clr);
+ spec->source_eswitch_owner_vhca_id =
+ IFC_GET_CLR(fte_match_set_misc, mask, source_eswitch_owner_vhca_id, clr);
+
+ spec->outer_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_prio, clr);
+ spec->outer_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cfi, clr);
+ spec->outer_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_vid, clr);
+ spec->inner_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_prio, clr);
+ spec->inner_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cfi, clr);
+ spec->inner_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_vid, clr);
spec->outer_second_cvlan_tag =
- MLX5_GET(fte_match_set_misc, mask, outer_second_cvlan_tag);
+ IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cvlan_tag, clr);
spec->inner_second_cvlan_tag =
- MLX5_GET(fte_match_set_misc, mask, inner_second_cvlan_tag);
+ IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cvlan_tag, clr);
spec->outer_second_svlan_tag =
- MLX5_GET(fte_match_set_misc, mask, outer_second_svlan_tag);
+ IFC_GET_CLR(fte_match_set_misc, mask, outer_second_svlan_tag, clr);
spec->inner_second_svlan_tag =
- MLX5_GET(fte_match_set_misc, mask, inner_second_svlan_tag);
-
- spec->gre_protocol = MLX5_GET(fte_match_set_misc, mask, gre_protocol);
+ IFC_GET_CLR(fte_match_set_misc, mask, inner_second_svlan_tag, clr);
+ spec->gre_protocol = IFC_GET_CLR(fte_match_set_misc, mask, gre_protocol, clr);
- spec->gre_key_h = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi);
- spec->gre_key_l = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo);
+ spec->gre_key_h = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.hi, clr);
+ spec->gre_key_l = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.lo, clr);
- spec->vxlan_vni = MLX5_GET(fte_match_set_misc, mask, vxlan_vni);
+ spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr);
- spec->geneve_vni = MLX5_GET(fte_match_set_misc, mask, geneve_vni);
- spec->geneve_oam = MLX5_GET(fte_match_set_misc, mask, geneve_oam);
+ spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr);
+ spec->geneve_tlv_option_0_exist =
+ IFC_GET_CLR(fte_match_set_misc, mask, geneve_tlv_option_0_exist, clr);
+ spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr);
spec->outer_ipv6_flow_label =
- MLX5_GET(fte_match_set_misc, mask, outer_ipv6_flow_label);
+ IFC_GET_CLR(fte_match_set_misc, mask, outer_ipv6_flow_label, clr);
spec->inner_ipv6_flow_label =
- MLX5_GET(fte_match_set_misc, mask, inner_ipv6_flow_label);
+ IFC_GET_CLR(fte_match_set_misc, mask, inner_ipv6_flow_label, clr);
- spec->geneve_opt_len = MLX5_GET(fte_match_set_misc, mask, geneve_opt_len);
+ spec->geneve_opt_len = IFC_GET_CLR(fte_match_set_misc, mask, geneve_opt_len, clr);
spec->geneve_protocol_type =
- MLX5_GET(fte_match_set_misc, mask, geneve_protocol_type);
+ IFC_GET_CLR(fte_match_set_misc, mask, geneve_protocol_type, clr);
- spec->bth_dst_qp = MLX5_GET(fte_match_set_misc, mask, bth_dst_qp);
+ spec->bth_dst_qp = IFC_GET_CLR(fte_match_set_misc, mask, bth_dst_qp, clr);
}
-static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec)
+static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bool clr)
{
- u32 raw_ip[4];
+ __be32 raw_ip[4];
- spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16);
+ spec->smac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_47_16, clr);
- spec->smac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0);
- spec->ethertype = MLX5_GET(fte_match_set_lyr_2_4, mask, ethertype);
+ spec->smac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_15_0, clr);
+ spec->ethertype = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ethertype, clr);
- spec->dmac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16);
+ spec->dmac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_47_16, clr);
- spec->dmac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0);
- spec->first_prio = MLX5_GET(fte_match_set_lyr_2_4, mask, first_prio);
- spec->first_cfi = MLX5_GET(fte_match_set_lyr_2_4, mask, first_cfi);
- spec->first_vid = MLX5_GET(fte_match_set_lyr_2_4, mask, first_vid);
+ spec->dmac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_15_0, clr);
+ spec->first_prio = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_prio, clr);
+ spec->first_cfi = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_cfi, clr);
+ spec->first_vid = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_vid, clr);
- spec->ip_protocol = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_protocol);
- spec->ip_dscp = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_dscp);
- spec->ip_ecn = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_ecn);
- spec->cvlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, cvlan_tag);
- spec->svlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, svlan_tag);
- spec->frag = MLX5_GET(fte_match_set_lyr_2_4, mask, frag);
- spec->ip_version = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_version);
- spec->tcp_flags = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_flags);
- spec->tcp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_sport);
- spec->tcp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_dport);
+ spec->ip_protocol = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_protocol, clr);
+ spec->ip_dscp = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_dscp, clr);
+ spec->ip_ecn = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_ecn, clr);
+ spec->cvlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, cvlan_tag, clr);
+ spec->svlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, svlan_tag, clr);
+ spec->frag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, frag, clr);
+ spec->ip_version = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_version, clr);
+ spec->tcp_flags = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_flags, clr);
+ spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr);
+ spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr);
- spec->ttl_hoplimit = MLX5_GET(fte_match_set_lyr_2_4, mask, ttl_hoplimit);
+ spec->ipv4_ihl = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ipv4_ihl, clr);
+ spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr);
- spec->udp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_sport);
- spec->udp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_dport);
+ spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr);
+ spec->udp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_dport, clr);
- memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(raw_ip));
+ memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(raw_ip), clr);
spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]);
spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]);
spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]);
spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]);
- memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(raw_ip));
+ memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(raw_ip), clr);
spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]);
spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]);
@@ -918,76 +833,125 @@ static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec)
spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]);
}
-static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec)
+static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec, bool clr)
{
spec->outer_first_mpls_label =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_label);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_label, clr);
spec->outer_first_mpls_exp =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp, clr);
spec->outer_first_mpls_s_bos =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos, clr);
spec->outer_first_mpls_ttl =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl, clr);
spec->inner_first_mpls_label =
- MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_label);
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_label, clr);
spec->inner_first_mpls_exp =
- MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp);
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp, clr);
spec->inner_first_mpls_s_bos =
- MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos);
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos, clr);
spec->inner_first_mpls_ttl =
- MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl);
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl, clr);
spec->outer_first_mpls_over_gre_label =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label, clr);
spec->outer_first_mpls_over_gre_exp =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp, clr);
spec->outer_first_mpls_over_gre_s_bos =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos, clr);
spec->outer_first_mpls_over_gre_ttl =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl, clr);
spec->outer_first_mpls_over_udp_label =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label, clr);
spec->outer_first_mpls_over_udp_exp =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp, clr);
spec->outer_first_mpls_over_udp_s_bos =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos, clr);
spec->outer_first_mpls_over_udp_ttl =
- MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl);
- spec->metadata_reg_c_7 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_7);
- spec->metadata_reg_c_6 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_6);
- spec->metadata_reg_c_5 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_5);
- spec->metadata_reg_c_4 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_4);
- spec->metadata_reg_c_3 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_3);
- spec->metadata_reg_c_2 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_2);
- spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1);
- spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0);
- spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a);
- spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b);
-}
-
-static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec)
-{
- spec->inner_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_seq_num);
- spec->outer_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_seq_num);
- spec->inner_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_ack_num);
- spec->outer_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_ack_num);
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl, clr);
+ spec->metadata_reg_c_7 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_7, clr);
+ spec->metadata_reg_c_6 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_6, clr);
+ spec->metadata_reg_c_5 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_5, clr);
+ spec->metadata_reg_c_4 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_4, clr);
+ spec->metadata_reg_c_3 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_3, clr);
+ spec->metadata_reg_c_2 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_2, clr);
+ spec->metadata_reg_c_1 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_1, clr);
+ spec->metadata_reg_c_0 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_0, clr);
+ spec->metadata_reg_a = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_a, clr);
+}
+
+static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec, bool clr)
+{
+ spec->inner_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_seq_num, clr);
+ spec->outer_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_seq_num, clr);
+ spec->inner_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_ack_num, clr);
+ spec->outer_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_ack_num, clr);
spec->outer_vxlan_gpe_vni =
- MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_vni);
+ IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_vni, clr);
spec->outer_vxlan_gpe_next_protocol =
- MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol);
+ IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol, clr);
spec->outer_vxlan_gpe_flags =
- MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_flags);
- spec->icmpv4_header_data = MLX5_GET(fte_match_set_misc3, mask, icmp_header_data);
+ IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_flags, clr);
+ spec->icmpv4_header_data = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_header_data, clr);
spec->icmpv6_header_data =
- MLX5_GET(fte_match_set_misc3, mask, icmpv6_header_data);
- spec->icmpv4_type = MLX5_GET(fte_match_set_misc3, mask, icmp_type);
- spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code);
- spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type);
- spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code);
+ IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_header_data, clr);
+ spec->icmpv4_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_type, clr);
+ spec->icmpv4_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_code, clr);
+ spec->icmpv6_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_type, clr);
+ spec->icmpv6_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_code, clr);
+ spec->geneve_tlv_option_0_data =
+ IFC_GET_CLR(fte_match_set_misc3, mask, geneve_tlv_option_0_data, clr);
+ spec->gtpu_teid = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_teid, clr);
+ spec->gtpu_msg_flags = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_flags, clr);
+ spec->gtpu_msg_type = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_type, clr);
+ spec->gtpu_dw_0 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_0, clr);
+ spec->gtpu_dw_2 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_2, clr);
+ spec->gtpu_first_ext_dw_0 =
+ IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_first_ext_dw_0, clr);
+}
+
+static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec, bool clr)
+{
+ spec->prog_sample_field_id_0 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_0, clr);
+ spec->prog_sample_field_value_0 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_0, clr);
+ spec->prog_sample_field_id_1 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_1, clr);
+ spec->prog_sample_field_value_1 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_1, clr);
+ spec->prog_sample_field_id_2 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_2, clr);
+ spec->prog_sample_field_value_2 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_2, clr);
+ spec->prog_sample_field_id_3 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_3, clr);
+ spec->prog_sample_field_value_3 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr);
+}
+
+static void dr_ste_copy_mask_misc5(char *mask, struct mlx5dr_match_misc5 *spec, bool clr)
+{
+ spec->macsec_tag_0 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_0, clr);
+ spec->macsec_tag_1 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_1, clr);
+ spec->macsec_tag_2 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_2, clr);
+ spec->macsec_tag_3 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_3, clr);
+ spec->tunnel_header_0 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_0, clr);
+ spec->tunnel_header_1 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_1, clr);
+ spec->tunnel_header_2 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_2, clr);
+ spec->tunnel_header_3 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_3, clr);
}
void mlx5dr_ste_copy_param(u8 match_criteria,
struct mlx5dr_match_param *set_param,
- struct mlx5dr_match_parameters *mask)
+ struct mlx5dr_match_parameters *mask,
+ bool clr)
{
u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {};
u8 *data = (u8 *)mask->match_buf;
@@ -1001,7 +965,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
} else {
buff = mask->match_buf;
}
- dr_ste_copy_mask_spec(buff, &set_param->outer);
+ dr_ste_copy_mask_spec(buff, &set_param->outer, clr);
}
param_location = sizeof(struct mlx5dr_match_spec);
@@ -1014,7 +978,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
} else {
buff = data + param_location;
}
- dr_ste_copy_mask_misc(buff, &set_param->misc);
+ dr_ste_copy_mask_misc(buff, &set_param->misc, clr);
}
param_location += sizeof(struct mlx5dr_match_misc);
@@ -1027,7 +991,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
} else {
buff = data + param_location;
}
- dr_ste_copy_mask_spec(buff, &set_param->inner);
+ dr_ste_copy_mask_spec(buff, &set_param->inner, clr);
}
param_location += sizeof(struct mlx5dr_match_spec);
@@ -1040,7 +1004,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
} else {
buff = data + param_location;
}
- dr_ste_copy_mask_misc2(buff, &set_param->misc2);
+ dr_ste_copy_mask_misc2(buff, &set_param->misc2, clr);
}
param_location += sizeof(struct mlx5dr_match_misc2);
@@ -1054,601 +1018,130 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
} else {
buff = data + param_location;
}
- dr_ste_copy_mask_misc3(buff, &set_param->misc3);
+ dr_ste_copy_mask_misc3(buff, &set_param->misc3, clr);
}
-}
-static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16);
- DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0);
-
- if (spec->smac_47_16 || spec->smac_15_0) {
- MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32,
- spec->smac_47_16 >> 16);
- MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0,
- spec->smac_47_16 << 16 | spec->smac_15_0);
- spec->smac_47_16 = 0;
- spec->smac_15_0 = 0;
- }
+ param_location += sizeof(struct mlx5dr_match_misc3);
- if (spec->ip_version) {
- if (spec->ip_version == IP_VERSION_IPV4) {
- MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4);
- spec->ip_version = 0;
- } else if (spec->ip_version == IP_VERSION_IPV6) {
- MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6);
- spec->ip_version = 0;
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc4)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
} else {
- pr_info("Unsupported ip_version value\n");
- return -EINVAL;
+ buff = data + param_location;
}
+ dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr);
}
- DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid);
- DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi);
- DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio);
+ param_location += sizeof(struct mlx5dr_match_misc4);
- if (spec->cvlan_tag) {
- MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN);
- spec->cvlan_tag = 0;
- } else if (spec->svlan_tag) {
- MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN);
- spec->svlan_tag = 0;
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC5) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc5)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc5(buff, &set_param->misc5, clr);
}
- return 0;
}
-int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx)
+void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- int ret;
-
- ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask);
- if (ret)
- return ret;
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_des_tag;
-
- return 0;
-}
-
-static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_127_96, mask, dst_ip_127_96);
- DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_95_64, mask, dst_ip_95_64);
- DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_63_32, mask, dst_ip_63_32);
- DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_31_0, mask, dst_ip_31_0);
-}
-
-static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
- DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
- DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32);
- DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0);
-
- return 0;
+ ste_ctx->build_eth_l2_src_dst_init(sb, mask);
}
-void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_eth_l3_ipv6_dst_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_dst_tag;
+ ste_ctx->build_eth_l3_ipv6_dst_init(sb, mask);
}
-static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_127_96, mask, src_ip_127_96);
- DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_95_64, mask, src_ip_95_64);
- DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_63_32, mask, src_ip_63_32);
- DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_31_0, mask, src_ip_31_0);
-}
-
-static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
- DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
- DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32);
- DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0);
-
- return 0;
-}
-
-void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_eth_l3_ipv6_src_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_src_tag;
-}
-
-static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param *value,
- bool inner,
- u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- destination_address, mask, dst_ip_31_0);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- source_address, mask, src_ip_31_0);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- destination_port, mask, tcp_dport);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- destination_port, mask, udp_dport);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- source_port, mask, tcp_sport);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- source_port, mask, udp_sport);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- protocol, mask, ip_protocol);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- fragmented, mask, frag);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- dscp, mask, ip_dscp);
- DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
- ecn, mask, ip_ecn);
-
- if (mask->tcp_flags) {
- DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, bit_mask, mask);
- mask->tcp_flags = 0;
- }
+ ste_ctx->build_eth_l3_ipv6_src_init(sb, mask);
}
-static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp);
- DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn);
-
- if (spec->tcp_flags) {
- DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec);
- spec->tcp_flags = 0;
- }
-
- return 0;
-}
-
-void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_5_tuple_tag;
+ ste_ctx->build_eth_l3_ipv4_5_tuple_init(sb, mask);
}
-static void
-dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
- struct mlx5dr_match_misc *misc_mask = &value->misc;
-
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_cfi, mask, first_cfi);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_priority, mask, first_prio);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask, ip_fragmented, mask, frag);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype);
- DR_STE_SET_MASK(eth_l2_src, bit_mask, l3_type, mask, ip_version);
-
- if (mask->svlan_tag || mask->cvlan_tag) {
- MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1);
- mask->cvlan_tag = 0;
- mask->svlan_tag = 0;
- }
-
- if (inner) {
- if (misc_mask->inner_second_cvlan_tag ||
- misc_mask->inner_second_svlan_tag) {
- MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
- misc_mask->inner_second_cvlan_tag = 0;
- misc_mask->inner_second_svlan_tag = 0;
- }
-
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
- second_vlan_id, misc_mask, inner_second_vid);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
- second_cfi, misc_mask, inner_second_cfi);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
- second_priority, misc_mask, inner_second_prio);
- } else {
- if (misc_mask->outer_second_cvlan_tag ||
- misc_mask->outer_second_svlan_tag) {
- MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
- misc_mask->outer_second_cvlan_tag = 0;
- misc_mask->outer_second_svlan_tag = 0;
- }
-
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
- second_vlan_id, misc_mask, outer_second_vid);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
- second_cfi, misc_mask, outer_second_cfi);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
- second_priority, misc_mask, outer_second_prio);
- }
-}
-
-static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
- bool inner, u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
- struct mlx5dr_match_misc *misc_spec = &value->misc;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid);
- DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi);
- DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio);
- DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag);
- DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype);
-
- if (spec->ip_version) {
- if (spec->ip_version == IP_VERSION_IPV4) {
- MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4);
- spec->ip_version = 0;
- } else if (spec->ip_version == IP_VERSION_IPV6) {
- MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6);
- spec->ip_version = 0;
- } else {
- pr_info("Unsupported ip_version value\n");
- return -EINVAL;
- }
- }
-
- if (spec->cvlan_tag) {
- MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN);
- spec->cvlan_tag = 0;
- } else if (spec->svlan_tag) {
- MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN);
- spec->svlan_tag = 0;
- }
-
- if (inner) {
- if (misc_spec->inner_second_cvlan_tag) {
- MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
- misc_spec->inner_second_cvlan_tag = 0;
- } else if (misc_spec->inner_second_svlan_tag) {
- MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
- misc_spec->inner_second_svlan_tag = 0;
- }
-
- DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid);
- DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi);
- DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio);
- } else {
- if (misc_spec->outer_second_cvlan_tag) {
- MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
- misc_spec->outer_second_cvlan_tag = 0;
- } else if (misc_spec->outer_second_svlan_tag) {
- MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
- misc_spec->outer_second_svlan_tag = 0;
- }
- DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid);
- DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi);
- DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio);
- }
-
- return 0;
-}
-
-static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16);
- DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0);
-
- dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
-}
-
-static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16);
- DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0);
-
- return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
-}
-
-void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_eth_l2_src_bit_mask(mask, inner, sb->bit_mask);
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_tag;
-}
-
-static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
- DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
-
- dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
-}
-
-static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16);
- DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0);
-
- return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
+ ste_ctx->build_eth_l2_src_init(sb, mask);
}
-void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_eth_l2_dst_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l2_dst_tag;
-}
-
-static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
- struct mlx5dr_match_misc *misc = &value->misc;
-
- DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16);
- DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0);
- DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid);
- DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi);
- DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_priority, mask, first_prio);
- DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag);
- DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype);
- DR_STE_SET_MASK(eth_l2_tnl, bit_mask, l3_type, mask, ip_version);
-
- if (misc->vxlan_vni) {
- MLX5_SET(ste_eth_l2_tnl, bit_mask,
- l2_tunneling_network_id, (misc->vxlan_vni << 8));
- misc->vxlan_vni = 0;
- }
-
- if (mask->svlan_tag || mask->cvlan_tag) {
- MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1);
- mask->cvlan_tag = 0;
- mask->svlan_tag = 0;
- }
+ ste_ctx->build_eth_l2_dst_init(sb, mask);
}
-static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc *misc = &value->misc;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16);
- DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0);
- DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid);
- DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi);
- DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag);
- DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio);
- DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype);
-
- if (misc->vxlan_vni) {
- MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id,
- (misc->vxlan_vni << 8));
- misc->vxlan_vni = 0;
- }
-
- if (spec->cvlan_tag) {
- MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN);
- spec->cvlan_tag = 0;
- } else if (spec->svlan_tag) {
- MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN);
- spec->svlan_tag = 0;
- }
-
- if (spec->ip_version) {
- if (spec->ip_version == IP_VERSION_IPV4) {
- MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4);
- spec->ip_version = 0;
- } else if (spec->ip_version == IP_VERSION_IPV6) {
- MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6);
- spec->ip_version = 0;
- } else {
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask, bool inner, bool rx)
{
- dr_ste_build_eth_l2_tnl_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l2_tnl_tag;
+ ste_ctx->build_eth_l2_tnl_init(sb, mask);
}
-static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l3_ipv4_misc, bit_mask, time_to_live, mask, ttl_hoplimit);
-}
-
-static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit);
-
- return 0;
-}
-
-void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_eth_l3_ipv4_misc_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_misc_tag;
-}
-
-static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
-
- DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, tcp_dport);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, tcp_sport);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, udp_dport);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, udp_sport);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, protocol, mask, ip_protocol);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, fragmented, mask, frag);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, dscp, mask, ip_dscp);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, ecn, mask, ip_ecn);
- DR_STE_SET_MASK_V(eth_l4, bit_mask, ipv6_hop_limit, mask, ttl_hoplimit);
-
- if (mask->tcp_flags) {
- DR_STE_SET_TCP_FLAGS(eth_l4, bit_mask, mask);
- mask->tcp_flags = 0;
- }
-}
-
-static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
- DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
- DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport);
- DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport);
- DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol);
- DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag);
- DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp);
- DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn);
- DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit);
-
- if (spec->tcp_flags) {
- DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec);
- spec->tcp_flags = 0;
- }
-
- return 0;
+ ste_ctx->build_eth_l3_ipv4_misc_init(sb, mask);
}
-void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx)
+void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_ipv6_l3_l4_tag;
+ ste_ctx->build_eth_ipv6_l3_l4_init(sb, mask);
}
static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
return 0;
}
@@ -1661,705 +1154,236 @@ void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx)
sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag;
}
-static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_misc2 *misc2_mask = &value->misc2;
-
- if (inner)
- DR_STE_SET_MPLS_MASK(mpls, misc2_mask, inner, bit_mask);
- else
- DR_STE_SET_MPLS_MASK(mpls, misc2_mask, outer, bit_mask);
-}
-
-static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc2 *misc2_mask = &value->misc2;
- u8 *tag = hw_ste->tag;
-
- if (sb->inner)
- DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag);
- else
- DR_STE_SET_MPLS_TAG(mpls, misc2_mask, outer, tag);
-
- return 0;
-}
-
-void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_mpls_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_mpls_tag;
-}
-
-static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_misc *misc_mask = &value->misc;
-
- DR_STE_SET_MASK_V(gre, bit_mask, gre_protocol, misc_mask, gre_protocol);
- DR_STE_SET_MASK_V(gre, bit_mask, gre_k_present, misc_mask, gre_k_present);
- DR_STE_SET_MASK_V(gre, bit_mask, gre_key_h, misc_mask, gre_key_h);
- DR_STE_SET_MASK_V(gre, bit_mask, gre_key_l, misc_mask, gre_key_l);
-
- DR_STE_SET_MASK_V(gre, bit_mask, gre_c_present, misc_mask, gre_c_present);
- DR_STE_SET_MASK_V(gre, bit_mask, gre_s_present, misc_mask, gre_s_present);
-}
-
-static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc *misc = &value->misc;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol);
-
- DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present);
- DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h);
- DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l);
-
- DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present);
-
- DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present);
-
- return 0;
+ ste_ctx->build_mpls_init(sb, mask);
}
-void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask, bool inner, bool rx)
+void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_GRE;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_gre_tag;
-}
-
-static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
-
- if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) {
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label,
- misc_2_mask, outer_first_mpls_over_gre_label);
-
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp,
- misc_2_mask, outer_first_mpls_over_gre_exp);
-
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos,
- misc_2_mask, outer_first_mpls_over_gre_s_bos);
-
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl,
- misc_2_mask, outer_first_mpls_over_gre_ttl);
- } else {
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label,
- misc_2_mask, outer_first_mpls_over_udp_label);
-
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp,
- misc_2_mask, outer_first_mpls_over_udp_exp);
-
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos,
- misc_2_mask, outer_first_mpls_over_udp_s_bos);
-
- DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl,
- misc_2_mask, outer_first_mpls_over_udp_ttl);
- }
+ ste_ctx->build_tnl_gre_init(sb, mask);
}
-static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
- u8 *tag = hw_ste->tag;
-
- if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) {
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
- misc_2_mask, outer_first_mpls_over_gre_label);
-
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
- misc_2_mask, outer_first_mpls_over_gre_exp);
-
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
- misc_2_mask, outer_first_mpls_over_gre_s_bos);
-
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
- misc_2_mask, outer_first_mpls_over_gre_ttl);
- } else {
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
- misc_2_mask, outer_first_mpls_over_udp_label);
-
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
- misc_2_mask, outer_first_mpls_over_udp_exp);
-
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
- misc_2_mask, outer_first_mpls_over_udp_s_bos);
-
- DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
- misc_2_mask, outer_first_mpls_over_udp_ttl);
- }
- return 0;
-}
-
-void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx)
+void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
{
- dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_0;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_flex_parser_0_tag;
-}
-
-#define ICMP_TYPE_OFFSET_FIRST_DW 24
-#define ICMP_CODE_OFFSET_FIRST_DW 16
-#define ICMP_HEADER_DATA_OFFSET_SECOND_DW 0
-
-static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask,
- struct mlx5dr_cmd_caps *caps,
- u8 *bit_mask)
-{
- struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3;
- bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask);
- u32 icmp_header_data_mask;
- u32 icmp_type_mask;
- u32 icmp_code_mask;
- int dw0_location;
- int dw1_location;
-
- if (is_ipv4_mask) {
- icmp_header_data_mask = misc_3_mask->icmpv4_header_data;
- icmp_type_mask = misc_3_mask->icmpv4_type;
- icmp_code_mask = misc_3_mask->icmpv4_code;
- dw0_location = caps->flex_parser_id_icmp_dw0;
- dw1_location = caps->flex_parser_id_icmp_dw1;
- } else {
- icmp_header_data_mask = misc_3_mask->icmpv6_header_data;
- icmp_type_mask = misc_3_mask->icmpv6_type;
- icmp_code_mask = misc_3_mask->icmpv6_code;
- dw0_location = caps->flex_parser_id_icmpv6_dw0;
- dw1_location = caps->flex_parser_id_icmpv6_dw1;
- }
-
- switch (dw0_location) {
- case 4:
- if (icmp_type_mask) {
- MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4,
- (icmp_type_mask << ICMP_TYPE_OFFSET_FIRST_DW));
- if (is_ipv4_mask)
- misc_3_mask->icmpv4_type = 0;
- else
- misc_3_mask->icmpv6_type = 0;
- }
- if (icmp_code_mask) {
- u32 cur_val = MLX5_GET(ste_flex_parser_1, bit_mask,
- flex_parser_4);
- MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4,
- cur_val | (icmp_code_mask << ICMP_CODE_OFFSET_FIRST_DW));
- if (is_ipv4_mask)
- misc_3_mask->icmpv4_code = 0;
- else
- misc_3_mask->icmpv6_code = 0;
- }
- break;
- default:
- return -EINVAL;
- }
-
- switch (dw1_location) {
- case 5:
- if (icmp_header_data_mask) {
- MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_5,
- (icmp_header_data_mask << ICMP_HEADER_DATA_OFFSET_SECOND_DW));
- if (is_ipv4_mask)
- misc_3_mask->icmpv4_header_data = 0;
- else
- misc_3_mask->icmpv6_header_data = 0;
- }
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc3 *misc_3 = &value->misc3;
- u8 *tag = hw_ste->tag;
- u32 icmp_header_data;
- int dw0_location;
- int dw1_location;
- u32 icmp_type;
- u32 icmp_code;
- bool is_ipv4;
-
- is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3);
- if (is_ipv4) {
- icmp_header_data = misc_3->icmpv4_header_data;
- icmp_type = misc_3->icmpv4_type;
- icmp_code = misc_3->icmpv4_code;
- dw0_location = sb->caps->flex_parser_id_icmp_dw0;
- dw1_location = sb->caps->flex_parser_id_icmp_dw1;
- } else {
- icmp_header_data = misc_3->icmpv6_header_data;
- icmp_type = misc_3->icmpv6_type;
- icmp_code = misc_3->icmpv6_code;
- dw0_location = sb->caps->flex_parser_id_icmpv6_dw0;
- dw1_location = sb->caps->flex_parser_id_icmpv6_dw1;
- }
-
- switch (dw0_location) {
- case 4:
- if (icmp_type) {
- MLX5_SET(ste_flex_parser_1, tag, flex_parser_4,
- (icmp_type << ICMP_TYPE_OFFSET_FIRST_DW));
- if (is_ipv4)
- misc_3->icmpv4_type = 0;
- else
- misc_3->icmpv6_type = 0;
- }
-
- if (icmp_code) {
- u32 cur_val = MLX5_GET(ste_flex_parser_1, tag,
- flex_parser_4);
- MLX5_SET(ste_flex_parser_1, tag, flex_parser_4,
- cur_val | (icmp_code << ICMP_CODE_OFFSET_FIRST_DW));
- if (is_ipv4)
- misc_3->icmpv4_code = 0;
- else
- misc_3->icmpv6_code = 0;
- }
- break;
- default:
- return -EINVAL;
- }
-
- switch (dw1_location) {
- case 5:
- if (icmp_header_data) {
- MLX5_SET(ste_flex_parser_1, tag, flex_parser_5,
- (icmp_header_data << ICMP_HEADER_DATA_OFFSET_SECOND_DW));
- if (is_ipv4)
- misc_3->icmpv4_header_data = 0;
- else
- misc_3->icmpv6_header_data = 0;
- }
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
+ sb->caps = caps;
+ return ste_ctx->build_tnl_mpls_over_gre_init(sb, mask);
}
-int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- struct mlx5dr_cmd_caps *caps,
- bool inner, bool rx)
+void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
{
- int ret;
-
- ret = dr_ste_build_flex_parser_1_bit_mask(mask, caps, sb->bit_mask);
- if (ret)
- return ret;
-
sb->rx = rx;
sb->inner = inner;
sb->caps = caps;
- sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_1;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_flex_parser_1_tag;
-
- return 0;
-}
-
-static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
-
- DR_STE_SET_MASK_V(general_purpose, bit_mask,
- general_purpose_lookup_field, misc_2_mask,
- metadata_reg_a);
+ return ste_ctx->build_tnl_mpls_over_udp_init(sb, mask);
}
-static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
- misc_2_mask, metadata_reg_a);
-
- return 0;
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->caps = caps;
+ ste_ctx->build_icmp_init(sb, mask);
}
-void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_general_purpose_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_general_purpose_tag;
-}
-
-static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
-{
- struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3;
-
- if (inner) {
- DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask,
- inner_tcp_seq_num);
- DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask,
- inner_tcp_ack_num);
- } else {
- DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask,
- outer_tcp_seq_num);
- DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask,
- outer_tcp_ack_num);
- }
+ ste_ctx->build_general_purpose_init(sb, mask);
}
-static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc3 *misc3 = &value->misc3;
- u8 *tag = hw_ste->tag;
-
- if (sb->inner) {
- DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num);
- DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num);
- } else {
- DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num);
- DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num);
- }
-
- return 0;
-}
-
-void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_eth_l4_misc_bit_mask(mask, inner, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, rx, inner);
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_eth_l4_misc_tag;
+ ste_ctx->build_eth_l4_misc_init(sb, mask);
}
-static void
-dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
+void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3;
-
- DR_STE_SET_MASK_V(flex_parser_tnl_vxlan_gpe, bit_mask,
- outer_vxlan_gpe_flags,
- misc_3_mask, outer_vxlan_gpe_flags);
- DR_STE_SET_MASK_V(flex_parser_tnl_vxlan_gpe, bit_mask,
- outer_vxlan_gpe_next_protocol,
- misc_3_mask, outer_vxlan_gpe_next_protocol);
- DR_STE_SET_MASK_V(flex_parser_tnl_vxlan_gpe, bit_mask,
- outer_vxlan_gpe_vni,
- misc_3_mask, outer_vxlan_gpe_vni);
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_vxlan_gpe_init(sb, mask);
}
-static int
-dr_ste_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc3 *misc3 = &value->misc3;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
- outer_vxlan_gpe_flags, misc3,
- outer_vxlan_gpe_flags);
- DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
- outer_vxlan_gpe_next_protocol, misc3,
- outer_vxlan_gpe_next_protocol);
- DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
- outer_vxlan_gpe_vni, misc3,
- outer_vxlan_gpe_vni);
-
- return 0;
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_geneve_init(sb, mask);
}
-void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx)
+void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
{
- dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(mask, inner,
- sb->bit_mask);
-
sb->rx = rx;
+ sb->caps = caps;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_vxlan_gpe_tag;
+ ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask);
}
-static void
-dr_ste_build_flex_parser_tnl_geneve_bit_mask(struct mlx5dr_match_param *value,
- u8 *bit_mask)
-{
- struct mlx5dr_match_misc *misc_mask = &value->misc;
-
- DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask,
- geneve_protocol_type,
- misc_mask, geneve_protocol_type);
- DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask,
- geneve_oam,
- misc_mask, geneve_oam);
- DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask,
- geneve_opt_len,
- misc_mask, geneve_opt_len);
- DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask,
- geneve_vni,
- misc_mask, geneve_vni);
-}
-
-static int
-dr_ste_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc *misc = &value->misc;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
- geneve_protocol_type, misc, geneve_protocol_type);
- DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
- geneve_oam, misc, geneve_oam);
- DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
- geneve_opt_len, misc, geneve_opt_len);
- DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
- geneve_vni, misc, geneve_vni);
+ if (!ste_ctx->build_tnl_geneve_tlv_opt_exist_init)
+ return;
- return 0;
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_geneve_tlv_opt_exist_init(sb, mask);
}
-void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx)
+void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- dr_ste_build_flex_parser_tnl_geneve_bit_mask(mask, sb->bit_mask);
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_geneve_tag;
+ ste_ctx->build_tnl_gtpu_init(sb, mask);
}
-static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value,
- u8 *bit_mask)
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
{
- struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
-
- DR_STE_SET_MASK_V(register_0, bit_mask, register_0_h,
- misc_2_mask, metadata_reg_c_0);
- DR_STE_SET_MASK_V(register_0, bit_mask, register_0_l,
- misc_2_mask, metadata_reg_c_1);
- DR_STE_SET_MASK_V(register_0, bit_mask, register_1_h,
- misc_2_mask, metadata_reg_c_2);
- DR_STE_SET_MASK_V(register_0, bit_mask, register_1_l,
- misc_2_mask, metadata_reg_c_3);
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_gtpu_flex_parser_0_init(sb, mask);
}
-static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc2 *misc2 = &value->misc2;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
- DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
- DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2);
- DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3);
-
- return 0;
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_gtpu_flex_parser_1_init(sb, mask);
}
-void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx)
{
- dr_ste_build_register_0_bit_mask(mask, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_register_0_tag;
+ ste_ctx->build_register_0_init(sb, mask);
}
-static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value,
- u8 *bit_mask)
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
-
- DR_STE_SET_MASK_V(register_1, bit_mask, register_2_h,
- misc_2_mask, metadata_reg_c_4);
- DR_STE_SET_MASK_V(register_1, bit_mask, register_2_l,
- misc_2_mask, metadata_reg_c_5);
- DR_STE_SET_MASK_V(register_1, bit_mask, register_3_h,
- misc_2_mask, metadata_reg_c_6);
- DR_STE_SET_MASK_V(register_1, bit_mask, register_3_l,
- misc_2_mask, metadata_reg_c_7);
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_register_1_init(sb, mask);
}
-static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn,
+ bool inner, bool rx)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc2 *misc2 = &value->misc2;
- u8 *tag = hw_ste->tag;
-
- DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
- DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
- DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6);
- DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7);
+ /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */
+ sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id;
- return 0;
+ sb->rx = rx;
+ sb->dmn = dmn;
+ sb->inner = inner;
+ ste_ctx->build_src_gvmi_qpn_init(sb, mask);
}
-void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx)
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- dr_ste_build_register_1_bit_mask(mask, sb->bit_mask);
-
sb->rx = rx;
sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_register_1_tag;
+ ste_ctx->build_flex_parser_0_init(sb, mask);
}
-static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
- u8 *bit_mask)
+void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- struct mlx5dr_match_misc *misc_mask = &value->misc;
-
- /* Partial misc source_port is not supported */
- if (misc_mask->source_port && misc_mask->source_port != 0xffff)
- return -EINVAL;
-
- /* Partial misc source_eswitch_owner_vhca_id is not supported */
- if (misc_mask->source_eswitch_owner_vhca_id &&
- misc_mask->source_eswitch_owner_vhca_id != 0xffff)
- return -EINVAL;
-
- DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
- DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
- misc_mask->source_eswitch_owner_vhca_id = 0;
-
- return 0;
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_flex_parser_1_init(sb, mask);
}
-static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
- struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- struct mlx5dr_match_misc *misc = &value->misc;
- struct mlx5dr_cmd_vport_cap *vport_cap;
- struct mlx5dr_domain *dmn = sb->dmn;
- struct mlx5dr_cmd_caps *caps;
- u8 *bit_mask = sb->bit_mask;
- u8 *tag = hw_ste->tag;
- bool source_gvmi_set;
-
- DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
-
- if (sb->vhca_id_valid) {
- /* Find port GVMI based on the eswitch_owner_vhca_id */
- if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
- caps = &dmn->info.caps;
- else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
- dmn->peer_dmn->info.caps.gvmi))
- caps = &dmn->peer_dmn->info.caps;
- else
- return -EINVAL;
- } else {
- caps = &dmn->info.caps;
- }
-
- vport_cap = mlx5dr_get_vport_cap(caps, misc->source_port);
- if (!vport_cap)
- return -EINVAL;
-
- source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi);
- if (vport_cap->vport_gvmi && source_gvmi_set)
- MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
-
- misc->source_eswitch_owner_vhca_id = 0;
- misc->source_port = 0;
-
- return 0;
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_header_0_1_init(sb, mask);
}
-int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- struct mlx5dr_domain *dmn,
- bool inner, bool rx)
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version)
{
- int ret;
+ if (version == MLX5_STEERING_FORMAT_CONNECTX_5)
+ return mlx5dr_ste_get_ctx_v0();
+ else if (version == MLX5_STEERING_FORMAT_CONNECTX_6DX)
+ return mlx5dr_ste_get_ctx_v1();
+ else if (version == MLX5_STEERING_FORMAT_CONNECTX_7)
+ return mlx5dr_ste_get_ctx_v2();
- /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */
- sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id;
-
- ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
- if (ret)
- return ret;
-
- sb->rx = rx;
- sb->dmn = dmn;
- sb->inner = inner;
- sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP;
- sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
- sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag;
-
- return 0;
+ return NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
new file mode 100644
index 000000000000..17513baff9b0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#ifndef _DR_STE_
+#define _DR_STE_
+
+#include "dr_types.h"
+
+#define STE_IPV4 0x1
+#define STE_IPV6 0x2
+#define STE_TCP 0x1
+#define STE_UDP 0x2
+#define STE_SPI 0x3
+#define IP_VERSION_IPV4 0x4
+#define IP_VERSION_IPV6 0x6
+#define STE_SVLAN 0x1
+#define STE_CVLAN 0x2
+#define HDR_LEN_L2_MACS 0xC
+#define HDR_LEN_L2_VLAN 0x4
+#define HDR_LEN_L2_ETHER 0x2
+#define HDR_LEN_L2 (HDR_LEN_L2_MACS + HDR_LEN_L2_ETHER)
+#define HDR_LEN_L2_W_VLAN (HDR_LEN_L2 + HDR_LEN_L2_VLAN)
+
+/* Set to STE a specific value using DR_STE_SET */
+#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \
+ if ((spec)->s_fname) { \
+ MLX5_SET(ste_##lookup_type, tag, t_fname, value); \
+ (spec)->s_fname = 0; \
+ } \
+} while (0)
+
+/* Set to STE spec->s_fname to tag->t_fname set spec->s_fname as used */
+#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \
+ DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname)
+
+/* Set to STE -1 to tag->t_fname and set spec->s_fname as used */
+#define DR_STE_SET_ONES(lookup_type, tag, t_fname, spec, s_fname) \
+ DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, -1)
+
+#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \
+} while (0)
+
+#define DR_STE_SET_MPLS(lookup_type, mask, in_out, tag) do { \
+ struct mlx5dr_match_misc2 *_mask = mask; \
+ u8 *_tag = tag; \
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_label, _mask, \
+ in_out##_first_mpls_label);\
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_s_bos, _mask, \
+ in_out##_first_mpls_s_bos); \
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_exp, _mask, \
+ in_out##_first_mpls_exp); \
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_ttl, _mask, \
+ in_out##_first_mpls_ttl); \
+} while (0)
+
+#define DR_STE_SET_FLEX_PARSER_FIELD(tag, fname, caps, spec) do { \
+ u8 parser_id = (caps)->flex_parser_id_##fname; \
+ u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); \
+ *(__be32 *)parser_ptr = cpu_to_be32((spec)->fname);\
+ (spec)->fname = 0;\
+} while (0)
+
+#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_gre_label || \
+ (_misc)->outer_first_mpls_over_gre_exp || \
+ (_misc)->outer_first_mpls_over_gre_s_bos || \
+ (_misc)->outer_first_mpls_over_gre_ttl)
+
+#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_udp_label || \
+ (_misc)->outer_first_mpls_over_udp_exp || \
+ (_misc)->outer_first_mpls_over_udp_s_bos || \
+ (_misc)->outer_first_mpls_over_udp_ttl)
+
+enum dr_ste_action_modify_type_l3 {
+ DR_STE_ACTION_MDFY_TYPE_L3_NONE = 0x0,
+ DR_STE_ACTION_MDFY_TYPE_L3_IPV4 = 0x1,
+ DR_STE_ACTION_MDFY_TYPE_L3_IPV6 = 0x2,
+};
+
+enum dr_ste_action_modify_type_l4 {
+ DR_STE_ACTION_MDFY_TYPE_L4_NONE = 0x0,
+ DR_STE_ACTION_MDFY_TYPE_L4_TCP = 0x1,
+ DR_STE_ACTION_MDFY_TYPE_L4_UDP = 0x2,
+};
+
+enum {
+ HDR_MPLS_OFFSET_LABEL = 12,
+ HDR_MPLS_OFFSET_EXP = 9,
+ HDR_MPLS_OFFSET_S_BOS = 8,
+ HDR_MPLS_OFFSET_TTL = 0,
+};
+
+u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask);
+
+static inline u8 *
+dr_ste_calc_flex_parser_offset(u8 *tag, u8 parser_id)
+{
+ /* Calculate tag byte offset based on flex parser id */
+ return tag + 4 * (3 - (parser_id % 4));
+}
+
+#define DR_STE_CTX_BUILDER(fname) \
+ ((*build_##fname##_init)(struct mlx5dr_ste_build *sb, \
+ struct mlx5dr_match_param *mask))
+
+struct mlx5dr_ste_ctx {
+ /* Builders */
+ void DR_STE_CTX_BUILDER(eth_l2_src_dst);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv6_src);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv6_dst);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv4_5_tuple);
+ void DR_STE_CTX_BUILDER(eth_l2_src);
+ void DR_STE_CTX_BUILDER(eth_l2_dst);
+ void DR_STE_CTX_BUILDER(eth_l2_tnl);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv4_misc);
+ void DR_STE_CTX_BUILDER(eth_ipv6_l3_l4);
+ void DR_STE_CTX_BUILDER(mpls);
+ void DR_STE_CTX_BUILDER(tnl_gre);
+ void DR_STE_CTX_BUILDER(tnl_mpls);
+ void DR_STE_CTX_BUILDER(tnl_mpls_over_gre);
+ void DR_STE_CTX_BUILDER(tnl_mpls_over_udp);
+ void DR_STE_CTX_BUILDER(icmp);
+ void DR_STE_CTX_BUILDER(general_purpose);
+ void DR_STE_CTX_BUILDER(eth_l4_misc);
+ void DR_STE_CTX_BUILDER(tnl_vxlan_gpe);
+ void DR_STE_CTX_BUILDER(tnl_geneve);
+ void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt);
+ void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt_exist);
+ void DR_STE_CTX_BUILDER(register_0);
+ void DR_STE_CTX_BUILDER(register_1);
+ void DR_STE_CTX_BUILDER(src_gvmi_qpn);
+ void DR_STE_CTX_BUILDER(flex_parser_0);
+ void DR_STE_CTX_BUILDER(flex_parser_1);
+ void DR_STE_CTX_BUILDER(tnl_gtpu);
+ void DR_STE_CTX_BUILDER(tnl_header_0_1);
+ void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0);
+ void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1);
+
+ /* Getters and Setters */
+ void (*ste_init)(u8 *hw_ste_p, u16 lu_type,
+ bool is_rx, u16 gvmi);
+ void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type);
+ u16 (*get_next_lu_type)(u8 *hw_ste_p);
+ void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr);
+ u64 (*get_miss_addr)(u8 *hw_ste_p);
+ void (*set_hit_addr)(u8 *hw_ste_p, u64 icm_addr, u32 ht_size);
+ void (*set_byte_mask)(u8 *hw_ste_p, u16 byte_mask);
+ u16 (*get_byte_mask)(u8 *hw_ste_p);
+
+ /* Actions */
+ u32 actions_caps;
+ void (*set_actions_rx)(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+ void (*set_actions_tx)(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+ u32 modify_field_arr_sz;
+ const struct mlx5dr_ste_action_modify_field *modify_field_arr;
+ void (*set_action_set)(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+ void (*set_action_add)(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+ void (*set_action_copy)(u8 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter);
+ int (*set_action_decap_l3_list)(void *data,
+ u32 data_sz,
+ u8 *hw_action,
+ u32 hw_action_sz,
+ u16 *used_hw_action_num);
+
+ /* Send */
+ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size);
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void);
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void);
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void);
+
+#endif /* _DR_STE_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
new file mode 100644
index 000000000000..2010d4ac6519
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -0,0 +1,1960 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#include <linux/types.h>
+#include <linux/crc32.h>
+#include "dr_ste.h"
+
+#define SVLAN_ETHERTYPE 0x88a8
+#define DR_STE_ENABLE_FLOW_TAG BIT(31)
+
+enum dr_ste_v0_entry_type {
+ DR_STE_TYPE_TX = 1,
+ DR_STE_TYPE_RX = 2,
+ DR_STE_TYPE_MODIFY_PKT = 6,
+};
+
+enum dr_ste_v0_action_tunl {
+ DR_STE_TUNL_ACTION_NONE = 0,
+ DR_STE_TUNL_ACTION_ENABLE = 1,
+ DR_STE_TUNL_ACTION_DECAP = 2,
+ DR_STE_TUNL_ACTION_L3_DECAP = 3,
+ DR_STE_TUNL_ACTION_POP_VLAN = 4,
+};
+
+enum dr_ste_v0_action_type {
+ DR_STE_ACTION_TYPE_PUSH_VLAN = 1,
+ DR_STE_ACTION_TYPE_ENCAP_L3 = 3,
+ DR_STE_ACTION_TYPE_ENCAP = 4,
+};
+
+enum dr_ste_v0_action_mdfy_op {
+ DR_STE_ACTION_MDFY_OP_COPY = 0x1,
+ DR_STE_ACTION_MDFY_OP_SET = 0x2,
+ DR_STE_ACTION_MDFY_OP_ADD = 0x3,
+};
+
+#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \
+ ((inner) ? DR_STE_V0_LU_TYPE_##lookup_type##_I : \
+ (rx) ? DR_STE_V0_LU_TYPE_##lookup_type##_D : \
+ DR_STE_V0_LU_TYPE_##lookup_type##_O)
+
+enum {
+ DR_STE_V0_LU_TYPE_NOP = 0x00,
+ DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP = 0x05,
+ DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I = 0x0a,
+ DR_STE_V0_LU_TYPE_ETHL2_DST_O = 0x06,
+ DR_STE_V0_LU_TYPE_ETHL2_DST_I = 0x07,
+ DR_STE_V0_LU_TYPE_ETHL2_DST_D = 0x1b,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_O = 0x08,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_I = 0x09,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_D = 0x1c,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_O = 0x36,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_I = 0x37,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_D = 0x38,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b,
+ DR_STE_V0_LU_TYPE_ETHL4_O = 0x13,
+ DR_STE_V0_LU_TYPE_ETHL4_I = 0x14,
+ DR_STE_V0_LU_TYPE_ETHL4_D = 0x21,
+ DR_STE_V0_LU_TYPE_ETHL4_MISC_O = 0x2c,
+ DR_STE_V0_LU_TYPE_ETHL4_MISC_I = 0x2d,
+ DR_STE_V0_LU_TYPE_ETHL4_MISC_D = 0x2e,
+ DR_STE_V0_LU_TYPE_MPLS_FIRST_O = 0x15,
+ DR_STE_V0_LU_TYPE_MPLS_FIRST_I = 0x24,
+ DR_STE_V0_LU_TYPE_MPLS_FIRST_D = 0x25,
+ DR_STE_V0_LU_TYPE_GRE = 0x16,
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0 = 0x22,
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 = 0x23,
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19,
+ DR_STE_V0_LU_TYPE_GENERAL_PURPOSE = 0x18,
+ DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0 = 0x2f,
+ DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1 = 0x30,
+ DR_STE_V0_LU_TYPE_TUNNEL_HEADER = 0x34,
+ DR_STE_V0_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE,
+};
+
+enum {
+ DR_STE_V0_ACTION_MDFY_FLD_L2_0 = 0,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_1 = 1,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_2 = 2,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_0 = 3,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_1 = 4,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_2 = 5,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_3 = 6,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_4 = 7,
+ DR_STE_V0_ACTION_MDFY_FLD_L4_0 = 8,
+ DR_STE_V0_ACTION_MDFY_FLD_L4_1 = 9,
+ DR_STE_V0_ACTION_MDFY_FLD_MPLS = 10,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_0 = 11,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_0 = 12,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_1 = 13,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_2 = 14,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_3 = 15,
+ DR_STE_V0_ACTION_MDFY_FLD_L4_2 = 16,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_0 = 17,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_1 = 18,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_2 = 19,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_3 = 20,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_1 = 21,
+ DR_STE_V0_ACTION_MDFY_FLD_METADATA = 22,
+ DR_STE_V0_ACTION_MDFY_FLD_RESERVED = 23,
+};
+
+static const struct mlx5dr_ste_action_modify_field dr_ste_v0_action_modify_field_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 16, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 32, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 16, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 0, .end = 5,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 48, .end = 56,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 0, .end = 15,
+ },
+};
+
+static void dr_ste_v0_set_entry_type(u8 *hw_ste_p, u8 entry_type)
+{
+ MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
+}
+
+static u8 dr_ste_v0_get_entry_type(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_general, hw_ste_p, entry_type);
+}
+
+static void dr_ste_v0_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+{
+ u64 index = miss_addr >> 6;
+
+ /* Miss address for TX and RX STEs located in the same offsets */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index);
+}
+
+static u64 dr_ste_v0_get_miss_addr(u8 *hw_ste_p)
+{
+ u64 index =
+ ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6) |
+ ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32)) << 26);
+
+ return index << 6;
+}
+
+static void dr_ste_v0_set_byte_mask(u8 *hw_ste_p, u16 byte_mask)
+{
+ MLX5_SET(ste_general, hw_ste_p, byte_mask, byte_mask);
+}
+
+static u16 dr_ste_v0_get_byte_mask(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_general, hw_ste_p, byte_mask);
+}
+
+static void dr_ste_v0_set_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type);
+}
+
+static void dr_ste_v0_set_next_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_general, hw_ste_p, next_lu_type, lu_type);
+}
+
+static u16 dr_ste_v0_get_next_lu_type(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_general, hw_ste_p, next_lu_type);
+}
+
+static void dr_ste_v0_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
+{
+ MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi);
+}
+
+static void dr_ste_v0_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
+{
+ u64 index = (icm_addr >> 5) | ht_size;
+
+ MLX5_SET(ste_general, hw_ste_p, next_table_base_39_32_size, index >> 27);
+ MLX5_SET(ste_general, hw_ste_p, next_table_base_31_5_size, index);
+}
+
+static void dr_ste_v0_init_full(u8 *hw_ste_p, u16 lu_type,
+ enum dr_ste_v0_entry_type entry_type, u16 gvmi)
+{
+ dr_ste_v0_set_entry_type(hw_ste_p, entry_type);
+ dr_ste_v0_set_lu_type(hw_ste_p, lu_type);
+ dr_ste_v0_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
+
+ /* Set GVMI once, this is the same for RX/TX
+ * bits 63_48 of next table base / miss address encode the next GVMI
+ */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi);
+}
+
+static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
+ bool is_rx, u16 gvmi)
+{
+ enum dr_ste_v0_entry_type entry_type;
+
+ entry_type = is_rx ? DR_STE_TYPE_RX : DR_STE_TYPE_TX;
+ dr_ste_v0_init_full(hw_ste_p, lu_type, entry_type, gvmi);
+}
+
+static void dr_ste_v0_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer,
+ DR_STE_ENABLE_FLOW_TAG | flow_tag);
+}
+
+static void dr_ste_v0_set_counter_id(u8 *hw_ste_p, u32 ctr_id)
+{
+ /* This can be used for both rx_steering_mult and for sx_transmit */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16);
+}
+
+static void dr_ste_v0_set_go_back_bit(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1);
+}
+
+static void dr_ste_v0_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr,
+ bool go_back)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+ DR_STE_ACTION_TYPE_PUSH_VLAN);
+ MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr);
+ /* Due to HW limitation we need to set this bit, otherwise reformat +
+ * push vlan will not work.
+ */
+ if (go_back)
+ dr_ste_v0_set_go_back_bit(hw_ste_p);
+}
+
+static void dr_ste_v0_set_tx_encap(void *hw_ste_p, u32 reformat_id,
+ int size, bool encap_l3)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+ encap_l3 ? DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP);
+ /* The hardware expects here size in words (2 byte) */
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2);
+ MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id);
+}
+
+static void dr_ste_v0_set_rx_decap(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_DECAP);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1);
+}
+
+static void dr_ste_v0_set_rx_pop_vlan(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_POP_VLAN);
+}
+
+static void dr_ste_v0_set_rx_decap_l3(u8 *hw_ste_p, bool vlan)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_L3_DECAP);
+ MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1);
+}
+
+static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
+ u32 re_write_index)
+{
+ MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions,
+ num_of_actions);
+ MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer,
+ re_write_index);
+}
+
+static void dr_ste_v0_arr_init_next(u8 **last_ste,
+ u32 *added_stes,
+ enum dr_ste_v0_entry_type entry_type,
+ u16 gvmi)
+{
+ (*added_stes)++;
+ *last_ste += DR_STE_SIZE;
+ dr_ste_v0_init_full(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE,
+ entry_type, gvmi);
+}
+
+static void
+dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] ||
+ action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3];
+
+ /* We want to make sure the modify header comes before L2
+ * encapsulation. The reason for that is that we support
+ * modify headers for outer headers only
+ */
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
+ dr_ste_v0_set_rewrite_actions(last_ste,
+ attr->modify_actions,
+ attr->modify_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR])
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_TX,
+ attr->gvmi);
+
+ dr_ste_v0_set_tx_push_vlan(last_ste,
+ attr->vlans.headers[i],
+ encap);
+ }
+ }
+
+ if (encap) {
+ /* Modify header and encapsulation require a different STEs.
+ * Since modify header STE format doesn't support encapsulation
+ * tunneling_action.
+ */
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] ||
+ action_type_set[DR_ACTION_TYP_PUSH_VLAN])
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_TX,
+ attr->gvmi);
+
+ dr_ste_v0_set_tx_encap(last_ste,
+ attr->reformat.id,
+ attr->reformat.size,
+ action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]);
+ /* Whenever prio_tag_required enabled, we can be sure that the
+ * previous table (ACL) already push vlan to our packet,
+ * And due to HW limitation we need to set this bit, otherwise
+ * push vlan + reformat will not work.
+ */
+ if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required))
+ dr_ste_v0_set_go_back_bit(last_ste);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v0_set_counter_id(last_ste, attr->ctr_id);
+
+ dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
+static void
+dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v0_set_counter_id(last_ste, attr->ctr_id);
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
+ dr_ste_v0_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
+ dr_ste_v0_set_rewrite_actions(last_ste,
+ attr->decap_actions,
+ attr->decap_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2])
+ dr_ste_v0_set_rx_decap(last_ste);
+
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (i ||
+ action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] ||
+ action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_RX,
+ attr->gvmi);
+
+ dr_ste_v0_set_rx_pop_vlan(last_ste);
+ }
+ }
+
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
+ if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_MODIFY_PKT,
+ attr->gvmi);
+ else
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
+
+ dr_ste_v0_set_rewrite_actions(last_ste,
+ attr->modify_actions,
+ attr->modify_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TAG]) {
+ if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_RX,
+ attr->gvmi);
+
+ dr_ste_v0_rx_set_flow_tag(last_ste, attr->flow_tag);
+ }
+
+ dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
+static void dr_ste_v0_set_action_set(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ length = (length == 32) ? 0 : length;
+ MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, length);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+}
+
+static void dr_ste_v0_set_action_add(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ length = (length == 32) ? 0 : length;
+ MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_ADD);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, length);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+}
+
+static void dr_ste_v0_set_action_copy(u8 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter)
+{
+ MLX5_SET(dr_action_hw_copy, hw_action, opcode, DR_STE_ACTION_MDFY_OP_COPY);
+ MLX5_SET(dr_action_hw_copy, hw_action, destination_field_code, dst_hw_field);
+ MLX5_SET(dr_action_hw_copy, hw_action, destination_left_shifter, dst_shifter);
+ MLX5_SET(dr_action_hw_copy, hw_action, destination_length, dst_len);
+ MLX5_SET(dr_action_hw_copy, hw_action, source_field_code, src_hw_field);
+ MLX5_SET(dr_action_hw_copy, hw_action, source_left_shifter, src_shifter);
+}
+
+#define DR_STE_DECAP_L3_MIN_ACTION_NUM 5
+
+static int
+dr_ste_v0_set_action_decap_l3_list(void *data, u32 data_sz,
+ u8 *hw_action, u32 hw_action_sz,
+ u16 *used_hw_action_num)
+{
+ struct mlx5_ifc_l2_hdr_bits *l2_hdr = data;
+ u32 hw_action_num;
+ int required_actions;
+ u32 hdr_fld_4b;
+ u16 hdr_fld_2b;
+ u16 vlan_type;
+ bool vlan;
+
+ vlan = (data_sz != HDR_LEN_L2);
+ hw_action_num = hw_action_sz / MLX5_ST_SZ_BYTES(dr_action_hw_set);
+ required_actions = DR_STE_DECAP_L3_MIN_ACTION_NUM + !!vlan;
+
+ if (hw_action_num < required_actions)
+ return -ENOMEM;
+
+ /* dmac_47_16 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 16);
+ hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ inline_data, hdr_fld_4b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* smac_47_16 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, 16);
+ hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 |
+ MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* dmac_15_0 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 0);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ inline_data, hdr_fld_2b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* ethertype + (optional) vlan */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 32);
+ if (!vlan) {
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, 16);
+ } else {
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+ vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? DR_STE_SVLAN : DR_STE_CVLAN;
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan);
+ hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b;
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, 18);
+ }
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* smac_15_0 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 0);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ if (vlan) {
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ inline_data, hdr_fld_2b);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 0);
+ }
+
+ *used_hw_action_num = required_actions;
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ if (mask->smac_47_16 || mask->smac_15_0) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32,
+ mask->smac_47_16 >> 16);
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0,
+ mask->smac_47_16 << 16 | mask->smac_15_0);
+ mask->smac_47_16 = 0;
+ mask->smac_15_0 = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_ONES(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ } else if (mask->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+ mask->svlan_tag = 0;
+ }
+}
+
+static int
+dr_ste_v0_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+ if (spec->smac_47_16 || spec->smac_15_0) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32,
+ spec->smac_47_16 >> 16);
+ MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0,
+ spec->smac_47_16 << 16 | spec->smac_15_0);
+ spec->smac_47_16 = 0;
+ spec->smac_15_0 = 0;
+ }
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio);
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_dst_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_dst_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_src_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag;
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_src, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_mask->inner_second_cvlan_tag ||
+ misc_mask->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->inner_second_cvlan_tag = 0;
+ misc_mask->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_vlan_id, misc_mask, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_cfi, misc_mask, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_priority, misc_mask, inner_second_prio);
+ } else {
+ if (misc_mask->outer_second_cvlan_tag ||
+ misc_mask->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->outer_second_cvlan_tag = 0;
+ misc_mask->outer_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_vlan_id, misc_mask, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_cfi, misc_mask, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_priority, misc_mask, outer_second_prio);
+ }
+}
+
+static int
+dr_ste_v0_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
+ bool inner, u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_spec = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype);
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_spec->inner_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->inner_second_cvlan_tag = 0;
+ } else if (misc_spec->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio);
+ } else {
+ if (misc_spec->outer_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->outer_second_cvlan_tag = 0;
+ } else if (misc_spec->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->outer_second_svlan_tag = 0;
+ }
+ DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio);
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0);
+
+ dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int
+dr_ste_v0_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0);
+
+ return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask);
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_tag;
+}
+
+static void
+dr_ste_v0_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, sb->inner, bit_mask);
+}
+
+static int
+dr_ste_v0_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+ return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+static void
+dr_ste_v0_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_dst_bit_mask(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_dst_tag;
+}
+
+static void
+dr_ste_v0_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_tnl, bit_mask, l3_type, mask, ip_version);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl, bit_mask,
+ l2_tunneling_network_id, (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+}
+
+static int
+dr_ste_v0_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id,
+ (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_tnl_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit);
+ DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, ihl, spec, ipv4_ihl);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_misc_tag;
+}
+
+static int
+dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn);
+ DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit);
+
+ if (sb->inner)
+ DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, inner_ipv6_flow_label);
+ else
+ DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, outer_ipv6_flow_label);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_ipv6_l3_l4_tag;
+}
+
+static int
+dr_ste_v0_build_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ if (sb->inner)
+ DR_STE_SET_MPLS(mpls, misc2, inner, tag);
+ else
+ DR_STE_SET_MPLS(mpls, misc2, outer, tag);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_mpls_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol);
+
+ DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present);
+ DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h);
+ DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l);
+
+ DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present);
+
+ DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_gre_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_GRE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gre_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc_2 = &value->misc2;
+ u32 mpls_hdr;
+
+ if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2)) {
+ mpls_hdr = misc_2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+ misc_2->outer_first_mpls_over_gre_label = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+ misc_2->outer_first_mpls_over_gre_exp = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc_2->outer_first_mpls_over_gre_s_bos = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+ misc_2->outer_first_mpls_over_gre_ttl = 0;
+ } else {
+ mpls_hdr = misc_2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+ misc_2->outer_first_mpls_over_udp_label = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+ misc_2->outer_first_mpls_over_udp_exp = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc_2->outer_first_mpls_over_udp_s_bos = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+ misc_2->outer_first_mpls_over_udp_ttl = 0;
+ }
+
+ MLX5_SET(ste_flex_parser_0, tag, flex_parser_3, mpls_hdr);
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_udp_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_udp_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_udp_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_udp_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_udp;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask);
+ /* STEs with lookup type FLEX_PARSER_{0/1} includes
+ * flex parsers_{0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_udp_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_gre_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_gre_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_gre_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_gre_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_gre;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} includes
+ * flex parsers_{0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_gre_tag;
+}
+
+#define ICMP_TYPE_OFFSET_FIRST_DW 24
+#define ICMP_CODE_OFFSET_FIRST_DW 16
+
+static int
+dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc_3 = &value->misc3;
+ u32 *icmp_header_data;
+ int dw0_location;
+ int dw1_location;
+ u8 *parser_ptr;
+ u8 *icmp_type;
+ u8 *icmp_code;
+ bool is_ipv4;
+ u32 icmp_hdr;
+
+ is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3);
+ if (is_ipv4) {
+ icmp_header_data = &misc_3->icmpv4_header_data;
+ icmp_type = &misc_3->icmpv4_type;
+ icmp_code = &misc_3->icmpv4_code;
+ dw0_location = sb->caps->flex_parser_id_icmp_dw0;
+ dw1_location = sb->caps->flex_parser_id_icmp_dw1;
+ } else {
+ icmp_header_data = &misc_3->icmpv6_header_data;
+ icmp_type = &misc_3->icmpv6_type;
+ icmp_code = &misc_3->icmpv6_code;
+ dw0_location = sb->caps->flex_parser_id_icmpv6_dw0;
+ dw1_location = sb->caps->flex_parser_id_icmpv6_dw1;
+ }
+
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw0_location);
+ icmp_hdr = (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) |
+ (*icmp_code << ICMP_CODE_OFFSET_FIRST_DW);
+ *(__be32 *)parser_ptr = cpu_to_be32(icmp_hdr);
+ *icmp_code = 0;
+ *icmp_type = 0;
+
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw1_location);
+ *(__be32 *)parser_ptr = cpu_to_be32(*icmp_header_data);
+ *icmp_header_data = 0;
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ u8 parser_id;
+ bool is_ipv4;
+
+ dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} includes
+ * flex parsers_{0-3}/{4-7} respectively.
+ */
+ is_ipv4 = DR_MASK_IS_ICMPV4_SET(&mask->misc3);
+ parser_id = is_ipv4 ? sb->caps->flex_parser_id_icmp_dw0 :
+ sb->caps->flex_parser_id_icmpv6_dw0;
+ sb->lu_type = parser_id > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_icmp_tag;
+}
+
+static int
+dr_ste_v0_build_general_purpose_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc_2 = &value->misc2;
+
+ DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
+ misc_2, metadata_reg_a);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_general_purpose_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_GENERAL_PURPOSE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_general_purpose_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ if (sb->inner) {
+ DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num);
+ } else {
+ DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num);
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l4_misc_tag;
+}
+
+static int
+dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_flags, misc3,
+ outer_vxlan_gpe_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_next_protocol, misc3,
+ outer_vxlan_gpe_next_protocol);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_vni, misc3,
+ outer_vxlan_gpe_vni);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask);
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag;
+}
+
+static int
+dr_ste_v0_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_protocol_type, misc, geneve_protocol_type);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_oam, misc, geneve_oam);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_opt_len, misc, geneve_opt_len);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_vni, misc, geneve_vni);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask);
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tag;
+}
+
+static int
+dr_ste_v0_build_register_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
+ DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
+ DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2);
+ DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_register_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_register_0_tag;
+}
+
+static int
+dr_ste_v0_build_register_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
+ DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
+ DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6);
+ DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_register_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_register_1_tag;
+}
+
+static void
+dr_ste_v0_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
+ DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
+ misc_mask->source_eswitch_owner_vhca_id = 0;
+}
+
+static int
+dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ struct mlx5dr_domain *dmn = sb->dmn;
+ struct mlx5dr_domain *vport_dmn;
+ u8 *bit_mask = sb->bit_mask;
+ bool source_gvmi_set;
+
+ DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
+
+ if (sb->vhca_id_valid) {
+ /* Find port GVMI based on the eswitch_owner_vhca_id */
+ if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+ vport_dmn = dmn;
+ else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
+ dmn->peer_dmn->info.caps.gvmi))
+ vport_dmn = dmn->peer_dmn;
+ else
+ return -EINVAL;
+
+ misc->source_eswitch_owner_vhca_id = 0;
+ } else {
+ vport_dmn = dmn;
+ }
+
+ source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi);
+ if (source_gvmi_set) {
+ vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn,
+ misc->source_port);
+ if (!vport_cap) {
+ mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n",
+ misc->source_port);
+ return -EINVAL;
+ }
+
+ if (vport_cap->vport_gvmi)
+ MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
+
+ misc->source_port = 0;
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_src_gvmi_qpn_tag;
+}
+
+static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id,
+ u32 *misc4_field_value,
+ bool *parser_is_used,
+ u8 *tag)
+{
+ u32 id = *misc4_field_id;
+ u8 *parser_ptr;
+
+ if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id])
+ return;
+
+ parser_is_used[id] = true;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, id);
+
+ *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value);
+ *misc4_field_id = 0;
+ *misc4_field_value = 0;
+}
+
+static int dr_ste_v0_build_flex_parser_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4;
+ bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {};
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_0,
+ &misc_4_mask->prog_sample_field_value_0,
+ parser_is_used, tag);
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_1,
+ &misc_4_mask->prog_sample_field_value_1,
+ parser_is_used, tag);
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_2,
+ &misc_4_mask->prog_sample_field_value_2,
+ parser_is_used, tag);
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_3,
+ &misc_4_mask->prog_sample_field_value_3,
+ parser_is_used, tag);
+
+ return 0;
+}
+
+static void dr_ste_v0_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag;
+}
+
+static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1;
+ dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag;
+}
+
+static int
+dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+ u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+
+ MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3,
+ misc3->geneve_tlv_option_0_data);
+ misc3->geneve_tlv_option_0_data = 0;
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} includes
+ * flex parsers_{0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag;
+}
+
+static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+ gtpu_msg_flags, misc3,
+ gtpu_msg_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+ gtpu_msg_type, misc3,
+ gtpu_msg_type);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+ gtpu_teid, misc3,
+ gtpu_teid);
+
+ return 0;
+}
+
+static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_gtpu_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag;
+}
+
+static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ uint8_t *tag)
+{
+ struct mlx5dr_match_misc5 *misc5 = &value->misc5;
+
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0);
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1);
+
+ return 0;
+}
+
+static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V0_LU_TYPE_TUNNEL_HEADER;
+ dr_ste_v0_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag;
+}
+
+static struct mlx5dr_ste_ctx ste_ctx_v0 = {
+ /* Builders */
+ .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init,
+ .build_eth_l3_ipv6_src_init = &dr_ste_v0_build_eth_l3_ipv6_src_init,
+ .build_eth_l3_ipv6_dst_init = &dr_ste_v0_build_eth_l3_ipv6_dst_init,
+ .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_init,
+ .build_eth_l2_src_init = &dr_ste_v0_build_eth_l2_src_init,
+ .build_eth_l2_dst_init = &dr_ste_v0_build_eth_l2_dst_init,
+ .build_eth_l2_tnl_init = &dr_ste_v0_build_eth_l2_tnl_init,
+ .build_eth_l3_ipv4_misc_init = &dr_ste_v0_build_eth_l3_ipv4_misc_init,
+ .build_eth_ipv6_l3_l4_init = &dr_ste_v0_build_eth_ipv6_l3_l4_init,
+ .build_mpls_init = &dr_ste_v0_build_mpls_init,
+ .build_tnl_gre_init = &dr_ste_v0_build_tnl_gre_init,
+ .build_tnl_mpls_init = &dr_ste_v0_build_tnl_mpls_init,
+ .build_tnl_mpls_over_udp_init = &dr_ste_v0_build_tnl_mpls_over_udp_init,
+ .build_tnl_mpls_over_gre_init = &dr_ste_v0_build_tnl_mpls_over_gre_init,
+ .build_icmp_init = &dr_ste_v0_build_icmp_init,
+ .build_general_purpose_init = &dr_ste_v0_build_general_purpose_init,
+ .build_eth_l4_misc_init = &dr_ste_v0_build_eth_l4_misc_init,
+ .build_tnl_vxlan_gpe_init = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init,
+ .build_tnl_geneve_init = &dr_ste_v0_build_flex_parser_tnl_geneve_init,
+ .build_tnl_geneve_tlv_opt_init = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_register_0_init = &dr_ste_v0_build_register_0_init,
+ .build_register_1_init = &dr_ste_v0_build_register_1_init,
+ .build_src_gvmi_qpn_init = &dr_ste_v0_build_src_gvmi_qpn_init,
+ .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init,
+ .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init,
+ .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v0_build_tnl_header_0_1_init,
+ .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init,
+ .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init,
+
+ /* Getters and Setters */
+ .ste_init = &dr_ste_v0_init,
+ .set_next_lu_type = &dr_ste_v0_set_next_lu_type,
+ .get_next_lu_type = &dr_ste_v0_get_next_lu_type,
+ .set_miss_addr = &dr_ste_v0_set_miss_addr,
+ .get_miss_addr = &dr_ste_v0_get_miss_addr,
+ .set_hit_addr = &dr_ste_v0_set_hit_addr,
+ .set_byte_mask = &dr_ste_v0_set_byte_mask,
+ .get_byte_mask = &dr_ste_v0_get_byte_mask,
+
+ /* Actions */
+ .actions_caps = DR_STE_CTX_ACTION_CAP_NONE,
+ .set_actions_rx = &dr_ste_v0_set_actions_rx,
+ .set_actions_tx = &dr_ste_v0_set_actions_tx,
+ .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v0_action_modify_field_arr),
+ .modify_field_arr = dr_ste_v0_action_modify_field_arr,
+ .set_action_set = &dr_ste_v0_set_action_set,
+ .set_action_add = &dr_ste_v0_set_action_add,
+ .set_action_copy = &dr_ste_v0_set_action_copy,
+ .set_action_decap_l3_list = &dr_ste_v0_set_action_decap_l3_list,
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void)
+{
+ return &ste_ctx_v0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
new file mode 100644
index 000000000000..ee677a5c76be
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -0,0 +1,2172 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#include <linux/types.h>
+#include "mlx5_ifc_dr_ste_v1.h"
+#include "dr_ste_v1.h"
+
+#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \
+ ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \
+ DR_STE_V1_LU_TYPE_##lookup_type##_O)
+
+enum dr_ste_v1_entry_format {
+ DR_STE_V1_TYPE_BWC_BYTE = 0x0,
+ DR_STE_V1_TYPE_BWC_DW = 0x1,
+ DR_STE_V1_TYPE_MATCH = 0x2,
+};
+
+/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */
+enum {
+ DR_STE_V1_LU_TYPE_NOP = 0x0000,
+ DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002,
+ DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102,
+ DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003,
+ DR_STE_V1_LU_TYPE_IBL4 = 0x0103,
+ DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004,
+ DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005,
+ DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006,
+ DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007,
+ DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008,
+ DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108,
+ DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009,
+ DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109,
+ DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a,
+ DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b,
+ DR_STE_V1_LU_TYPE_MPLS_O = 0x010b,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c,
+ DR_STE_V1_LU_TYPE_MPLS_I = 0x010c,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d,
+ DR_STE_V1_LU_TYPE_GRE = 0x010d,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e,
+ DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f,
+ DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f,
+ DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112,
+ DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113,
+ DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114,
+ DR_STE_V1_LU_TYPE_INVALID = 0x00ff,
+ DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE,
+};
+
+enum dr_ste_v1_header_anchors {
+ DR_STE_HEADER_ANCHOR_START_OUTER = 0x00,
+ DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02,
+ DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07,
+ DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13,
+ DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19,
+};
+
+enum dr_ste_v1_action_size {
+ DR_STE_ACTION_SINGLE_SZ = 4,
+ DR_STE_ACTION_DOUBLE_SZ = 8,
+ DR_STE_ACTION_TRIPLE_SZ = 12,
+};
+
+enum dr_ste_v1_action_insert_ptr_attr {
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. push vlan) */
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */
+};
+
+enum dr_ste_v1_action_id {
+ DR_STE_V1_ACTION_ID_NOP = 0x00,
+ DR_STE_V1_ACTION_ID_COPY = 0x05,
+ DR_STE_V1_ACTION_ID_SET = 0x06,
+ DR_STE_V1_ACTION_ID_ADD = 0x07,
+ DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09,
+ DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a,
+ DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b,
+ DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c,
+ DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d,
+ DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e,
+ DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f,
+ DR_STE_V1_ACTION_ID_ASO = 0x12,
+ DR_STE_V1_ACTION_ID_TRAILER = 0x13,
+ DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14,
+ DR_STE_V1_ACTION_ID_MAX = 0x21,
+ /* use for special cases */
+ DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22,
+};
+
+enum {
+ DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00,
+ DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01,
+ DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02,
+ DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08,
+ DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09,
+ DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e,
+ DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18,
+ DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f,
+ DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e,
+ DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f,
+ DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f,
+ DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70,
+ DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b,
+ DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91,
+};
+
+enum dr_ste_v1_aso_ctx_type {
+ DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2,
+};
+
+static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15,
+ },
+};
+
+static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type);
+}
+
+void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+{
+ u64 index = miss_addr >> 6;
+
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32, index >> 26);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index);
+}
+
+u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p)
+{
+ u64 index =
+ ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) |
+ ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26);
+
+ return index << 6;
+}
+
+void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask);
+}
+
+u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask);
+}
+
+static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, lu_type >> 8);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF);
+}
+
+void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF);
+}
+
+u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p)
+{
+ u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format);
+ u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx);
+
+ return (mode << 8 | index);
+}
+
+static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi);
+}
+
+void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
+{
+ u64 index = (icm_addr >> 5) | ht_size;
+
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_39_32_size, index >> 27);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index);
+}
+
+void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi)
+{
+ dr_ste_v1_set_lu_type(hw_ste_p, lu_type);
+ dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
+
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, gvmi, gvmi);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi);
+}
+
+void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size)
+{
+ u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL;
+ u8 *mask = tag + DR_STE_SIZE_TAG;
+ u8 tmp_tag[DR_STE_SIZE_TAG] = {};
+
+ if (ste_size == DR_STE_SIZE_CTRL)
+ return;
+
+ WARN_ON(ste_size != DR_STE_SIZE);
+
+ /* Backup tag */
+ memcpy(tmp_tag, tag, DR_STE_SIZE_TAG);
+
+ /* Swap mask and tag both are the same size */
+ memcpy(tag, mask, DR_STE_SIZE_MASK);
+ memcpy(mask, tmp_tag, DR_STE_SIZE_TAG);
+}
+
+static void dr_ste_v1_set_rx_flow_tag(u8 *s_action, u32 flow_tag)
+{
+ MLX5_SET(ste_single_action_flow_tag_v1, s_action, action_id,
+ DR_STE_V1_ACTION_ID_FLOW_TAG);
+ MLX5_SET(ste_single_action_flow_tag_v1, s_action, flow_tag, flow_tag);
+}
+
+static void dr_ste_v1_set_counter_id(u8 *hw_ste_p, u32 ctr_id)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, counter_id, ctr_id);
+}
+
+static void dr_ste_v1_set_reparse(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1);
+}
+
+static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action,
+ u32 reformat_id, int size)
+{
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id,
+ DR_STE_V1_ACTION_ID_INSERT_POINTER);
+ /* The hardware expects here size in words (2 byte) */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes,
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP);
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
+ u32 reformat_id,
+ u8 anchor, u8 offset,
+ int size)
+{
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action,
+ action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_anchor, anchor);
+
+ /* The hardware expects here size and offset in words (2 byte) */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_offset, offset / 2);
+
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes,
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action,
+ u8 anchor, u8 offset,
+ int size)
+{
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_anchor, anchor);
+
+ /* The hardware expects here size and offset in words (2 byte) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, remove_size, size / 2);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_offset, offset / 2);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action,
+ u32 vlan_hdr)
+{
+ MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
+ action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE);
+ /* The hardware expects offset to vlan header in words (2 byte) */
+ MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
+ start_offset, HDR_LEN_L2_MACS >> 1);
+ MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
+ inline_data, vlan_hdr);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num)
+{
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ start_anchor, DR_STE_HEADER_ANCHOR_1ST_VLAN);
+ /* The hardware expects here size in words (2 byte) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ remove_size, (HDR_LEN_L2_VLAN >> 1) * vlans_num);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p,
+ u8 *frst_s_action,
+ u8 *scnd_d_action,
+ u32 reformat_id,
+ int size)
+{
+ /* Remove L2 headers */
+ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER);
+ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, end_anchor,
+ DR_STE_HEADER_ANCHOR_IPV6_IPV4);
+
+ /* Encapsulate with given reformat ID */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, action_id,
+ DR_STE_V1_ACTION_ID_INSERT_POINTER);
+ /* The hardware expects here size in words (2 byte) */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, size, size / 2);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, pointer, reformat_id);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, attributes,
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action)
+{
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER);
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, decap, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, vni_to_cqe, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, end_anchor,
+ DR_STE_HEADER_ANCHOR_INNER_MAC);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
+ u8 *s_action,
+ u16 num_of_actions,
+ u32 re_write_index)
+{
+ MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id,
+ DR_STE_V1_ACTION_ID_MODIFY_LIST);
+ MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions,
+ num_of_actions);
+ MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr,
+ re_write_index);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_aso_flow_meter(u8 *d_action,
+ u32 object_id,
+ u32 offset,
+ u8 dest_reg_id,
+ u8 init_color)
+{
+ MLX5_SET(ste_double_action_aso_v1, d_action, action_id,
+ DR_STE_V1_ACTION_ID_ASO);
+ MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_number,
+ object_id + (offset / MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ));
+ /* Convert reg_c index to HW 64bit index */
+ MLX5_SET(ste_double_action_aso_v1, d_action, dest_reg_id,
+ (dest_reg_id - 1) / 2);
+ MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_type,
+ DR_STE_V1_ASO_CTX_TYPE_POLICERS);
+ MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.line_id,
+ offset % MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ);
+ MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.initial_color,
+ init_color);
+}
+
+static void dr_ste_v1_arr_init_next_match(u8 **last_ste,
+ u32 *added_stes,
+ u16 gvmi)
+{
+ u8 *action;
+
+ (*added_stes)++;
+ *last_ste += DR_STE_SIZE;
+ dr_ste_v1_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, 0, gvmi);
+ dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH);
+
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, *last_ste, action);
+ memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action));
+}
+
+void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
+ u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
+ bool allow_modify_hdr = true;
+ bool allow_encap = true;
+
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+
+ /* Check if vlan_pop and modify_hdr on same STE is supported */
+ if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY))
+ allow_modify_hdr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
+
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_rewrite_actions(last_ste, action,
+ attr->modify_actions,
+ attr->modify_index);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_encap = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ || !allow_encap) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_encap = true;
+ }
+ dr_ste_v1_set_push_vlan(last_ste, action,
+ attr->vlans.headers[i]);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+ }
+
+ if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) {
+ if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_encap = true;
+ }
+ dr_ste_v1_set_encap(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) {
+ u8 *d_action;
+
+ if (action_sz < DR_STE_ACTION_TRIPLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ d_action = action + DR_STE_ACTION_SINGLE_SZ;
+
+ dr_ste_v1_set_encap_l3(last_ste,
+ action, d_action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_TRIPLE_SZ;
+ action += DR_STE_ACTION_TRIPLE_SZ;
+ } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) {
+ if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_insert_hdr(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_aso_flow_meter(action,
+ attr->aso_flow_meter.obj_id,
+ attr->aso_flow_meter.offset,
+ attr->aso_flow_meter.dest_reg_id,
+ attr->aso_flow_meter.init_color);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
+ dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
+void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
+ u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
+ bool allow_modify_hdr = true;
+ bool allow_ctr = true;
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
+ dr_ste_v1_set_rewrite_actions(last_ste, action,
+ attr->decap_actions,
+ attr->decap_index);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_modify_hdr = false;
+ allow_ctr = false;
+ } else if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) {
+ dr_ste_v1_set_rx_decap(last_ste, action);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ allow_modify_hdr = false;
+ allow_ctr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TAG]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ allow_ctr = true;
+ }
+ dr_ste_v1_set_rx_flow_tag(action, attr->flow_tag);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ ||
+ !allow_modify_hdr) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+
+ dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ allow_ctr = false;
+
+ /* Check if vlan_pop and modify_hdr on same STE is supported */
+ if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY))
+ allow_modify_hdr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ /* Modify header and decapsulation must use different STEs */
+ if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ allow_ctr = true;
+ }
+ dr_ste_v1_set_rewrite_actions(last_ste, action,
+ attr->modify_actions,
+ attr->modify_index);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ ||
+ !allow_modify_hdr) {
+ dr_ste_v1_arr_init_next_match(&last_ste,
+ added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_push_vlan(last_ste, action,
+ attr->vlans.headers[i]);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+ }
+
+ if (action_type_set[DR_ACTION_TYP_CTR]) {
+ /* Counter action set after decap and before insert_hdr
+ * to exclude decaped / encaped header respectively.
+ */
+ if (!allow_ctr) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ }
+ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
+ allow_ctr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_encap(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_modify_hdr = false;
+ } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) {
+ u8 *d_action;
+
+ if (action_sz < DR_STE_ACTION_TRIPLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+
+ d_action = action + DR_STE_ACTION_SINGLE_SZ;
+
+ dr_ste_v1_set_encap_l3(last_ste,
+ action, d_action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = false;
+ } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) {
+ /* Modify header, decap, and encap must use different STEs */
+ if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_insert_hdr(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_modify_hdr = false;
+ } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ allow_ctr = true;
+ }
+ dr_ste_v1_set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_aso_flow_meter(action,
+ attr->aso_flow_meter.obj_id,
+ attr->aso_flow_meter.offset,
+ attr->aso_flow_meter.dest_reg_id,
+ attr->aso_flow_meter.init_color);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
+ dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
+void dr_ste_v1_set_action_set(u8 *d_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET);
+ MLX5_SET(ste_double_action_set_v1, d_action, destination_dw_offset, hw_field);
+ MLX5_SET(ste_double_action_set_v1, d_action, destination_left_shifter, shifter);
+ MLX5_SET(ste_double_action_set_v1, d_action, destination_length, length);
+ MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data);
+}
+
+void dr_ste_v1_set_action_add(u8 *d_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD);
+ MLX5_SET(ste_double_action_add_v1, d_action, destination_dw_offset, hw_field);
+ MLX5_SET(ste_double_action_add_v1, d_action, destination_left_shifter, shifter);
+ MLX5_SET(ste_double_action_add_v1, d_action, destination_length, length);
+ MLX5_SET(ste_double_action_add_v1, d_action, add_value, data);
+}
+
+void dr_ste_v1_set_action_copy(u8 *d_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter)
+{
+ dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ MLX5_SET(ste_double_action_copy_v1, d_action, action_id, DR_STE_V1_ACTION_ID_COPY);
+ MLX5_SET(ste_double_action_copy_v1, d_action, destination_dw_offset, dst_hw_field);
+ MLX5_SET(ste_double_action_copy_v1, d_action, destination_left_shifter, dst_shifter);
+ MLX5_SET(ste_double_action_copy_v1, d_action, destination_length, dst_len);
+ MLX5_SET(ste_double_action_copy_v1, d_action, source_dw_offset, src_hw_field);
+ MLX5_SET(ste_double_action_copy_v1, d_action, source_right_shifter, src_shifter);
+}
+
+#define DR_STE_DECAP_L3_ACTION_NUM 8
+#define DR_STE_L2_HDR_MAX_SZ 20
+
+int dr_ste_v1_set_action_decap_l3_list(void *data,
+ u32 data_sz,
+ u8 *hw_action,
+ u32 hw_action_sz,
+ u16 *used_hw_action_num)
+{
+ u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {};
+ void *data_ptr = padded_data;
+ u16 used_actions = 0;
+ u32 inline_data_sz;
+ u32 i;
+
+ if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM)
+ return -EINVAL;
+
+ inline_data_sz =
+ MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+
+ /* Add an alignment padding */
+ memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
+
+ /* Remove L2L3 outer headers */
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER);
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, decap, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, vni_to_cqe, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, end_anchor,
+ DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4);
+ hw_action += DR_STE_ACTION_DOUBLE_SZ;
+ used_actions++; /* Remove and NOP are a single double action */
+
+ /* Point to the last dword of the header */
+ data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
+
+ /* Add the new header using inline action 4Byte at a time, the header
+ * is added in reversed order to the beginning of the packet to avoid
+ * incorrect parsing by the HW. Since header is 14B or 18B an extra
+ * two bytes are padded and later removed.
+ */
+ for (i = 0; i < data_sz / inline_data_sz + 1; i++) {
+ void *addr_inline;
+
+ MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_INSERT_INLINE);
+ /* The hardware expects here offset to words (2 bytes) */
+ MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
+
+ /* Copy bytes one by one to avoid endianness problem */
+ addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1,
+ hw_action, inline_data);
+ memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz);
+ hw_action += DR_STE_ACTION_DOUBLE_SZ;
+ used_actions++;
+ }
+
+ /* Remove first 2 extra bytes */
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0);
+ /* The hardware expects here size in words (2 bytes) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1);
+ used_actions++;
+
+ *used_hw_action_num = used_actions;
+
+ return 0;
+}
+
+static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_47_16, mask, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_15_0, mask, smac_15_0);
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_ONES(eth_l2_src_dst_v1, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ } else if (mask->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->svlan_tag = 0;
+ }
+}
+
+static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_15_0, spec, dmac_15_0);
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_47_16, spec, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_15_0, spec, smac_15_0);
+
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else if (spec->ip_version) {
+ return -EINVAL;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_priority, spec, first_prio);
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC_DST, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_dst_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0);
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_DES, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_dst_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0);
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_SRC, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_src_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_address, spec, dst_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_address, spec, src_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, ecn, spec, ip_ecn);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple_v1, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_5_TUPLE, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag;
+}
+
+static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_mask->inner_second_cvlan_tag ||
+ misc_mask->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->inner_second_cvlan_tag = 0;
+ misc_mask->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_vlan_id, misc_mask, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_cfi, misc_mask, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_priority, misc_mask, inner_second_prio);
+ } else {
+ if (misc_mask->outer_second_cvlan_tag ||
+ misc_mask->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->outer_second_cvlan_tag = 0;
+ misc_mask->outer_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_vlan_id, misc_mask, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_cfi, misc_mask, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_priority, misc_mask, outer_second_prio);
+ }
+}
+
+static int dr_ste_v1_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
+ bool inner, u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_spec = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, l3_ethertype, spec, ethertype);
+
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else if (spec->ip_version) {
+ return -EINVAL;
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_spec->inner_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->inner_second_cvlan_tag = 0;
+ } else if (misc_spec->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, inner_second_prio);
+ } else {
+ if (misc_spec->outer_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->outer_second_cvlan_tag = 0;
+ } else if (misc_spec->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->outer_second_svlan_tag = 0;
+ }
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, outer_second_prio);
+ }
+
+ return 0;
+}
+
+static void dr_ste_v1_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_47_16, mask, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_15_0, mask, smac_15_0);
+
+ dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_47_16, spec, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_15_0, spec, smac_15_0);
+
+ return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_tag;
+}
+
+static void dr_ste_v1_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_15_0, spec, dmac_15_0);
+
+ return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_dst_tag;
+}
+
+static void dr_ste_v1_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_15_0, mask, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_tnl_v1, bit_mask, l3_type, mask, ip_version);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl_v1, bit_mask,
+ l2_tunneling_network_id, (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+}
+
+static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_15_0, spec, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, l3_ethertype, spec, ethertype);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, l2_tunneling_network_id,
+ (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else if (spec->ip_version) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_ETHL2_TNL;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_tnl_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit);
+ DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, ihl, spec, ipv4_ihl);
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_MISC, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_misc_tag;
+}
+
+static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l4_v1, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l4_v1, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l4_v1, tag, ecn, spec, ip_ecn);
+ DR_STE_SET_TAG(eth_l4_v1, tag, ipv6_hop_limit, spec, ttl_hoplimit);
+
+ if (sb->inner)
+ DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, inner_ipv6_flow_label);
+ else
+ DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, outer_ipv6_flow_label);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l4_v1, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL4, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_ipv6_l3_l4_tag;
+}
+
+static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ if (sb->inner)
+ DR_STE_SET_MPLS(mpls_v1, misc2, inner, tag);
+ else
+ DR_STE_SET_MPLS(mpls_v1, misc2, outer, tag);
+
+ return 0;
+}
+
+void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(MPLS, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_mpls_tag;
+}
+
+static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(gre_v1, tag, gre_protocol, misc, gre_protocol);
+ DR_STE_SET_TAG(gre_v1, tag, gre_k_present, misc, gre_k_present);
+ DR_STE_SET_TAG(gre_v1, tag, gre_key_h, misc, gre_key_h);
+ DR_STE_SET_TAG(gre_v1, tag, gre_key_l, misc, gre_key_l);
+
+ DR_STE_SET_TAG(gre_v1, tag, gre_c_present, misc, gre_c_present);
+ DR_STE_SET_TAG(gre_v1, tag, gre_s_present, misc, gre_s_present);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_GRE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gre_tag;
+}
+
+static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc2)) {
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_label,
+ misc2, outer_first_mpls_over_gre_label);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp,
+ misc2, outer_first_mpls_over_gre_exp);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos,
+ misc2, outer_first_mpls_over_gre_s_bos);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl,
+ misc2, outer_first_mpls_over_gre_ttl);
+ } else {
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_label,
+ misc2, outer_first_mpls_over_udp_label);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp,
+ misc2, outer_first_mpls_over_udp_exp);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos,
+ misc2, outer_first_mpls_over_udp_s_bos);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl,
+ misc2, outer_first_mpls_over_udp_ttl);
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_MPLS_I;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag;
+}
+
+static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_udp_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_udp_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_udp_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_udp_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_udp;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} includes
+ * flex parsers_{0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_udp_tag;
+}
+
+static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_gre_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_gre_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_gre_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_gre_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_gre;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} includes
+ * flex parsers_{0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_gre_tag;
+}
+
+static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ bool is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc3);
+ u32 *icmp_header_data;
+ u8 *icmp_type;
+ u8 *icmp_code;
+
+ if (is_ipv4) {
+ icmp_header_data = &misc3->icmpv4_header_data;
+ icmp_type = &misc3->icmpv4_type;
+ icmp_code = &misc3->icmpv4_code;
+ } else {
+ icmp_header_data = &misc3->icmpv6_header_data;
+ icmp_type = &misc3->icmpv6_type;
+ icmp_code = &misc3->icmpv6_code;
+ }
+
+ MLX5_SET(ste_icmp_v1, tag, icmp_header_data, *icmp_header_data);
+ MLX5_SET(ste_icmp_v1, tag, icmp_type, *icmp_type);
+ MLX5_SET(ste_icmp_v1, tag, icmp_code, *icmp_code);
+
+ *icmp_header_data = 0;
+ *icmp_type = 0;
+ *icmp_code = 0;
+
+ return 0;
+}
+
+void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag;
+}
+
+static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
+ misc2, metadata_reg_a);
+
+ return 0;
+}
+
+void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_GENERAL_PURPOSE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_general_purpose_tag;
+}
+
+static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ if (sb->inner) {
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, inner_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, inner_tcp_ack_num);
+ } else {
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, outer_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, outer_tcp_ack_num);
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l4_misc_tag;
+}
+
+static int
+dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_flags, misc3,
+ outer_vxlan_gpe_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_next_protocol, misc3,
+ outer_vxlan_gpe_next_protocol);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_vni, misc3,
+ outer_vxlan_gpe_vni);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag;
+}
+
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_protocol_type, misc, geneve_protocol_type);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_oam, misc, geneve_oam);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_opt_len, misc, geneve_opt_len);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_vni, misc, geneve_vni);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag;
+}
+
+static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ uint8_t *tag)
+{
+ struct mlx5dr_match_misc5 *misc5 = &value->misc5;
+
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0);
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_header_0_1_tag;
+}
+
+static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
+ DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
+ DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2);
+ DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3);
+
+ return 0;
+}
+
+void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_register_0_tag;
+}
+
+static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
+ DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
+ DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6);
+ DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7);
+
+ return 0;
+}
+
+void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_register_1_tag;
+}
+
+static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port);
+ DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn);
+ misc_mask->source_eswitch_owner_vhca_id = 0;
+}
+
+static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ struct mlx5dr_domain *dmn = sb->dmn;
+ struct mlx5dr_domain *vport_dmn;
+ u8 *bit_mask = sb->bit_mask;
+
+ DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn);
+
+ if (sb->vhca_id_valid) {
+ /* Find port GVMI based on the eswitch_owner_vhca_id */
+ if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+ vport_dmn = dmn;
+ else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
+ dmn->peer_dmn->info.caps.gvmi))
+ vport_dmn = dmn->peer_dmn;
+ else
+ return -EINVAL;
+
+ misc->source_eswitch_owner_vhca_id = 0;
+ } else {
+ vport_dmn = dmn;
+ }
+
+ if (!MLX5_GET(ste_src_gvmi_qp_v1, bit_mask, source_gvmi))
+ return 0;
+
+ vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, misc->source_port);
+ if (!vport_cap) {
+ mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n",
+ misc->source_port);
+ return -EINVAL;
+ }
+
+ if (vport_cap->vport_gvmi)
+ MLX5_SET(ste_src_gvmi_qp_v1, tag, source_gvmi, vport_cap->vport_gvmi);
+
+ misc->source_port = 0;
+ return 0;
+}
+
+void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_SRC_QP_GVMI;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag;
+}
+
+static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id,
+ u32 *misc4_field_value,
+ bool *parser_is_used,
+ u8 *tag)
+{
+ u32 id = *misc4_field_id;
+ u8 *parser_ptr;
+
+ if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id])
+ return;
+
+ parser_is_used[id] = true;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, id);
+
+ *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value);
+ *misc4_field_id = 0;
+ *misc4_field_value = 0;
+}
+
+static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4;
+ bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {};
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_0,
+ &misc_4_mask->prog_sample_field_value_0,
+ parser_is_used, tag);
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_1,
+ &misc_4_mask->prog_sample_field_value_1,
+ parser_is_used, tag);
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_2,
+ &misc_4_mask->prog_sample_field_value_2,
+ parser_is_used, tag);
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_3,
+ &misc_4_mask->prog_sample_field_value_3,
+ parser_is_used, tag);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+ dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag;
+}
+
+void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1;
+ dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag;
+}
+
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+ u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+
+ MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3,
+ misc3->geneve_tlv_option_0_data);
+ misc3->geneve_tlv_option_0_data = 0;
+
+ return 0;
+}
+
+void
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} includes
+ * flex parsers_{0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ?
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag;
+}
+
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ uint8_t *tag)
+{
+ u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ if (misc->geneve_tlv_option_0_exist) {
+ MLX5_SET(ste_flex_parser_ok, tag, flex_parsers_ok, 1 << parser_id);
+ misc->geneve_tlv_option_0_exist = 0;
+ }
+
+ return 0;
+}
+
+void
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_OK;
+ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag;
+}
+
+static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_flags, misc3, gtpu_msg_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_type, misc3, gtpu_msg_type);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_teid, misc3, gtpu_teid);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_gtpu_tag;
+}
+
+static int
+dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+void
+dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag;
+}
+
+static int
+dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+void
+dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag;
+}
+
+static struct mlx5dr_ste_ctx ste_ctx_v1 = {
+ /* Builders */
+ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
+ .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init,
+ .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init,
+ .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init,
+ .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init,
+ .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init,
+ .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init,
+ .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init,
+ .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init,
+ .build_mpls_init = &dr_ste_v1_build_mpls_init,
+ .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init,
+ .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init,
+ .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init,
+ .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init,
+ .build_icmp_init = &dr_ste_v1_build_icmp_init,
+ .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init,
+ .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init,
+ .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init,
+ .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init,
+ .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_tnl_geneve_tlv_opt_exist_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init,
+ .build_register_0_init = &dr_ste_v1_build_register_0_init,
+ .build_register_1_init = &dr_ste_v1_build_register_1_init,
+ .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init,
+ .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init,
+ .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init,
+ .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init,
+ .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init,
+ .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init,
+
+ /* Getters and Setters */
+ .ste_init = &dr_ste_v1_init,
+ .set_next_lu_type = &dr_ste_v1_set_next_lu_type,
+ .get_next_lu_type = &dr_ste_v1_get_next_lu_type,
+ .set_miss_addr = &dr_ste_v1_set_miss_addr,
+ .get_miss_addr = &dr_ste_v1_get_miss_addr,
+ .set_hit_addr = &dr_ste_v1_set_hit_addr,
+ .set_byte_mask = &dr_ste_v1_set_byte_mask,
+ .get_byte_mask = &dr_ste_v1_get_byte_mask,
+ /* Actions */
+ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP |
+ DR_STE_CTX_ACTION_CAP_RX_PUSH |
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP |
+ DR_STE_CTX_ACTION_CAP_POP_MDFY,
+ .set_actions_rx = &dr_ste_v1_set_actions_rx,
+ .set_actions_tx = &dr_ste_v1_set_actions_tx,
+ .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr),
+ .modify_field_arr = dr_ste_v1_action_modify_field_arr,
+ .set_action_set = &dr_ste_v1_set_action_set,
+ .set_action_add = &dr_ste_v1_set_action_add,
+ .set_action_copy = &dr_ste_v1_set_action_copy,
+ .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+ /* Send */
+ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void)
+{
+ return &ste_ctx_v1;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
new file mode 100644
index 000000000000..8a1d49790c6e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef _DR_STE_V1_
+#define _DR_STE_V1_
+
+#include "dr_types.h"
+#include "dr_ste.h"
+
+void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr);
+u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p);
+void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask);
+u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p);
+void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type);
+u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p);
+void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size);
+void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi);
+void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size);
+void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set,
+ u32 actions_caps, u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr, u32 *added_stes);
+void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set,
+ u32 actions_caps, u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr, u32 *added_stes);
+void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter,
+ u8 length, u32 data);
+void dr_ste_v1_set_action_add(u8 *d_action, u8 hw_field, u8 shifter,
+ u8 length, u32 data);
+void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter,
+ u8 dst_len, u8 src_hw_field, u8 src_shifter);
+int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action,
+ u32 hw_action_sz, u16 *used_hw_action_num);
+void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+
+#endif /* _DR_STE_V1_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
new file mode 100644
index 000000000000..c60fddd125d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "dr_ste_v1.h"
+
+enum {
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00,
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01,
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02,
+ DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08,
+ DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09,
+ DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e,
+ DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18,
+ DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f,
+ DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e,
+ DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f,
+ DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f,
+ DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70,
+ DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b,
+ DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95,
+};
+
+static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15,
+ },
+};
+
+static struct mlx5dr_ste_ctx ste_ctx_v2 = {
+ /* Builders */
+ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
+ .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init,
+ .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init,
+ .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init,
+ .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init,
+ .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init,
+ .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init,
+ .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init,
+ .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init,
+ .build_mpls_init = &dr_ste_v1_build_mpls_init,
+ .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init,
+ .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init,
+ .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init,
+ .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init,
+ .build_icmp_init = &dr_ste_v1_build_icmp_init,
+ .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init,
+ .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init,
+ .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init,
+ .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init,
+ .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_tnl_geneve_tlv_opt_exist_init =
+ &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init,
+ .build_register_0_init = &dr_ste_v1_build_register_0_init,
+ .build_register_1_init = &dr_ste_v1_build_register_1_init,
+ .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init,
+ .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init,
+ .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init,
+ .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init,
+ .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init,
+ .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init,
+
+ /* Getters and Setters */
+ .ste_init = &dr_ste_v1_init,
+ .set_next_lu_type = &dr_ste_v1_set_next_lu_type,
+ .get_next_lu_type = &dr_ste_v1_get_next_lu_type,
+ .set_miss_addr = &dr_ste_v1_set_miss_addr,
+ .get_miss_addr = &dr_ste_v1_get_miss_addr,
+ .set_hit_addr = &dr_ste_v1_set_hit_addr,
+ .set_byte_mask = &dr_ste_v1_set_byte_mask,
+ .get_byte_mask = &dr_ste_v1_get_byte_mask,
+
+ /* Actions */
+ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP |
+ DR_STE_CTX_ACTION_CAP_RX_PUSH |
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP,
+ .set_actions_rx = &dr_ste_v1_set_actions_rx,
+ .set_actions_tx = &dr_ste_v1_set_actions_tx,
+ .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr),
+ .modify_field_arr = dr_ste_v2_action_modify_field_arr,
+ .set_action_set = &dr_ste_v1_set_action_set,
+ .set_action_add = &dr_ste_v1_set_action_add,
+ .set_action_copy = &dr_ste_v1_set_action_copy,
+ .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+
+ /* Send */
+ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void)
+{
+ return &ste_ctx_v2;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index 14ce2d7dbb66..31d443dd8386 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -3,69 +3,68 @@
#include "dr_types.h"
-int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
- struct mlx5dr_action *action)
+static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl,
+ struct mlx5dr_action *action)
{
- struct mlx5dr_matcher *last_matcher = NULL;
+ struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL;
struct mlx5dr_htbl_connect_info info;
struct mlx5dr_ste_htbl *last_htbl;
+ struct mlx5dr_icm_chunk *chunk;
+ int ret;
+
+ if (!list_empty(&nic_tbl->nic_matcher_list))
+ last_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list,
+ struct mlx5dr_matcher_rx_tx,
+ list_node);
+
+ if (last_nic_matcher)
+ last_htbl = last_nic_matcher->e_anchor;
+ else
+ last_htbl = nic_tbl->s_anchor;
+
+ if (action) {
+ chunk = nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ?
+ action->dest_tbl->tbl->rx.s_anchor->chunk :
+ action->dest_tbl->tbl->tx.s_anchor->chunk;
+ nic_tbl->default_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk);
+ } else {
+ nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr;
+ }
+
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_tbl->default_icm_addr;
+
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_tbl->nic_dmn,
+ last_htbl, &info, true);
+ if (ret)
+ mlx5dr_dbg(dmn, "Failed to set NIC RX/TX miss action, ret %d\n", ret);
+
+ return ret;
+}
+
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_action *action)
+{
int ret;
if (action && action->action_type != DR_ACTION_TYP_FT)
return -EOPNOTSUPP;
- mutex_lock(&tbl->dmn->mutex);
-
- if (!list_empty(&tbl->matcher_list))
- last_matcher = list_last_entry(&tbl->matcher_list,
- struct mlx5dr_matcher,
- matcher_list);
+ mlx5dr_domain_lock(tbl->dmn);
if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX ||
tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
- if (last_matcher)
- last_htbl = last_matcher->rx.e_anchor;
- else
- last_htbl = tbl->rx.s_anchor;
-
- tbl->rx.default_icm_addr = action ?
- action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
- tbl->rx.nic_dmn->default_icm_addr;
-
- info.type = CONNECT_MISS;
- info.miss_icm_addr = tbl->rx.default_icm_addr;
-
- ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
- tbl->rx.nic_dmn,
- last_htbl,
- &info, true);
- if (ret) {
- mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret);
+ ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->rx, action);
+ if (ret)
goto out;
- }
}
if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX ||
tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
- if (last_matcher)
- last_htbl = last_matcher->tx.e_anchor;
- else
- last_htbl = tbl->tx.s_anchor;
-
- tbl->tx.default_icm_addr = action ?
- action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr :
- tbl->tx.nic_dmn->default_icm_addr;
-
- info.type = CONNECT_MISS;
- info.miss_icm_addr = tbl->tx.default_icm_addr;
-
- ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
- tbl->tx.nic_dmn,
- last_htbl, &info, true);
- if (ret) {
- mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret);
+ ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->tx, action);
+ if (ret)
goto out;
- }
}
/* Release old action */
@@ -78,7 +77,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
refcount_inc(&action->refcount);
out:
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
return ret;
}
@@ -95,7 +94,7 @@ static void dr_table_uninit_fdb(struct mlx5dr_table *tbl)
static void dr_table_uninit(struct mlx5dr_table *tbl)
{
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
switch (tbl->dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -112,7 +111,7 @@ static void dr_table_uninit(struct mlx5dr_table *tbl)
break;
}
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
}
static int dr_table_init_nic(struct mlx5dr_domain *dmn,
@@ -122,22 +121,28 @@ static int dr_table_init_nic(struct mlx5dr_domain *dmn,
struct mlx5dr_htbl_connect_info info;
int ret;
+ INIT_LIST_HEAD(&nic_tbl->nic_matcher_list);
+
nic_tbl->default_icm_addr = nic_dmn->default_icm_addr;
nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
DR_CHUNK_SIZE_1,
MLX5DR_STE_LU_TYPE_DONT_CARE,
0);
- if (!nic_tbl->s_anchor)
+ if (!nic_tbl->s_anchor) {
+ mlx5dr_err(dmn, "Failed allocating htbl\n");
return -ENOMEM;
+ }
info.type = CONNECT_MISS;
info.miss_icm_addr = nic_dmn->default_icm_addr;
ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
nic_tbl->s_anchor,
&info, true);
- if (ret)
+ if (ret) {
+ mlx5dr_err(dmn, "Failed int and send htbl\n");
goto free_s_anchor;
+ }
mlx5dr_htbl_get(nic_tbl->s_anchor);
@@ -173,7 +178,7 @@ static int dr_table_init(struct mlx5dr_table *tbl)
INIT_LIST_HEAD(&tbl->matcher_list);
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
switch (tbl->dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -197,7 +202,7 @@ static int dr_table_init(struct mlx5dr_table *tbl)
break;
}
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
return ret;
}
@@ -209,7 +214,7 @@ static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl)
tbl->table_type);
}
-static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
+static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl, u16 uid)
{
bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
@@ -219,10 +224,10 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
int ret;
if (tbl->rx.s_anchor)
- icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr;
+ icm_addr_rx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->rx.s_anchor->chunk);
if (tbl->tx.s_anchor)
- icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr;
+ icm_addr_tx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->tx.s_anchor->chunk);
ft_attr.table_type = tbl->table_type;
ft_attr.icm_addr_rx = icm_addr_rx;
@@ -231,6 +236,7 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
ft_attr.sw_owner = true;
ft_attr.decap_en = en_decap;
ft_attr.reformat_en = en_encap;
+ ft_attr.uid = uid;
ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr,
NULL, &tbl->table_id);
@@ -238,7 +244,8 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
return ret;
}
-struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags)
+struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level,
+ u32 flags, u16 uid)
{
struct mlx5dr_table *tbl;
int ret;
@@ -258,10 +265,12 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u
if (ret)
goto free_tbl;
- ret = dr_table_create_sw_owned_tbl(tbl);
+ ret = dr_table_create_sw_owned_tbl(tbl, uid);
if (ret)
goto uninit_tbl;
+ INIT_LIST_HEAD(&tbl->dbg_node);
+ mlx5dr_dbg_tbl_add(tbl);
return tbl;
uninit_tbl:
@@ -277,9 +286,10 @@ int mlx5dr_table_destroy(struct mlx5dr_table *tbl)
{
int ret;
- if (refcount_read(&tbl->refcount) > 1)
+ if (WARN_ON_ONCE(refcount_read(&tbl->refcount) > 1))
return -EBUSY;
+ mlx5dr_dbg_tbl_del(tbl);
ret = dr_table_destroy_sw_owned_tbl(tbl);
if (ret)
return ret;
@@ -299,3 +309,8 @@ u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl)
{
return tbl->table_id;
}
+
+struct mlx5dr_table *mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft)
+{
+ return ft->fs_dr_table.dr_table;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index dffe35145d19..1777a1e508e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -4,24 +4,38 @@
#ifndef _DR_TYPES_
#define _DR_TYPES_
-#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include <linux/refcount.h>
#include "fs_core.h"
#include "wq.h"
#include "lib/mlx5.h"
#include "mlx5_ifc_dr.h"
#include "mlx5dr.h"
+#include "dr_dbg.h"
-#define DR_RULE_MAX_STES 17
+#define DR_RULE_MAX_STES 18
#define DR_ACTION_MAX_STES 5
-#define WIRE_PORT 0xFFFF
#define DR_STE_SVLAN 0x1
#define DR_STE_CVLAN 0x2
+#define DR_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4)
+#define DR_NUM_OF_FLEX_PARSERS 8
+#define DR_STE_MAX_FLEX_0_ID 3
+#define DR_STE_MAX_FLEX_1_ID 7
#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg)
#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg)
+static inline bool dr_is_flex_parser_0_id(u8 parser_id)
+{
+ return parser_id <= DR_STE_MAX_FLEX_0_ID;
+}
+
+static inline bool dr_is_flex_parser_1_id(u8 parser_id)
+{
+ return parser_id > DR_STE_MAX_FLEX_0_ID;
+}
+
enum mlx5dr_icm_chunk_size {
DR_CHUNK_SIZE_1,
DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */
@@ -69,10 +83,15 @@ enum {
DR_STE_SIZE_CTRL = 32,
DR_STE_SIZE_TAG = 16,
DR_STE_SIZE_MASK = 16,
+ DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
};
-enum {
- DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
+enum mlx5dr_ste_ctx_action_cap {
+ DR_STE_CTX_ACTION_CAP_NONE = 0,
+ DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0,
+ DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1,
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2,
+ DR_STE_CTX_ACTION_CAP_POP_MDFY = 1 << 3,
};
enum {
@@ -86,7 +105,9 @@ enum mlx5dr_matcher_criteria {
DR_MATCHER_CRITERIA_INNER = 1 << 2,
DR_MATCHER_CRITERIA_MISC2 = 1 << 3,
DR_MATCHER_CRITERIA_MISC3 = 1 << 4,
- DR_MATCHER_CRITERIA_MAX = 1 << 5,
+ DR_MATCHER_CRITERIA_MISC4 = 1 << 5,
+ DR_MATCHER_CRITERIA_MISC5 = 1 << 6,
+ DR_MATCHER_CRITERIA_MAX = 1 << 7,
};
enum mlx5dr_action_type {
@@ -103,6 +124,10 @@ enum mlx5dr_action_type {
DR_ACTION_TYP_VPORT,
DR_ACTION_TYP_POP_VLAN,
DR_ACTION_TYP_PUSH_VLAN,
+ DR_ACTION_TYP_INSERT_HDR,
+ DR_ACTION_TYP_REMOVE_HDR,
+ DR_ACTION_TYP_SAMPLER,
+ DR_ACTION_TYP_ASO_FLOW_METER,
DR_ACTION_TYP_MAX,
};
@@ -114,30 +139,31 @@ enum mlx5dr_ipv {
struct mlx5dr_icm_pool;
struct mlx5dr_icm_chunk;
-struct mlx5dr_icm_bucket;
+struct mlx5dr_icm_buddy_mem;
struct mlx5dr_ste_htbl;
struct mlx5dr_match_param;
struct mlx5dr_cmd_caps;
+struct mlx5dr_rule_rx_tx;
struct mlx5dr_matcher_rx_tx;
+struct mlx5dr_ste_ctx;
struct mlx5dr_ste {
- u8 *hw_ste;
/* refcount: indicates the num of rules that using this ste */
u32 refcount;
+ /* this ste is part of a rule, located in ste's chain */
+ u8 ste_chain_location;
+
/* attached to the miss_list head at each htbl entry */
struct list_head miss_list_node;
- /* each rule member that uses this ste attached here */
- struct list_head rule_list;
-
/* this ste is member of htbl */
struct mlx5dr_ste_htbl *htbl;
struct mlx5dr_ste_htbl *next_htbl;
- /* this ste is part of a rule, located in ste's chain */
- u8 ste_chain_location;
+ /* The rule this STE belongs to */
+ struct mlx5dr_rule_rx_tx *rule_rx_tx;
};
struct mlx5dr_ste_htbl_ctrl {
@@ -148,23 +174,14 @@ struct mlx5dr_ste_htbl_ctrl {
/* total number of collisions entries attached to this table */
unsigned int num_of_collisions;
- unsigned int increase_threshold;
- u8 may_grow:1;
};
struct mlx5dr_ste_htbl {
- u8 lu_type;
+ u16 lu_type;
u16 byte_mask;
u32 refcount;
struct mlx5dr_icm_chunk *chunk;
- struct mlx5dr_ste *ste_arr;
- u8 *hw_ste_arr;
-
- struct list_head *miss_list;
-
- enum mlx5dr_icm_chunk_size chunk_size;
struct mlx5dr_ste *pointing_ste;
-
struct mlx5dr_ste_htbl_ctrl ctrl;
};
@@ -189,18 +206,18 @@ struct mlx5dr_ste_build {
u8 vhca_id_valid:1;
struct mlx5dr_domain *dmn;
struct mlx5dr_cmd_caps *caps;
- u8 lu_type;
+ u16 lu_type;
u16 byte_mask;
u8 bit_mask[DR_STE_SIZE_MASK];
int (*ste_build_tag_func)(struct mlx5dr_match_param *spec,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p);
+ u8 *tag);
};
struct mlx5dr_ste_htbl *
mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
enum mlx5dr_icm_chunk_size chunk_size,
- u8 lu_type, u16 byte_mask);
+ u16 lu_type, u16 byte_mask);
int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl);
@@ -218,36 +235,95 @@ static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl)
/* STE utils */
u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl);
-void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, u16 gvmi);
-void mlx5dr_ste_always_hit_htbl(struct mlx5dr_ste *ste,
- struct mlx5dr_ste_htbl *next_htbl);
-void mlx5dr_ste_set_miss_addr(u8 *hw_ste, u64 miss_addr);
-u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste);
-void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi);
-void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size);
-void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr);
+void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 miss_addr);
+void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 icm_addr, u32 ht_size);
+void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste,
+ struct mlx5dr_ste_htbl *next_htbl);
void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask);
-bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste);
bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
u8 ste_location);
-void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag);
-void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id);
-void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id,
- int size, bool encap_l3);
-void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p);
-void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan);
-void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p);
-void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_tpid_pcp_dei_vid,
- bool go_back);
-void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type);
-u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p);
-void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
- u32 re_write_index);
-void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p);
u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste);
u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste);
struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste);
+#define MLX5DR_MAX_VLANS 2
+
+struct mlx5dr_ste_actions_attr {
+ u32 modify_index;
+ u16 modify_actions;
+ u32 decap_index;
+ u16 decap_actions;
+ u8 decap_with_vlan:1;
+ u64 final_icm_addr;
+ u32 flow_tag;
+ u32 ctr_id;
+ u16 gvmi;
+ u16 hit_gvmi;
+ struct {
+ u32 id;
+ u32 size;
+ u8 param_0;
+ u8 param_1;
+ } reformat;
+ struct {
+ int count;
+ u32 headers[MLX5DR_MAX_VLANS];
+ } vlans;
+
+ struct {
+ u32 obj_id;
+ u32 offset;
+ u8 dest_reg_id;
+ u8 init_color;
+ } aso_flow_meter;
+};
+
+void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+
+void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter);
+int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
+ void *data,
+ u32 data_sz,
+ u8 *hw_action,
+ u32 hw_action_sz,
+ u16 *used_hw_action_num);
+
+const struct mlx5dr_ste_action_modify_field *
+mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field);
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version);
void mlx5dr_ste_free(struct mlx5dr_ste *ste,
struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher);
@@ -266,8 +342,11 @@ static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste)
ste->refcount++;
}
-void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
- struct mlx5dr_ste_htbl *next_htbl);
+static inline bool mlx5dr_ste_is_not_used(struct mlx5dr_ste *ste)
+{
+ return !ste->refcount;
+}
+
bool mlx5dr_ste_equal_tag(void *src, void *dst);
int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
@@ -284,68 +363,134 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
struct mlx5dr_match_param *value,
u8 *ste_arr);
-int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx);
-void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *builder,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx);
-void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx);
-void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- struct mlx5dr_cmd_caps *caps,
- bool inner, bool rx);
-void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx);
-void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
bool inner, bool rx);
-void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- struct mlx5dr_domain *dmn,
- bool inner, bool rx);
+void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn,
+ bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
/* Actions utils */
@@ -361,57 +506,66 @@ struct mlx5dr_match_spec {
/* Incoming packet Ethertype - this is the Ethertype
* following the last VLAN tag of the packet
*/
- u32 ethertype:16;
u32 smac_15_0:16; /* Source MAC address of incoming packet */
+ u32 ethertype:16;
+
u32 dmac_47_16; /* Destination MAC address of incoming packet */
- /* VLAN ID of first VLAN tag in the incoming packet.
+
+ u32 dmac_15_0:16; /* Destination MAC address of incoming packet */
+ /* Priority of first VLAN tag in the incoming packet.
* Valid only when cvlan_tag==1 or svlan_tag==1
*/
- u32 first_vid:12;
+ u32 first_prio:3;
/* CFI bit of first VLAN tag in the incoming packet.
* Valid only when cvlan_tag==1 or svlan_tag==1
*/
u32 first_cfi:1;
- /* Priority of first VLAN tag in the incoming packet.
+ /* VLAN ID of first VLAN tag in the incoming packet.
* Valid only when cvlan_tag==1 or svlan_tag==1
*/
- u32 first_prio:3;
- u32 dmac_15_0:16; /* Destination MAC address of incoming packet */
- /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK;
- * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS
+ u32 first_vid:12;
+
+ u32 ip_protocol:8; /* IP protocol */
+ /* Differentiated Services Code Point derived from
+ * Traffic Class/TOS field of IPv6/v4
*/
- u32 tcp_flags:9;
- u32 ip_version:4; /* IP version */
- u32 frag:1; /* Packet is an IP fragment */
- /* The first vlan in the packet is s-vlan (0x8a88).
- * cvlan_tag and svlan_tag cannot be set together
+ u32 ip_dscp:6;
+ /* Explicit Congestion Notification derived from
+ * Traffic Class/TOS field of IPv6/v4
*/
- u32 svlan_tag:1;
+ u32 ip_ecn:2;
/* The first vlan in the packet is c-vlan (0x8100).
* cvlan_tag and svlan_tag cannot be set together
*/
u32 cvlan_tag:1;
- /* Explicit Congestion Notification derived from
- * Traffic Class/TOS field of IPv6/v4
+ /* The first vlan in the packet is s-vlan (0x8a88).
+ * cvlan_tag and svlan_tag cannot be set together
*/
- u32 ip_ecn:2;
- /* Differentiated Services Code Point derived from
- * Traffic Class/TOS field of IPv6/v4
+ u32 svlan_tag:1;
+ u32 frag:1; /* Packet is an IP fragment */
+ u32 ip_version:4; /* IP version */
+ /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK;
+ * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS
*/
- u32 ip_dscp:6;
- u32 ip_protocol:8; /* IP protocol */
+ u32 tcp_flags:9;
+
+ /* TCP source port.;tcp and udp sport/dport are mutually exclusive */
+ u32 tcp_sport:16;
/* TCP destination port.
* tcp and udp sport/dport are mutually exclusive
*/
u32 tcp_dport:16;
- /* TCP source port.;tcp and udp sport/dport are mutually exclusive */
- u32 tcp_sport:16;
+
+ u32 reserved_auto1:16;
+ u32 ipv4_ihl:4;
+ u32 reserved_auto2:4;
u32 ttl_hoplimit:8;
- u32 reserved:24;
- /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */
- u32 udp_dport:16;
+
/* UDP source port.;tcp and udp sport/dport are mutually exclusive */
u32 udp_sport:16;
+ /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */
+ u32 udp_dport:16;
+
/* IPv6 source address of incoming packets
* For IPv4 address use bits 31:0 (rest of the bits are reserved)
* This field should be qualified by an appropriate ethertype
@@ -455,96 +609,114 @@ struct mlx5dr_match_spec {
};
struct mlx5dr_match_misc {
- u32 source_sqn:24; /* Source SQN */
- u32 source_vhca_port:4;
- /* used with GRE, sequence number exist when gre_s_present == 1 */
- u32 gre_s_present:1;
- /* used with GRE, key exist when gre_k_present == 1 */
- u32 gre_k_present:1;
- u32 reserved_auto1:1;
/* used with GRE, checksum exist when gre_c_present == 1 */
u32 gre_c_present:1;
+ u32 reserved_auto1:1;
+ /* used with GRE, key exist when gre_k_present == 1 */
+ u32 gre_k_present:1;
+ /* used with GRE, sequence number exist when gre_s_present == 1 */
+ u32 gre_s_present:1;
+ u32 source_vhca_port:4;
+ u32 source_sqn:24; /* Source SQN */
+
+ u32 source_eswitch_owner_vhca_id:16;
/* Source port.;0xffff determines wire port */
u32 source_port:16;
- u32 source_eswitch_owner_vhca_id:16;
- /* VLAN ID of first VLAN tag the inner header of the incoming packet.
- * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
- */
- u32 inner_second_vid:12;
- /* CFI bit of first VLAN tag in the inner header of the incoming packet.
- * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
- */
- u32 inner_second_cfi:1;
- /* Priority of second VLAN tag in the inner header of the incoming packet.
- * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
- */
- u32 inner_second_prio:3;
- /* VLAN ID of first VLAN tag the outer header of the incoming packet.
+
+ /* Priority of second VLAN tag in the outer header of the incoming packet.
* Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
*/
- u32 outer_second_vid:12;
+ u32 outer_second_prio:3;
/* CFI bit of first VLAN tag in the outer header of the incoming packet.
* Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
*/
u32 outer_second_cfi:1;
- /* Priority of second VLAN tag in the outer header of the incoming packet.
+ /* VLAN ID of first VLAN tag the outer header of the incoming packet.
* Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
*/
- u32 outer_second_prio:3;
- u32 gre_protocol:16; /* GRE Protocol (outer) */
- u32 reserved_auto3:12;
- /* The second vlan in the inner header of the packet is s-vlan (0x8a88).
- * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ u32 outer_second_vid:12;
+ /* Priority of second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
*/
- u32 inner_second_svlan_tag:1;
- /* The second vlan in the outer header of the packet is s-vlan (0x8a88).
+ u32 inner_second_prio:3;
+ /* CFI bit of first VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
+ */
+ u32 inner_second_cfi:1;
+ /* VLAN ID of first VLAN tag the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
+ */
+ u32 inner_second_vid:12;
+
+ u32 outer_second_cvlan_tag:1;
+ u32 inner_second_cvlan_tag:1;
+ /* The second vlan in the outer header of the packet is c-vlan (0x8100).
* outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
*/
u32 outer_second_svlan_tag:1;
/* The second vlan in the inner header of the packet is c-vlan (0x8100).
* inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
*/
- u32 inner_second_cvlan_tag:1;
- /* The second vlan in the outer header of the packet is c-vlan (0x8100).
+ u32 inner_second_svlan_tag:1;
+ /* The second vlan in the outer header of the packet is s-vlan (0x8a88).
* outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
*/
- u32 outer_second_cvlan_tag:1;
- u32 gre_key_l:8; /* GRE Key [7:0] (outer) */
+ u32 reserved_auto2:12;
+ /* The second vlan in the inner header of the packet is s-vlan (0x8a88).
+ * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ */
+ u32 gre_protocol:16; /* GRE Protocol (outer) */
+
u32 gre_key_h:24; /* GRE Key[31:8] (outer) */
- u32 reserved_auto4:8;
+ u32 gre_key_l:8; /* GRE Key [7:0] (outer) */
+
u32 vxlan_vni:24; /* VXLAN VNI (outer) */
- u32 geneve_oam:1; /* GENEVE OAM field (outer) */
- u32 reserved_auto5:7;
+ u32 reserved_auto3:8;
+
u32 geneve_vni:24; /* GENEVE VNI field (outer) */
+ u32 reserved_auto4:6;
+ u32 geneve_tlv_option_0_exist:1;
+ u32 geneve_oam:1; /* GENEVE OAM field (outer) */
+
+ u32 reserved_auto5:12;
u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */
+
u32 reserved_auto6:12;
u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */
- u32 reserved_auto7:12;
- u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */
+
+ u32 reserved_auto7:10;
u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */
- u32 reserved_auto8:10;
+ u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */
+
+ u32 reserved_auto8:8;
u32 bth_dst_qp:24; /* Destination QP in BTH header */
- u32 reserved_auto9:8;
- u8 reserved_auto10[20];
+
+ u32 reserved_auto9;
+ u32 outer_esp_spi;
+ u32 reserved_auto10[3];
};
struct mlx5dr_match_misc2 {
- u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */
- u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */
- u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */
u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */
- u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */
- u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */
- u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */
+ u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */
+ u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */
+ u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */
+
u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */
- u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */
- u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */
- u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */
+ u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */
+ u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */
+ u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */
+
u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */
- u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */
- u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */
- u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */
+ u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */
+
u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */
+ u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */
+ u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */
+
u32 metadata_reg_c_7; /* metadata_reg_c_7 */
u32 metadata_reg_c_6; /* metadata_reg_c_6 */
u32 metadata_reg_c_5; /* metadata_reg_c_5 */
@@ -554,8 +726,7 @@ struct mlx5dr_match_misc2 {
u32 metadata_reg_c_1; /* metadata_reg_c_1 */
u32 metadata_reg_c_0; /* metadata_reg_c_0 */
u32 metadata_reg_a; /* metadata_reg_a */
- u32 metadata_reg_b; /* metadata_reg_b */
- u8 reserved_auto2[8];
+ u32 reserved_auto1[3];
};
struct mlx5dr_match_misc3 {
@@ -563,18 +734,57 @@ struct mlx5dr_match_misc3 {
u32 outer_tcp_seq_num;
u32 inner_tcp_ack_num;
u32 outer_tcp_ack_num;
- u32 outer_vxlan_gpe_vni:24;
+
u32 reserved_auto1:8;
- u32 reserved_auto2:16;
- u32 outer_vxlan_gpe_flags:8;
+ u32 outer_vxlan_gpe_vni:24;
+
u32 outer_vxlan_gpe_next_protocol:8;
+ u32 outer_vxlan_gpe_flags:8;
+ u32 reserved_auto2:16;
+
u32 icmpv4_header_data;
u32 icmpv6_header_data;
- u32 icmpv6_code:8;
- u32 icmpv6_type:8;
- u32 icmpv4_code:8;
- u32 icmpv4_type:8;
- u8 reserved_auto3[0x1c];
+
+ u8 icmpv4_type;
+ u8 icmpv4_code;
+ u8 icmpv6_type;
+ u8 icmpv6_code;
+
+ u32 geneve_tlv_option_0_data;
+
+ u32 gtpu_teid;
+
+ u8 gtpu_msg_type;
+ u8 gtpu_msg_flags;
+ u32 reserved_auto3:16;
+
+ u32 gtpu_dw_2;
+ u32 gtpu_first_ext_dw_0;
+ u32 gtpu_dw_0;
+ u32 reserved_auto4;
+};
+
+struct mlx5dr_match_misc4 {
+ u32 prog_sample_field_value_0;
+ u32 prog_sample_field_id_0;
+ u32 prog_sample_field_value_1;
+ u32 prog_sample_field_id_1;
+ u32 prog_sample_field_value_2;
+ u32 prog_sample_field_id_2;
+ u32 prog_sample_field_value_3;
+ u32 prog_sample_field_id_3;
+ u32 reserved_auto1[8];
+};
+
+struct mlx5dr_match_misc5 {
+ u32 macsec_tag_0;
+ u32 macsec_tag_1;
+ u32 macsec_tag_2;
+ u32 macsec_tag_3;
+ u32 tunnel_header_0;
+ u32 tunnel_header_1;
+ u32 tunnel_header_2;
+ u32 tunnel_header_3;
};
struct mlx5dr_match_param {
@@ -583,26 +793,51 @@ struct mlx5dr_match_param {
struct mlx5dr_match_spec inner;
struct mlx5dr_match_misc2 misc2;
struct mlx5dr_match_misc3 misc3;
+ struct mlx5dr_match_misc4 misc4;
+ struct mlx5dr_match_misc5 misc5;
};
-#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \
- (_misc3)->icmpv4_code || \
- (_misc3)->icmpv4_header_data)
+#define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \
+ (_misc3)->icmpv4_code || \
+ (_misc3)->icmpv4_header_data)
+
+#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \
+ (_spec)->src_ip_95_64 || \
+ (_spec)->src_ip_63_32 || \
+ (_spec)->src_ip_31_0)
+
+#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \
+ (_spec)->dst_ip_95_64 || \
+ (_spec)->dst_ip_63_32 || \
+ (_spec)->dst_ip_31_0)
struct mlx5dr_esw_caps {
u64 drop_icm_address_rx;
u64 drop_icm_address_tx;
u64 uplink_icm_address_rx;
u64 uplink_icm_address_tx;
- bool sw_owner;
+ u8 sw_owner:1;
+ u8 sw_owner_v2:1;
};
struct mlx5dr_cmd_vport_cap {
u16 vport_gvmi;
u16 vhca_gvmi;
+ u16 num;
u64 icm_address_rx;
u64 icm_address_tx;
- u32 num;
+};
+
+struct mlx5dr_roce_cap {
+ u8 roce_en:1;
+ u8 fl_rc_qp_when_roce_disabled:1;
+ u8 fl_rc_qp_when_roce_enabled:1;
+};
+
+struct mlx5dr_vports {
+ struct mlx5dr_cmd_vport_cap esw_manager_caps;
+ struct mlx5dr_cmd_vport_cap uplink_caps;
+ struct xarray vports_caps_xa;
};
struct mlx5dr_cmd_caps {
@@ -619,23 +854,42 @@ struct mlx5dr_cmd_caps {
u8 flex_parser_id_icmp_dw1;
u8 flex_parser_id_icmpv6_dw0;
u8 flex_parser_id_icmpv6_dw1;
+ u8 flex_parser_id_geneve_tlv_option_0;
+ u8 flex_parser_id_mpls_over_gre;
+ u8 flex_parser_id_mpls_over_udp;
+ u8 flex_parser_id_gtpu_dw_0;
+ u8 flex_parser_id_gtpu_teid;
+ u8 flex_parser_id_gtpu_dw_2;
+ u8 flex_parser_id_gtpu_first_ext_dw_0;
+ u8 flex_parser_ok_bits_supp;
u8 max_ft_level;
u16 roce_min_src_udp;
- u8 num_esw_ports;
+ u8 sw_format_ver;
bool eswitch_manager;
bool rx_sw_owner;
bool tx_sw_owner;
bool fdb_sw_owner;
- u32 num_vports;
+ u8 rx_sw_owner_v2:1;
+ u8 tx_sw_owner_v2:1;
+ u8 fdb_sw_owner_v2:1;
struct mlx5dr_esw_caps esw_caps;
- struct mlx5dr_cmd_vport_cap *vports_caps;
+ struct mlx5dr_vports vports;
bool prio_tag_required;
+ struct mlx5dr_roce_cap roce_caps;
+ u8 is_ecpf:1;
+ u8 isolate_vl_tc:1;
+};
+
+enum mlx5dr_domain_nic_type {
+ DR_DOMAIN_NIC_TYPE_RX,
+ DR_DOMAIN_NIC_TYPE_TX,
};
struct mlx5dr_domain_rx_tx {
u64 drop_icm_addr;
u64 default_icm_addr;
- enum mlx5dr_ste_entry_type ste_type;
+ enum mlx5dr_domain_nic_type type;
+ struct mutex mutex; /* protect rx/tx domain */
};
struct mlx5dr_domain_info {
@@ -649,10 +903,6 @@ struct mlx5dr_domain_info {
struct mlx5dr_cmd_caps caps;
};
-struct mlx5dr_domain_cache {
- struct mlx5dr_fw_recalc_cs_ft **recalc_cs_ft;
-};
-
struct mlx5dr_domain {
struct mlx5dr_domain *peer_dmn;
struct mlx5_core_dev *mdev;
@@ -660,18 +910,21 @@ struct mlx5dr_domain {
struct mlx5_uars_page *uar;
enum mlx5dr_domain_type type;
refcount_t refcount;
- struct mutex mutex; /* protect domain */
struct mlx5dr_icm_pool *ste_icm_pool;
struct mlx5dr_icm_pool *action_icm_pool;
struct mlx5dr_send_ring *send_ring;
struct mlx5dr_domain_info info;
- struct mlx5dr_domain_cache cache;
+ struct xarray csum_fts_xa;
+ struct mlx5dr_ste_ctx *ste_ctx;
+ struct list_head dbg_tbl_list;
+ struct mlx5dr_dbg_dump_info dump_info;
};
struct mlx5dr_table_rx_tx {
struct mlx5dr_ste_htbl *s_anchor;
struct mlx5dr_domain_rx_tx *nic_dmn;
u64 default_icm_addr;
+ struct list_head nic_matcher_list;
};
struct mlx5dr_table {
@@ -685,6 +938,7 @@ struct mlx5dr_table {
struct list_head matcher_list;
struct mlx5dr_action *miss_action;
refcount_t refcount;
+ struct list_head dbg_node;
};
struct mlx5dr_matcher_rx_tx {
@@ -698,76 +952,120 @@ struct mlx5dr_matcher_rx_tx {
u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX];
u64 default_icm_addr;
struct mlx5dr_table_rx_tx *nic_tbl;
+ u32 prio;
+ struct list_head list_node;
+ u32 rules;
};
struct mlx5dr_matcher {
struct mlx5dr_table *tbl;
struct mlx5dr_matcher_rx_tx rx;
struct mlx5dr_matcher_rx_tx tx;
- struct list_head matcher_list;
- u16 prio;
+ struct list_head list_node; /* Used for both matchers and dbg managing */
+ u32 prio;
struct mlx5dr_match_param mask;
u8 match_criteria;
refcount_t refcount;
- struct mlx5dv_flow_matcher *dv_matcher;
+ struct list_head dbg_rule_list;
};
-struct mlx5dr_rule_member {
- struct mlx5dr_ste *ste;
- /* attached to mlx5dr_rule via this */
+struct mlx5dr_ste_action_modify_field {
+ u16 hw_field;
+ u8 start;
+ u8 end;
+ u8 l3_type;
+ u8 l4_type;
+};
+
+struct mlx5dr_action_rewrite {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_icm_chunk *chunk;
+ u8 *data;
+ u16 num_of_actions;
+ u32 index;
+ u8 allow_rx:1;
+ u8 allow_tx:1;
+ u8 modify_ttl:1;
+};
+
+struct mlx5dr_action_reformat {
+ struct mlx5dr_domain *dmn;
+ u32 id;
+ u32 size;
+ u8 param_0;
+ u8 param_1;
+};
+
+struct mlx5dr_action_sampler {
+ struct mlx5dr_domain *dmn;
+ u64 rx_icm_addr;
+ u64 tx_icm_addr;
+ u32 sampler_id;
+};
+
+struct mlx5dr_action_dest_tbl {
+ u8 is_fw_tbl:1;
+ union {
+ struct mlx5dr_table *tbl;
+ struct {
+ struct mlx5dr_domain *dmn;
+ u32 id;
+ u32 group_id;
+ enum fs_flow_table_type type;
+ u64 rx_icm_addr;
+ u64 tx_icm_addr;
+ struct mlx5dr_action **ref_actions;
+ u32 num_of_ref_actions;
+ } fw_tbl;
+ };
+};
+
+struct mlx5dr_action_ctr {
+ u32 ctr_id;
+ u32 offset;
+};
+
+struct mlx5dr_action_vport {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_cmd_vport_cap *caps;
+};
+
+struct mlx5dr_action_push_vlan {
+ u32 vlan_hdr; /* tpid_pcp_dei_vid */
+};
+
+struct mlx5dr_action_flow_tag {
+ u32 flow_tag;
+};
+
+struct mlx5dr_rule_action_member {
+ struct mlx5dr_action *action;
struct list_head list;
- /* attached to mlx5dr_ste via this */
- struct list_head use_ste_list;
+};
+
+struct mlx5dr_action_aso_flow_meter {
+ struct mlx5dr_domain *dmn;
+ u32 obj_id;
+ u32 offset;
+ u8 dest_reg_id;
+ u8 init_color;
};
struct mlx5dr_action {
enum mlx5dr_action_type action_type;
refcount_t refcount;
+
union {
- struct {
- struct mlx5dr_domain *dmn;
- struct mlx5dr_icm_chunk *chunk;
- u8 *data;
- u32 data_size;
- u16 num_of_actions;
- u32 index;
- u8 allow_rx:1;
- u8 allow_tx:1;
- u8 modify_ttl:1;
- } rewrite;
- struct {
- struct mlx5dr_domain *dmn;
- u32 reformat_id;
- u32 reformat_size;
- } reformat;
- struct {
- u8 is_fw_tbl:1;
- union {
- struct mlx5dr_table *tbl;
- struct {
- struct mlx5dr_domain *dmn;
- u32 id;
- u32 group_id;
- enum fs_flow_table_type type;
- u64 rx_icm_addr;
- u64 tx_icm_addr;
- struct mlx5dr_action **ref_actions;
- u32 num_of_ref_actions;
- } fw_tbl;
- };
- } dest_tbl;
- struct {
- u32 ctr_id;
- u32 offeset;
- } ctr;
- struct {
- struct mlx5dr_domain *dmn;
- struct mlx5dr_cmd_vport_cap *caps;
- } vport;
- struct {
- u32 vlan_hdr; /* tpid_pcp_dei_vid */
- } push_vlan;
- u32 flow_tag;
+ void *data;
+ struct mlx5dr_action_rewrite *rewrite;
+ struct mlx5dr_action_reformat *reformat;
+ struct mlx5dr_action_sampler *sampler;
+ struct mlx5dr_action_dest_tbl *dest_tbl;
+ struct mlx5dr_action_ctr *ctr;
+ struct mlx5dr_action_vport *vport;
+ struct mlx5dr_action_push_vlan *push_vlan;
+ struct mlx5dr_action_flow_tag *flow_tag;
+ struct mlx5dr_action_aso_flow_meter *aso;
};
};
@@ -785,8 +1083,8 @@ struct mlx5dr_htbl_connect_info {
};
struct mlx5dr_rule_rx_tx {
- struct list_head rule_members_list;
struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_ste *last_rule_ste;
};
struct mlx5dr_rule {
@@ -794,19 +1092,26 @@ struct mlx5dr_rule {
struct mlx5dr_rule_rx_tx rx;
struct mlx5dr_rule_rx_tx tx;
struct list_head rule_actions_list;
+ struct list_head dbg_node;
+ u32 flow_source;
};
-void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste,
- struct mlx5dr_ste *ste);
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste *ste,
+ bool force);
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+ struct mlx5dr_ste *curr_ste,
+ int *num_of_stes);
struct mlx5dr_icm_chunk {
- struct mlx5dr_icm_bucket *bucket;
+ struct mlx5dr_icm_buddy_mem *buddy_mem;
struct list_head chunk_list;
- u32 rkey;
- u32 num_of_entries;
- u32 byte_size;
- u64 icm_addr;
- u64 mr_addr;
+
+ /* indicates the index of this chunk in the whole memory,
+ * used for deleting the chunk from the buddy
+ */
+ unsigned int seg;
+ enum mlx5dr_icm_chunk_size size;
/* Memory optimisation */
struct mlx5dr_ste *ste_arr;
@@ -814,23 +1119,54 @@ struct mlx5dr_icm_chunk {
struct list_head *miss_list;
};
-static inline int
-mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps)
+static inline void mlx5dr_domain_nic_lock(struct mlx5dr_domain_rx_tx *nic_dmn)
{
- return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED;
+ mutex_lock(&nic_dmn->mutex);
}
-static inline int
-mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps)
+static inline void mlx5dr_domain_nic_unlock(struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+ mutex_unlock(&nic_dmn->mutex);
+}
+
+static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn)
+{
+ mlx5dr_domain_nic_lock(&dmn->info.rx);
+ mlx5dr_domain_nic_lock(&dmn->info.tx);
+}
+
+static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn)
{
- return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED;
+ mlx5dr_domain_nic_unlock(&dmn->info.tx);
+ mlx5dr_domain_nic_unlock(&dmn->info.rx);
}
+int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+
int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
enum mlx5dr_ipv outer_ipv,
enum mlx5dr_ipv inner_ipv);
+u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk);
+u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk);
+u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste);
+
+static inline int
+mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type)
+{
+ if (icm_type == DR_ICM_TYPE_STE)
+ return DR_STE_SIZE;
+
+ return DR_MODIFY_ACTION_SIZE;
+}
+
static inline u32
mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size)
{
@@ -844,29 +1180,34 @@ mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size,
int num_of_entries;
int entry_size;
- if (icm_type == DR_ICM_TYPE_STE)
- entry_size = DR_STE_SIZE;
- else
- entry_size = DR_MODIFY_ACTION_SIZE;
-
+ entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(icm_type);
num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
return entry_size * num_of_entries;
}
-static inline struct mlx5dr_cmd_vport_cap *
-mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport)
+static inline int
+mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl)
{
- if (!caps->vports_caps ||
- (vport >= caps->num_vports && vport != WIRE_PORT))
- return NULL;
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk->size);
- if (vport == WIRE_PORT)
- vport = caps->num_vports;
+ /* Threshold is 50%, one is added to table of size 1 */
+ return (num_of_entries + 1) / 2;
+}
- return &caps->vports_caps[vport];
+static inline bool
+mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl)
+{
+ if (htbl->chunk->size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
+ return false;
+
+ return true;
}
+struct mlx5dr_cmd_vport_cap *
+mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport);
+
struct mlx5dr_cmd_query_flow_table_details {
u8 status;
u8 level;
@@ -876,6 +1217,7 @@ struct mlx5dr_cmd_query_flow_table_details {
struct mlx5dr_cmd_create_flow_table_attr {
u32 table_type;
+ u16 uid;
u64 icm_addr_rx;
u64 icm_addr_tx;
u8 level;
@@ -896,13 +1238,17 @@ int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev,
bool other_vport, u16 vport_number, u16 *gvmi);
int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
struct mlx5dr_esw_caps *caps);
+int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev,
+ u32 sampler_id,
+ u64 *rx_icm_addr,
+ u64 *tx_icm_addr);
int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev);
int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
u32 table_type,
u32 table_id,
u32 group_id,
u32 modify_header_id,
- u32 vport_id);
+ u16 vport_id);
int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
u32 table_type,
u32 table_id);
@@ -934,6 +1280,8 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
struct mlx5dr_cmd_query_flow_table_details *output);
int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
enum mlx5_reformat_ctx_type rt,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
size_t reformat_size,
void *reformat_data,
u32 *reformat_id);
@@ -946,19 +1294,6 @@ struct mlx5dr_cmd_gid_attr {
u32 roce_ver;
};
-struct mlx5dr_cmd_qp_create_attr {
- u32 page_id;
- u32 pdn;
- u32 cqn;
- u32 pm_state;
- u32 service_type;
- u32 buff_umem_id;
- u32 db_umem_id;
- u32 sq_wqe_cnt;
- u32 rq_wqe_cnt;
- u32 rq_wqe_shift;
-};
-
int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
u16 index, struct mlx5dr_cmd_gid_attr *attr);
@@ -970,27 +1305,31 @@ struct mlx5dr_icm_chunk *
mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
enum mlx5dr_icm_chunk_size chunk_size);
void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk);
-bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste);
+
+void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste_p, u32 ste_size);
int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
struct mlx5dr_domain_rx_tx *nic_dmn,
struct mlx5dr_ste_htbl *htbl,
struct mlx5dr_htbl_connect_info *connect_info,
bool update_hw_ste);
-void mlx5dr_ste_set_formatted_ste(u16 gvmi,
- struct mlx5dr_domain_rx_tx *nic_dmn,
+void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ u16 gvmi,
+ enum mlx5dr_domain_nic_type nic_type,
struct mlx5dr_ste_htbl *htbl,
u8 *formatted_ste,
struct mlx5dr_htbl_connect_info *connect_info);
void mlx5dr_ste_copy_param(u8 match_criteria,
struct mlx5dr_match_param *set_param,
- struct mlx5dr_match_parameters *mask);
+ struct mlx5dr_match_parameters *mask,
+ bool clear);
struct mlx5dr_qp {
struct mlx5_core_dev *mdev;
struct mlx5_wq_qp wq;
struct mlx5_uars_page *uar;
struct mlx5_wq_ctrl wq_ctrl;
- struct mlx5_core_qp mqp;
+ u32 qpn;
struct {
unsigned int pc;
unsigned int cc;
@@ -1017,7 +1356,7 @@ struct mlx5dr_cq {
struct mlx5dr_mr {
struct mlx5_core_dev *mdev;
- struct mlx5_core_mkey mkey;
+ u32 mkey;
dma_addr_t dma_addr;
void *addr;
size_t size;
@@ -1040,9 +1379,10 @@ struct mlx5dr_send_ring {
u32 tx_head;
void *buf;
u32 buf_size;
- struct ib_wc wc[MAX_SEND_CQE];
u8 sync_buff[MIN_READ_SYNC];
struct mlx5dr_mr *sync_mr;
+ spinlock_t lock; /* Protect the data path of the send ring */
+ bool err_state; /* send_ring is not usable in err state */
};
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
@@ -1077,6 +1417,7 @@ struct mlx5dr_cmd_flow_destination_hw_info {
u32 ft_num;
u32 ft_id;
u32 counter_id;
+ u32 sampler_id;
struct {
u16 num;
u16 vhca_id;
@@ -1093,6 +1434,7 @@ struct mlx5dr_cmd_fte_info {
u32 *val;
struct mlx5_flow_act action;
struct mlx5dr_cmd_flow_destination_hw_info *dest_arr;
+ bool ignore_flow_level;
};
int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
@@ -1101,6 +1443,8 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
u32 group_id,
struct mlx5dr_cmd_fte_info *fte);
+bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps);
+
struct mlx5dr_fw_recalc_cs_ft {
u64 rx_icm_addr;
u32 table_id;
@@ -1109,18 +1453,20 @@ struct mlx5dr_fw_recalc_cs_ft {
};
struct mlx5dr_fw_recalc_cs_ft *
-mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num);
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num);
void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft);
-int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
- u32 vport_num,
- u64 *rx_icm_addr);
+int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+ u16 vport_num,
+ u64 *rx_icm_addr);
int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_cmd_flow_destination_hw_info *dest,
int num_dest,
bool reformat_req,
u32 *tbl_id,
- u32 *group_id);
+ u32 *group_id,
+ bool ignore_flow_level,
+ u32 flow_source);
void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
u32 group_id);
#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index c2027192e21e..13b6d4721e17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies */
+#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"
@@ -43,11 +44,10 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns,
err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action);
if (err && action) {
err = mlx5dr_action_destroy(action);
- if (err) {
- action = NULL;
- mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
- err);
- }
+ if (err)
+ mlx5_core_err(ns->dev,
+ "Failed to destroy action (%d)\n", err);
+ action = NULL;
}
ft->fs_dr_table.miss_action = action;
if (old_miss_action) {
@@ -62,7 +62,7 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int log_size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
struct mlx5dr_table *tbl;
@@ -71,7 +71,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
if (mlx5_dr_is_fw_table(ft->flags))
return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
- log_size,
+ ft_attr,
next_ft);
flags = ft->flags;
/* turn off encap/decap if not supported for sw-str by fw */
@@ -79,7 +79,8 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
- tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags);
+ tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags,
+ ft_attr->uid);
if (!tbl) {
mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
return -EINVAL;
@@ -97,6 +98,8 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
}
}
+ ft->max_fte = INT_MAX;
+
return 0;
}
@@ -131,6 +134,9 @@ static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft)
{
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft);
+
return set_miss_action(ns, ft, next_ft);
}
@@ -140,7 +146,7 @@ static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_group *fg)
{
struct mlx5dr_matcher *matcher;
- u16 priority = MLX5_GET(create_flow_group_in, in,
+ u32 priority = MLX5_GET(create_flow_group_in, in,
start_flow_index);
u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
in,
@@ -189,6 +195,15 @@ static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain,
dest_attr->vport.vhca_id);
}
+static struct mlx5dr_action *create_uplink_action(struct mlx5dr_domain *domain,
+ struct mlx5_flow_rule *dst)
+{
+ struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+
+ return mlx5dr_action_create_dest_vport(domain, MLX5_VPORT_UPLINK, 1,
+ dest_attr->vport.vhca_id);
+}
+
static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain,
struct mlx5_flow_rule *dst)
{
@@ -213,11 +228,16 @@ static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domai
static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst)
{
- return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ return (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
}
-#define MLX5_FLOW_CONTEXT_ACTION_MAX 20
+/* We want to support a rule with 32 destinations, which means we need to
+ * account for 32 destinations plus usually a counter plus one more action
+ * for a multi-destination flow table.
+ */
+#define MLX5_FLOW_CONTEXT_ACTION_MAX 34
static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_group *group,
@@ -279,35 +299,16 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
/* The order of the actions are must to be keep, only the following
* order is supported by SW steering:
- * TX: push vlan -> modify header -> encap
+ * TX: modify header -> push vlan -> encap
* RX: decap -> pop vlan -> modify header
*/
- if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
- tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]);
- if (!tmp_action) {
- err = -ENOMEM;
- goto free_actions;
- }
- fs_dr_actions[fs_dr_num_actions++] = tmp_action;
- actions[num_actions++] = tmp_action;
- }
-
- if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
- tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]);
- if (!tmp_action) {
- err = -ENOMEM;
- goto free_actions;
- }
- fs_dr_actions[fs_dr_num_actions++] = tmp_action;
- actions[num_actions++] = tmp_action;
- }
-
if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
enum mlx5dr_action_reformat_type decap_type =
DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2;
tmp_action = mlx5dr_action_create_packet_reformat(domain,
- decap_type, 0,
+ decap_type,
+ 0, 0, 0,
NULL);
if (!tmp_action) {
err = -ENOMEM;
@@ -354,6 +355,26 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
actions[num_actions++] =
fte->action.modify_hdr->action.dr_action;
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
if (delay_encap_set)
actions[num_actions++] =
fte->action.pkt_reformat->action.dr_action;
@@ -384,10 +405,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
list_for_each_entry(dst, &fte->node.children, node.list) {
enum mlx5_flow_destination_type type = dst->dest_attr.type;
+ u32 id;
- if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
- num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) {
- err = -ENOSPC;
+ if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
goto free_actions;
}
@@ -404,8 +426,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
fs_dr_actions[fs_dr_num_actions++] = tmp_action;
term_actions[num_term_actions++].dest = tmp_action;
break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
- tmp_action = create_vport_action(domain, dst);
+ tmp_action = type == MLX5_FLOW_DESTINATION_TYPE_VPORT ?
+ create_vport_action(domain, dst) :
+ create_uplink_action(domain, dst);
if (!tmp_action) {
err = -ENOMEM;
goto free_actions;
@@ -420,6 +445,28 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
num_term_actions++;
break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ id = dst->dest_attr.ft_num;
+ tmp_action = mlx5dr_action_create_dest_table_num(domain,
+ id);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ id = dst->dest_attr.sampler_id;
+ tmp_action = mlx5dr_action_create_flow_sampler(domain,
+ id);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
+ break;
default:
err = -EOPNOTSUPP;
goto free_actions;
@@ -435,8 +482,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
MLX5_FLOW_DESTINATION_TYPE_COUNTER)
continue;
- if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
- err = -ENOSPC;
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
goto free_actions;
}
@@ -453,17 +501,58 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
}
}
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ tmp_action =
+ mlx5dr_action_create_aso(domain,
+ fte->action.exe_aso.object_id,
+ fte->action.exe_aso.return_reg_id,
+ fte->action.exe_aso.type,
+ fte->action.exe_aso.flow_meter.init_color,
+ fte->action.exe_aso.flow_meter.meter_idx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
params.match_sz = match_sz;
params.match_buf = (u64 *)fte->val;
if (num_term_actions == 1) {
- if (term_actions->reformat)
+ if (term_actions->reformat) {
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
actions[num_actions++] = term_actions->reformat;
+ }
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
actions[num_actions++] = term_actions->dest;
} else if (num_term_actions > 1) {
+ bool ignore_flow_level =
+ !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+ u32 flow_source = fte->flow_context.flow_source;
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
term_actions,
- num_term_actions);
+ num_term_actions,
+ ignore_flow_level,
+ flow_source);
if (!tmp_action) {
err = -EOPNOTSUPP;
goto free_actions;
@@ -475,7 +564,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher,
&params,
num_actions,
- actions);
+ actions,
+ fte->flow_context.flow_source);
if (!rule) {
err = -EINVAL;
goto free_actions;
@@ -507,9 +597,7 @@ out_err:
}
static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
- int reformat_type,
- size_t size,
- void *reformat_data,
+ struct mlx5_pkt_reformat_params *params,
enum mlx5_flow_namespace_type namespace,
struct mlx5_pkt_reformat *pkt_reformat)
{
@@ -517,7 +605,7 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
struct mlx5dr_action *action;
int dr_reformat;
- switch (reformat_type) {
+ switch (params->type) {
case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
@@ -529,16 +617,24 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3;
break;
+ case MLX5_REFORMAT_TYPE_INSERT_HDR:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_INSERT_HDR;
+ break;
+ case MLX5_REFORMAT_TYPE_REMOVE_HDR:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_REMOVE_HDR;
+ break;
default:
mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
- reformat_type);
+ params->type);
return -EOPNOTSUPP;
}
action = mlx5dr_action_create_packet_reformat(dr_domain,
dr_reformat,
- size,
- reformat_data);
+ params->param_0,
+ params->param_1,
+ params->size,
+ params->data);
if (!action) {
mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n");
return -EINVAL;
@@ -564,7 +660,7 @@ static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
struct mlx5dr_action *action;
size_t actions_sz;
- actions_sz = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) *
+ actions_sz = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) *
num_actions;
action = mlx5dr_action_create_modify_header(dr_domain, 0,
actions_sz,
@@ -585,11 +681,15 @@ static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *n
mlx5dr_action_destroy(modify_hdr->action.dr_action);
}
-static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *group,
- int modify_mask,
- struct fs_fte *fte)
+static int
+mlx5_cmd_dr_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+ int definer_id)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_dr_create_match_definer(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask)
{
return -EOPNOTSUPP;
}
@@ -618,6 +718,36 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
return 0;
}
+static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ struct fs_fte fte_tmp = {};
+ int ret;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte);
+
+ /* Backup current dr rule details */
+ fte_tmp.fs_dr_rule = fte->fs_dr_rule;
+ memset(&fte->fs_dr_rule, 0, sizeof(struct mlx5_fs_dr_rule));
+
+ /* First add the new updated rule, then delete the old rule */
+ ret = mlx5_cmd_dr_create_fte(ns, ft, group, fte);
+ if (ret)
+ goto restore_fte;
+
+ ret = mlx5_cmd_dr_delete_fte(ns, ft, &fte_tmp);
+ WARN_ONCE(ret, "dr update fte duplicate rule deletion failed\n");
+ return ret;
+
+restore_fte:
+ fte->fs_dr_rule = fte_tmp.fs_dr_rule;
+ return ret;
+}
+
static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns)
{
@@ -647,6 +777,16 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain);
}
+static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ if (ft_type != FS_FT_FDB ||
+ MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5)
+ return 0;
+
+ return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
+}
+
bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
{
return mlx5dr_is_supported(dev);
@@ -666,9 +806,12 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
.packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc,
.modify_header_alloc = mlx5_cmd_dr_modify_header_alloc,
.modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc,
+ .create_match_definer = mlx5_cmd_dr_create_match_definer,
+ .destroy_match_definer = mlx5_cmd_dr_destroy_match_definer,
.set_peer = mlx5_cmd_dr_set_peer,
.create_ns = mlx5_cmd_dr_create_ns,
.destroy_ns = mlx5_cmd_dr_destroy_ns,
+ .get_capabilities = mlx5_cmd_dr_get_capabilities,
};
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
index 1fb185d6ac7f..d168622063d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
@@ -14,10 +14,6 @@ struct mlx5_fs_dr_action {
struct mlx5dr_action *dr_action;
};
-struct mlx5_fs_dr_ns {
- struct mlx5_dr_ns *dr_ns;
-};
-
struct mlx5_fs_dr_rule {
struct mlx5dr_rule *dr_rule;
/* Only actions created by fs_dr */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
index e01c3766c7de..fb078fa0f0cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -5,100 +5,9 @@
#define MLX5_IFC_DR_H
enum {
- MLX5DR_ACTION_MDFY_HW_FLD_L2_0 = 0,
- MLX5DR_ACTION_MDFY_HW_FLD_L2_1 = 1,
- MLX5DR_ACTION_MDFY_HW_FLD_L2_2 = 2,
- MLX5DR_ACTION_MDFY_HW_FLD_L3_0 = 3,
- MLX5DR_ACTION_MDFY_HW_FLD_L3_1 = 4,
- MLX5DR_ACTION_MDFY_HW_FLD_L3_2 = 5,
- MLX5DR_ACTION_MDFY_HW_FLD_L3_3 = 6,
- MLX5DR_ACTION_MDFY_HW_FLD_L3_4 = 7,
- MLX5DR_ACTION_MDFY_HW_FLD_L4_0 = 8,
- MLX5DR_ACTION_MDFY_HW_FLD_L4_1 = 9,
- MLX5DR_ACTION_MDFY_HW_FLD_MPLS = 10,
- MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_0 = 11,
- MLX5DR_ACTION_MDFY_HW_FLD_REG_0 = 12,
- MLX5DR_ACTION_MDFY_HW_FLD_REG_1 = 13,
- MLX5DR_ACTION_MDFY_HW_FLD_REG_2 = 14,
- MLX5DR_ACTION_MDFY_HW_FLD_REG_3 = 15,
- MLX5DR_ACTION_MDFY_HW_FLD_L4_2 = 16,
- MLX5DR_ACTION_MDFY_HW_FLD_FLEX_0 = 17,
- MLX5DR_ACTION_MDFY_HW_FLD_FLEX_1 = 18,
- MLX5DR_ACTION_MDFY_HW_FLD_FLEX_2 = 19,
- MLX5DR_ACTION_MDFY_HW_FLD_FLEX_3 = 20,
- MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_1 = 21,
- MLX5DR_ACTION_MDFY_HW_FLD_METADATA = 22,
- MLX5DR_ACTION_MDFY_HW_FLD_RESERVED = 23,
-};
-
-enum {
- MLX5DR_ACTION_MDFY_HW_OP_COPY = 0x1,
- MLX5DR_ACTION_MDFY_HW_OP_SET = 0x2,
- MLX5DR_ACTION_MDFY_HW_OP_ADD = 0x3,
-};
-
-enum {
- MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE = 0x0,
- MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4 = 0x1,
- MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6 = 0x2,
-};
-
-enum {
- MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE = 0x0,
- MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP = 0x1,
- MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP = 0x2,
-};
-
-enum {
- MLX5DR_STE_LU_TYPE_NOP = 0x00,
- MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP = 0x05,
- MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I = 0x0a,
- MLX5DR_STE_LU_TYPE_ETHL2_DST_O = 0x06,
- MLX5DR_STE_LU_TYPE_ETHL2_DST_I = 0x07,
- MLX5DR_STE_LU_TYPE_ETHL2_DST_D = 0x1b,
- MLX5DR_STE_LU_TYPE_ETHL2_SRC_O = 0x08,
- MLX5DR_STE_LU_TYPE_ETHL2_SRC_I = 0x09,
- MLX5DR_STE_LU_TYPE_ETHL2_SRC_D = 0x1c,
- MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_O = 0x36,
- MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_I = 0x37,
- MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_D = 0x38,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a,
- MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b,
- MLX5DR_STE_LU_TYPE_ETHL4_O = 0x13,
- MLX5DR_STE_LU_TYPE_ETHL4_I = 0x14,
- MLX5DR_STE_LU_TYPE_ETHL4_D = 0x21,
- MLX5DR_STE_LU_TYPE_ETHL4_MISC_O = 0x2c,
- MLX5DR_STE_LU_TYPE_ETHL4_MISC_I = 0x2d,
- MLX5DR_STE_LU_TYPE_ETHL4_MISC_D = 0x2e,
- MLX5DR_STE_LU_TYPE_MPLS_FIRST_O = 0x15,
- MLX5DR_STE_LU_TYPE_MPLS_FIRST_I = 0x24,
- MLX5DR_STE_LU_TYPE_MPLS_FIRST_D = 0x25,
- MLX5DR_STE_LU_TYPE_GRE = 0x16,
- MLX5DR_STE_LU_TYPE_FLEX_PARSER_0 = 0x22,
- MLX5DR_STE_LU_TYPE_FLEX_PARSER_1 = 0x23,
- MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19,
- MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE = 0x18,
- MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0 = 0x2f,
- MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1 = 0x30,
MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f,
};
-enum mlx5dr_ste_entry_type {
- MLX5DR_STE_TYPE_TX = 1,
- MLX5DR_STE_TYPE_RX = 2,
- MLX5DR_STE_TYPE_MODIFY_PKT = 6,
-};
-
struct mlx5_ifc_ste_general_bits {
u8 entry_type[0x4];
u8 reserved_at_4[0x4];
@@ -519,10 +428,7 @@ struct mlx5_ifc_ste_gre_bits {
};
struct mlx5_ifc_ste_flex_parser_0_bits {
- u8 parser_3_label[0x14];
- u8 parser_3_exp[0x3];
- u8 parser_3_s_bos[0x1];
- u8 parser_3_ttl[0x8];
+ u8 flex_parser_3[0x20];
u8 flex_parser_2[0x20];
@@ -541,6 +447,14 @@ struct mlx5_ifc_ste_flex_parser_1_bits {
u8 flex_parser_4[0x20];
};
+struct mlx5_ifc_ste_flex_parser_ok_bits {
+ u8 flex_parser_3[0x20];
+ u8 flex_parser_2[0x20];
+ u8 flex_parsers_ok[0x8];
+ u8 reserved_at_48[0x18];
+ u8 flex_parser_0[0x20];
+};
+
struct mlx5_ifc_ste_flex_parser_tnl_bits {
u8 flex_parser_tunneling_header_63_32[0x20];
@@ -573,6 +487,25 @@ struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits {
+ u8 reserved_at_0[0x5];
+ u8 gtpu_msg_flags[0x3];
+ u8 gtpu_msg_type[0x8];
+ u8 reserved_at_10[0x10];
+
+ u8 gtpu_teid[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_tunnel_header_bits {
+ u8 tunnel_header_0[0x20];
+
+ u8 tunnel_header_1[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
struct mlx5_ifc_ste_general_purpose_bits {
u8 general_purpose_lookup_field[0x20];
@@ -641,4 +574,30 @@ struct mlx5_ifc_dr_action_hw_copy_bits {
u8 reserved_at_38[0x8];
};
+enum {
+ MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ = 2,
+};
+
+struct mlx5_ifc_ste_aso_flow_meter_action_bits {
+ u8 reserved_at_0[0xc];
+ u8 action[0x1];
+ u8 initial_color[0x2];
+ u8 line_id[0x1];
+};
+
+struct mlx5_ifc_ste_double_action_aso_v1_bits {
+ u8 action_id[0x8];
+ u8 aso_context_number[0x18];
+
+ u8 dest_reg_id[0x2];
+ u8 change_ordering_tag[0x1];
+ u8 aso_check_ordering[0x1];
+ u8 aso_context_type[0x4];
+ u8 reserved_at_28[0x8];
+ union {
+ u8 aso_fields[0x10];
+ struct mlx5_ifc_ste_aso_flow_meter_action_bits flow_meter;
+ };
+};
+
#endif /* MLX5_IFC_DR_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
new file mode 100644
index 000000000000..34c2bd17a8b4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
@@ -0,0 +1,434 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#ifndef MLX5_IFC_DR_STE_V1_H
+#define MLX5_IFC_DR_STE_V1_H
+
+enum mlx5_ifc_ste_v1_modify_hdr_offset {
+ MLX5_MODIFY_HEADER_V1_QW_OFFSET = 0x20,
+};
+
+struct mlx5_ifc_ste_single_action_flow_tag_v1_bits {
+ u8 action_id[0x8];
+ u8 flow_tag[0x18];
+};
+
+struct mlx5_ifc_ste_single_action_modify_list_v1_bits {
+ u8 action_id[0x8];
+ u8 num_of_modify_actions[0x8];
+ u8 modify_actions_ptr[0x10];
+};
+
+struct mlx5_ifc_ste_single_action_remove_header_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 reserved_at_10[0x2];
+ u8 end_anchor[0x6];
+ u8 reserved_at_18[0x4];
+ u8 decap[0x1];
+ u8 vni_to_cqe[0x1];
+ u8 qos_profile[0x2];
+};
+
+struct mlx5_ifc_ste_single_action_remove_header_size_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 outer_l4_remove[0x1];
+ u8 reserved_at_11[0x1];
+ u8 start_offset[0x7];
+ u8 reserved_at_18[0x1];
+ u8 remove_size[0x6];
+};
+
+struct mlx5_ifc_ste_double_action_copy_v1_bits {
+ u8 action_id[0x8];
+ u8 destination_dw_offset[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_17[0x2];
+ u8 destination_length[0x6];
+
+ u8 reserved_at_20[0x8];
+ u8 source_dw_offset[0x8];
+ u8 reserved_at_30[0x2];
+ u8 source_right_shifter[0x6];
+ u8 reserved_at_38[0x8];
+};
+
+struct mlx5_ifc_ste_double_action_set_v1_bits {
+ u8 action_id[0x8];
+ u8 destination_dw_offset[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_18[0x2];
+ u8 destination_length[0x6];
+
+ u8 inline_data[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_add_v1_bits {
+ u8 action_id[0x8];
+ u8 destination_dw_offset[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_18[0x2];
+ u8 destination_length[0x6];
+
+ u8 add_value[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_insert_with_inline_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 start_offset[0x7];
+ u8 reserved_at_17[0x9];
+
+ u8 inline_data[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 start_offset[0x7];
+ u8 size[0x6];
+ u8 attributes[0x3];
+
+ u8 pointer[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_modify_action_list_v1_bits {
+ u8 action_id[0x8];
+ u8 modify_actions_pattern_pointer[0x18];
+
+ u8 number_of_modify_actions[0x8];
+ u8 modify_actions_argument_pointer[0x18];
+};
+
+struct mlx5_ifc_ste_match_bwc_v1_bits {
+ u8 entry_format[0x8];
+ u8 counter_id[0x18];
+
+ u8 miss_address_63_48[0x10];
+ u8 match_definer_ctx_idx[0x8];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 reserved_at_5a[0x1];
+ u8 match_polarity[0x1];
+ u8 reparse[0x1];
+ u8 reserved_at_5d[0x3];
+
+ u8 next_table_base_63_48[0x10];
+ u8 hash_definer_ctx_idx[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 hash_type[0x2];
+ u8 hash_after_actions[0x1];
+ u8 reserved_at_9e[0x2];
+
+ u8 byte_mask[0x10];
+ u8 next_entry_format[0x1];
+ u8 mask_mode[0x1];
+ u8 gvmi[0xe];
+
+ u8 action[0x40];
+};
+
+struct mlx5_ifc_ste_mask_and_match_v1_bits {
+ u8 entry_format[0x8];
+ u8 counter_id[0x18];
+
+ u8 miss_address_63_48[0x10];
+ u8 match_definer_ctx_idx[0x8];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 reserved_at_5a[0x1];
+ u8 match_polarity[0x1];
+ u8 reparse[0x1];
+ u8 reserved_at_5d[0x3];
+
+ u8 next_table_base_63_48[0x10];
+ u8 hash_definer_ctx_idx[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 hash_type[0x2];
+ u8 hash_after_actions[0x1];
+ u8 reserved_at_9e[0x2];
+
+ u8 action[0x60];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_v1_bits {
+ u8 reserved_at_0[0x1];
+ u8 sx_sniffer[0x1];
+ u8 functional_loopback[0x1];
+ u8 ip_fragmented[0x1];
+ u8 qp_type[0x2];
+ u8 encapsulation_type[0x2];
+ u8 port[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_vlan_qualifier[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+
+ u8 smac_47_16[0x20];
+
+ u8 smac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 reserved_at_60[0x6];
+ u8 tcp_syn[0x1];
+ u8 reserved_at_67[0x3];
+ u8 force_loopback[0x1];
+ u8 l2_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 l4_ok[0x1];
+ u8 second_vlan_qualifier[0x2];
+
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_dst_v1_bits {
+ u8 reserved_at_0[0x1];
+ u8 sx_sniffer[0x1];
+ u8 functional_lb[0x1];
+ u8 ip_fragmented[0x1];
+ u8 qp_type[0x2];
+ u8 encapsulation_type[0x2];
+ u8 port[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_vlan_qualifier[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 reserved_at_60[0x6];
+ u8 tcp_syn[0x1];
+ u8 reserved_at_67[0x3];
+ u8 force_lb[0x1];
+ u8 l2_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 l4_ok[0x1];
+ u8 second_vlan_qualifier[0x2];
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_dst_v1_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 smac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 reserved_at_50[0x2];
+ u8 functional_lb[0x1];
+ u8 reserved_at_53[0x5];
+ u8 port[0x2];
+ u8 l3_type[0x2];
+ u8 reserved_at_5c[0x2];
+ u8 first_vlan_qualifier[0x2];
+
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+ u8 smac_15_0[0x10];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_v1_bits {
+ u8 source_address[0x20];
+
+ u8 destination_address[0x20];
+
+ u8 source_port[0x10];
+ u8 destination_port[0x10];
+
+ u8 reserved_at_60[0x4];
+ u8 l4_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 fragmented[0x1];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 dscp[0x6];
+ u8 ecn[0x2];
+ u8 protocol[0x8];
+};
+
+struct mlx5_ifc_ste_eth_l2_tnl_v1_bits {
+ u8 l2_tunneling_network_id[0x20];
+
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 reserved_at_60[0x3];
+ u8 ip_fragmented[0x1];
+ u8 reserved_at_64[0x2];
+ u8 encp_type[0x2];
+ u8 reserved_at_68[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_vlan_qualifier[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_misc_v1_bits {
+ u8 identification[0x10];
+ u8 flags[0x3];
+ u8 fragment_offset[0xd];
+
+ u8 total_length[0x10];
+ u8 checksum[0x10];
+
+ u8 version[0x4];
+ u8 ihl[0x4];
+ u8 time_to_live[0x8];
+ u8 reserved_at_50[0x10];
+
+ u8 reserved_at_60[0x1c];
+ u8 voq_internal_prio[0x4];
+};
+
+struct mlx5_ifc_ste_eth_l4_v1_bits {
+ u8 ipv6_version[0x4];
+ u8 reserved_at_4[0x4];
+ u8 dscp[0x6];
+ u8 ecn[0x2];
+ u8 ipv6_hop_limit[0x8];
+ u8 protocol[0x8];
+
+ u8 src_port[0x10];
+ u8 dst_port[0x10];
+
+ u8 first_fragment[0x1];
+ u8 reserved_at_41[0xb];
+ u8 flow_label[0x14];
+
+ u8 tcp_data_offset[0x4];
+ u8 l4_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 fragmented[0x1];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 ipv6_paylen[0x10];
+};
+
+struct mlx5_ifc_ste_eth_l4_misc_v1_bits {
+ u8 window_size[0x10];
+ u8 urgent_pointer[0x10];
+
+ u8 ack_num[0x20];
+
+ u8 seq_num[0x20];
+
+ u8 length[0x10];
+ u8 checksum[0x10];
+};
+
+struct mlx5_ifc_ste_mpls_v1_bits {
+ u8 reserved_at_0[0x15];
+ u8 mpls_ok[0x1];
+ u8 mpls4_s_bit[0x1];
+ u8 mpls4_qualifier[0x1];
+ u8 mpls3_s_bit[0x1];
+ u8 mpls3_qualifier[0x1];
+ u8 mpls2_s_bit[0x1];
+ u8 mpls2_qualifier[0x1];
+ u8 mpls1_s_bit[0x1];
+ u8 mpls1_qualifier[0x1];
+ u8 mpls0_s_bit[0x1];
+ u8 mpls0_qualifier[0x1];
+
+ u8 mpls0_label[0x14];
+ u8 mpls0_exp[0x3];
+ u8 mpls0_s_bos[0x1];
+ u8 mpls0_ttl[0x8];
+
+ u8 mpls1_label[0x20];
+
+ u8 mpls2_label[0x20];
+};
+
+struct mlx5_ifc_ste_gre_v1_bits {
+ u8 gre_c_present[0x1];
+ u8 reserved_at_1[0x1];
+ u8 gre_k_present[0x1];
+ u8 gre_s_present[0x1];
+ u8 strict_src_route[0x1];
+ u8 recur[0x3];
+ u8 flags[0x5];
+ u8 version[0x3];
+ u8 gre_protocol[0x10];
+
+ u8 reserved_at_20[0x20];
+
+ u8 gre_key_h[0x18];
+ u8 gre_key_l[0x8];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_src_gvmi_qp_v1_bits {
+ u8 loopback_synd[0x8];
+ u8 reserved_at_8[0x7];
+ u8 functional_lb[0x1];
+ u8 source_gvmi[0x10];
+
+ u8 force_lb[0x1];
+ u8 reserved_at_21[0x1];
+ u8 source_is_requestor[0x1];
+ u8 reserved_at_23[0x5];
+ u8 source_qp[0x18];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_icmp_v1_bits {
+ u8 icmp_payload_data[0x20];
+
+ u8 icmp_header_data[0x20];
+
+ u8 icmp_type[0x8];
+ u8 icmp_code[0x8];
+ u8 reserved_at_50[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+#endif /* MLX5_IFC_DR_STE_V1_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index e1edc9c247b7..226a0d7bb06d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -26,6 +26,8 @@ enum mlx5dr_action_reformat_type {
DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2,
DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2,
DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3,
+ DR_ACTION_REFORMAT_TYP_INSERT_HDR,
+ DR_ACTION_REFORMAT_TYP_REMOVE_HDR,
};
struct mlx5dr_match_parameters {
@@ -38,8 +40,6 @@ struct mlx5dr_action_dest {
struct mlx5dr_action *reformat;
};
-#ifdef CONFIG_MLX5_SW_STEERING
-
struct mlx5dr_domain *
mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type);
@@ -51,7 +51,11 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn);
struct mlx5dr_table *
-mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags);
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,
+ u16 uid);
+
+struct mlx5dr_table *
+mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft);
int mlx5dr_table_destroy(struct mlx5dr_table *table);
@@ -59,7 +63,7 @@ u32 mlx5dr_table_get_id(struct mlx5dr_table *table);
struct mlx5dr_matcher *
mlx5dr_matcher_create(struct mlx5dr_table *table,
- u16 priority,
+ u32 priority,
u8 match_criteria_enable,
struct mlx5dr_match_parameters *mask);
@@ -69,7 +73,8 @@ struct mlx5dr_rule *
mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
struct mlx5dr_match_parameters *value,
size_t num_actions,
- struct mlx5dr_action *actions[]);
+ struct mlx5dr_action *actions[],
+ u32 flow_source);
int mlx5dr_rule_destroy(struct mlx5dr_rule *rule);
@@ -77,6 +82,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
struct mlx5dr_action *action);
struct mlx5dr_action *
+mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num);
+
+struct mlx5dr_action *
mlx5dr_action_create_dest_table(struct mlx5dr_table *table);
struct mlx5dr_action *
@@ -85,24 +93,31 @@ mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain,
struct mlx5dr_action *
mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
- u32 vport, u8 vhca_id_valid,
+ u16 vport, u8 vhca_id_valid,
u16 vhca_id);
struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
- u32 num_of_dests);
+ u32 num_of_dests,
+ bool ignore_flow_level,
+ u32 flow_source);
struct mlx5dr_action *mlx5dr_action_create_drop(void);
struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value);
struct mlx5dr_action *
+mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id);
+
+struct mlx5dr_action *
mlx5dr_action_create_flow_counter(u32 counter_id);
struct mlx5dr_action *
mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
enum mlx5dr_action_reformat_type reformat_type,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
size_t data_sz,
void *data);
@@ -117,111 +132,61 @@ struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void);
struct mlx5dr_action *
mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr);
+struct mlx5dr_action *
+mlx5dr_action_create_aso(struct mlx5dr_domain *dmn,
+ u32 obj_id,
+ u8 return_reg_id,
+ u8 aso_type,
+ u8 init_color,
+ u8 meter_id);
+
int mlx5dr_action_destroy(struct mlx5dr_action *action);
static inline bool
mlx5dr_is_supported(struct mlx5_core_dev *dev)
{
- return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner);
+ return MLX5_CAP_GEN(dev, roce) &&
+ (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
+ (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
+ (MLX5_CAP_GEN(dev, steering_format_version) <=
+ MLX5_STEERING_FORMAT_CONNECTX_7)));
}
-#else /* CONFIG_MLX5_SW_STEERING */
-
-static inline struct mlx5dr_domain *
-mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; }
-
-static inline int
-mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; }
-
-static inline int
-mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; }
-
-static inline void
-mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
- struct mlx5dr_domain *peer_dmn) { }
-
-static inline struct mlx5dr_table *
-mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; }
-
-static inline int
-mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; }
-
-static inline u32
-mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; }
-
-static inline struct mlx5dr_matcher *
-mlx5dr_matcher_create(struct mlx5dr_table *table,
- u16 priority,
- u8 match_criteria_enable,
- struct mlx5dr_match_parameters *mask) { return NULL; }
-
-static inline int
-mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; }
-
-static inline struct mlx5dr_rule *
-mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
- struct mlx5dr_match_parameters *value,
- size_t num_actions,
- struct mlx5dr_action *actions[]) { return NULL; }
-
-static inline int
-mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; }
-
-static inline int
-mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
- struct mlx5dr_action *action) { return 0; }
-
-static inline struct mlx5dr_action *
-mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; }
-
-static inline struct mlx5dr_action *
-mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain,
- struct mlx5_flow_table *ft) { return NULL; }
-
-static inline struct mlx5dr_action *
-mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
- u32 vport, u8 vhca_id_valid,
- u16 vhca_id) { return NULL; }
+/* buddy functions & structure */
-static inline struct mlx5dr_action *
-mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
- struct mlx5dr_action_dest *dests,
- u32 num_of_dests) { return NULL; }
+struct mlx5dr_icm_mr;
-static inline struct mlx5dr_action *
-mlx5dr_action_create_drop(void) { return NULL; }
+struct mlx5dr_icm_buddy_mem {
+ unsigned long **bitmap;
+ unsigned int *num_free;
+ u32 max_order;
+ struct list_head list_node;
+ struct mlx5dr_icm_mr *icm_mr;
+ struct mlx5dr_icm_pool *pool;
-static inline struct mlx5dr_action *
-mlx5dr_action_create_tag(u32 tag_value) { return NULL; }
+ /* This is the list of used chunks. HW may be accessing this memory */
+ struct list_head used_list;
+ u64 used_memory;
-static inline struct mlx5dr_action *
-mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; }
+ /* Hardware may be accessing this memory but at some future,
+ * undetermined time, it might cease to do so.
+ * sync_ste command sets them free.
+ */
+ struct list_head hot_list;
-static inline struct mlx5dr_action *
-mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
- enum mlx5dr_action_reformat_type reformat_type,
- size_t data_sz,
- void *data) { return NULL; }
-
-static inline struct mlx5dr_action *
-mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain,
- u32 flags,
- size_t actions_sz,
- __be64 actions[]) { return NULL; }
-
-static inline struct mlx5dr_action *
-mlx5dr_action_create_pop_vlan(void) { return NULL; }
-
-static inline struct mlx5dr_action *
-mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain,
- __be32 vlan_hdr) { return NULL; }
-
-static inline int
-mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; }
-
-static inline bool
-mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; }
+ /* Memory optimisation */
+ struct mlx5dr_ste *ste_arr;
+ struct list_head *miss_list;
+ u8 *hw_ste_arr;
+};
-#endif /* CONFIG_MLX5_SW_STEERING */
+int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int max_order);
+void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy);
+int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int order,
+ unsigned int *segment);
+void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int seg, unsigned int order);
#endif /* _MLX5DR_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index b1068500f1df..b6931bbe52d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -36,14 +36,14 @@
int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
{
- u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {};
int err;
MLX5_SET(alloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out);
if (!err)
*tdn = MLX5_GET(alloc_transport_domain_out, out,
transport_domain);
@@ -54,19 +54,18 @@ EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {};
MLX5_SET(dealloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
}
EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
{
- u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {};
int err;
MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ);
@@ -78,44 +77,39 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
}
EXPORT_SYMBOL(mlx5_core_create_rq);
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in)
{
- u32 out[MLX5_ST_SZ_DW(modify_rq_out)];
-
MLX5_SET(modify_rq_in, in, rqn, rqn);
MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ);
- memset(out, 0, sizeof(out));
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_rq, in);
}
EXPORT_SYMBOL(mlx5_core_modify_rq);
void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
{
- u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
MLX5_SET(destroy_rq_in, in, rqn, rqn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_rq, in);
}
EXPORT_SYMBOL(mlx5_core_destroy_rq);
int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
{
- u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
- int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {};
MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
MLX5_SET(query_rq_in, in, rqn, rqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ return mlx5_cmd_exec_inout(dev, query_rq, in, out);
}
EXPORT_SYMBOL(mlx5_core_query_rq);
int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
{
- u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {};
int err;
MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
@@ -126,34 +120,30 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
return err;
}
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in)
{
- u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0};
-
MLX5_SET(modify_sq_in, in, sqn, sqn);
MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_sq, in);
}
EXPORT_SYMBOL(mlx5_core_modify_sq);
void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
{
- u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
MLX5_SET(destroy_sq_in, in, sqn, sqn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_sq, in);
}
int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
{
- u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
- int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {};
MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
MLX5_SET(query_sq_in, in, sqn, sqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ return mlx5_cmd_exec_inout(dev, query_sq, in, out);
}
EXPORT_SYMBOL(mlx5_core_query_sq);
@@ -182,24 +172,13 @@ out:
}
EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
-int mlx5_core_create_tir_out(struct mlx5_core_dev *dev,
- u32 *in, int inlen,
- u32 *out, int outlen)
-{
- MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
-
- return mlx5_cmd_exec(dev, in, inlen, out, outlen);
-}
-EXPORT_SYMBOL(mlx5_core_create_tir_out);
-
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *tirn)
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn)
{
u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
int err;
- err = mlx5_core_create_tir_out(dev, in, inlen,
- out, sizeof(out));
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+ err = mlx5_cmd_exec_inout(dev, create_tir, in, out);
if (!err)
*tirn = MLX5_GET(create_tir_out, out, tirn);
@@ -207,35 +186,30 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
}
EXPORT_SYMBOL(mlx5_core_create_tir);
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
- int inlen)
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in)
{
- u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0};
-
MLX5_SET(modify_tir_in, in, tirn, tirn);
MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_tir, in);
}
void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
{
- u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
MLX5_SET(destroy_tir_in, in, tirn, tirn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_tir, in);
}
EXPORT_SYMBOL(mlx5_core_destroy_tir);
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *tisn)
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn)
{
- u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {};
int err;
MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, create_tis, in, out);
if (!err)
*tisn = MLX5_GET(create_tis_out, out, tisn);
@@ -243,33 +217,29 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
}
EXPORT_SYMBOL(mlx5_core_create_tis);
-int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
- int inlen)
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in)
{
- u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
-
MLX5_SET(modify_tis_in, in, tisn, tisn);
MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, modify_tis, in);
}
EXPORT_SYMBOL(mlx5_core_modify_tis);
void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
{
- u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
MLX5_SET(destroy_tis_in, in, tisn, tisn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_tis, in);
}
EXPORT_SYMBOL(mlx5_core_destroy_tis);
int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqtn)
{
- u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
int err;
MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
@@ -284,7 +254,7 @@ EXPORT_SYMBOL(mlx5_core_create_rqt);
int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
int inlen)
{
- u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {};
MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
@@ -293,12 +263,11 @@ int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
{
- u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_rqt, in);
}
EXPORT_SYMBOL(mlx5_core_destroy_rqt);
@@ -383,7 +352,7 @@ static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn,
int curr_state, int next_state,
u16 peer_vhca, u32 peer_sq)
{
- u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {};
void *rqc;
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
@@ -396,8 +365,7 @@ static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn,
MLX5_SET(modify_rq_in, in, rq_state, curr_state);
MLX5_SET(rqc, rqc, state, next_state);
- return mlx5_core_modify_rq(func_mdev, rqn,
- in, MLX5_ST_SZ_BYTES(modify_rq_in));
+ return mlx5_core_modify_rq(func_mdev, rqn, in);
}
static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn,
@@ -417,8 +385,7 @@ static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn,
MLX5_SET(modify_sq_in, in, sq_state, curr_state);
MLX5_SET(sqc, sqc, state, next_state);
- return mlx5_core_modify_sq(peer_mdev, sqn,
- in, MLX5_ST_SZ_BYTES(modify_sq_in));
+ return mlx5_core_modify_sq(peer_mdev, sqn, in);
}
static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp)
@@ -457,6 +424,15 @@ err_modify_sq:
return err;
}
+static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_RST, 0, 0);
+}
+
static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
{
int i;
@@ -465,13 +441,9 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
for (i = 0; i < hp->num_channels; i++)
mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
MLX5_RQC_STATE_RST, 0, 0);
-
/* unset peer SQs */
- if (hp->peer_gone)
- return;
- for (i = 0; i < hp->num_channels; i++)
- mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
- MLX5_SQC_STATE_RST, 0, 0);
+ if (!hp->peer_gone)
+ mlx5_hairpin_unpair_peer_sq(hp);
}
struct mlx5_hairpin *
@@ -518,3 +490,16 @@ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
mlx5_hairpin_destroy_queues(hp);
kfree(hp);
}
+
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ mlx5_hairpin_unpair_peer_sq(hp);
+
+ /* destroy peer SQ */
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+
+ hp->peer_gone = true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 0d006224d7b0..8455e79bc44a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -31,36 +31,33 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/io-mapping.h>
#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
-int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+static int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
{
- u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
int err;
MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (!err)
- *uarn = MLX5_GET(alloc_uar_out, out, uar);
- return err;
+ err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
+ if (err)
+ return err;
+
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return 0;
}
-EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
-int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+static int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
MLX5_SET(dealloc_uar_in, in, uar, uarn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, dealloc_uar, in);
}
-EXPORT_SYMBOL(mlx5_cmd_free_uar);
static int uars_per_sys_page(struct mlx5_core_dev *mdev)
{
@@ -102,19 +99,21 @@ static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
int err = -ENOMEM;
phys_addr_t pfn;
int bfregs;
+ int node;
int i;
bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
- up = kzalloc(sizeof(*up), GFP_KERNEL);
+ node = mdev->priv.numa_node;
+ up = kzalloc_node(sizeof(*up), GFP_KERNEL, node);
if (!up)
return ERR_PTR(err);
up->mdev = mdev;
- up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
+ up->reg_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node);
if (!up->reg_bitmap)
goto error1;
- up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
+ up->fp_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node);
if (!up->fp_bitmap)
goto error1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 1faac31f74d0..d5c317325030 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -36,14 +36,16 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h"
+#include "sf/sf.h"
/* Mutex to hold while enabling or disabling RoCE */
static DEFINE_MUTEX(mlx5_roce_en_lock);
-static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
- u16 vport, u32 *out, int outlen)
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
- u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+ int err;
MLX5_SET(query_vport_state_in, in, opcode,
MLX5_CMD_OP_QUERY_VPORT_STATE);
@@ -52,14 +54,9 @@ static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
if (vport)
MLX5_SET(query_vport_state_in, in, other_vport, 1);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
-}
-
-u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
- u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
-
- _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+ err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+ if (err)
+ return 0;
return MLX5_GET(query_vport_state_out, out, state);
}
@@ -67,8 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 other_vport, u8 state)
{
- u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {};
MLX5_SET(modify_vport_state_in, in, opcode,
MLX5_CMD_OP_MODIFY_VPORT_STATE);
@@ -77,13 +73,13 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
MLX5_SET(modify_vport_state_in, in, admin_state, state);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
}
static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
- u32 *out, int outlen)
+ u32 *out)
{
- u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
MLX5_SET(query_nic_vport_context_in, in, opcode,
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
@@ -91,26 +87,16 @@ static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
if (vport)
MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
-}
-
-static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
- int inlen)
-{
- u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
-
- MLX5_SET(modify_nic_vport_context_in, in, opcode,
- MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ return mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out);
}
int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
u16 vport, u8 *min_inline)
{
- u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
int err;
- err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+ err = mlx5_query_nic_vport_context(mdev, vport, out);
if (!err)
*min_inline = MLX5_GET(query_nic_vport_context_out, out,
nic_vport_context.min_wqe_inline_mode);
@@ -125,7 +111,7 @@ void mlx5_query_min_inline(struct mlx5_core_dev *mdev,
case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode))
break;
- /* fall through */
+ fallthrough;
case MLX5_CAP_INLINE_MODE_L2:
*min_inline_mode = MLX5_INLINE_MODE_L2;
break;
@@ -139,8 +125,7 @@ EXPORT_SYMBOL_GPL(mlx5_query_min_inline);
int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
u16 vport, u8 min_inline)
{
- u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
- int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {};
void *nic_vport_ctx;
MLX5_SET(modify_nic_vport_context_in, in,
@@ -152,23 +137,20 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
in, nic_vport_context);
MLX5_SET(nic_vport_context, nic_vport_ctx,
min_wqe_inline_mode, min_inline);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- return mlx5_modify_nic_vport_context(mdev, in, inlen);
+ return mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
}
int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
u16 vport, bool other, u8 *addr)
{
- int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
u8 *out_addr;
- u32 *out;
int err;
- out = kvzalloc(outlen, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
nic_vport_context.permanent_address);
@@ -177,11 +159,10 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
MLX5_SET(query_nic_vport_context_in, in, other_vport, other);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+ err = mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out);
if (!err)
ether_addr_copy(addr, &out_addr[2]);
- kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
@@ -193,7 +174,7 @@ int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr)
EXPORT_SYMBOL_GPL(mlx5_query_mac_address);
int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
- u16 vport, u8 *addr)
+ u16 vport, const u8 *addr)
{
void *in;
int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
@@ -216,8 +197,10 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
permanent_address);
ether_addr_copy(&perm_mac[2], addr);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
kvfree(in);
@@ -235,7 +218,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
if (!out)
return -ENOMEM;
- err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
if (!err)
*mtu = MLX5_GET(query_nic_vport_context_out, out,
nic_vport_context.mtu);
@@ -257,8 +240,10 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
kvfree(in);
return err;
@@ -292,10 +277,10 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
req_list_size = max_list_size;
}
- out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) +
req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
- out = kzalloc(out_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -322,7 +307,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
ether_addr_copy(addr_list[i], mac_addr);
}
out:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
@@ -332,7 +317,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
u8 addr_list[][ETH_ALEN],
int list_size)
{
- u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {};
void *nic_vport_ctx;
int max_list_size;
int in_sz;
@@ -350,8 +335,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
- memset(out, 0, sizeof(out));
- in = kzalloc(in_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -376,7 +360,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
}
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
- kfree(in);
+ kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
@@ -402,7 +386,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
list_size * MLX5_ST_SZ_BYTES(vlan_layout);
memset(out, 0, sizeof(out));
- in = kzalloc(in_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -427,7 +411,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
}
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
- kfree(in);
+ kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
@@ -437,19 +421,21 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
{
u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ int err;
out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
- mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
+ if (err)
+ goto out;
*system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
nic_vport_context.system_image_guid);
-
+out:
kvfree(out);
-
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
@@ -462,7 +448,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
if (!out)
return -ENOMEM;
- mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ mlx5_query_nic_vport_context(mdev, 0, out);
*node_guid = MLX5_GET64(query_nic_vport_context_out, out,
nic_vport_context.node_guid);
@@ -481,8 +467,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
void *in;
int err;
- if (!vport)
- return -EINVAL;
if (!MLX5_CAP_GEN(mdev, vport_group_manager))
return -EACCES;
@@ -498,8 +482,10 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
in, nic_vport_context);
MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
kvfree(in);
@@ -516,7 +502,7 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
if (!out)
return -ENOMEM;
- mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ mlx5_query_nic_vport_context(mdev, 0, out);
*qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
nic_vport_context.qkey_violation_counter);
@@ -556,8 +542,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
out_sz += nout * sizeof(*gid);
- in = kzalloc(in_sz, GFP_KERNEL);
- out = kzalloc(out_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!in || !out) {
err = -ENOMEM;
goto out;
@@ -587,8 +573,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
gid->global.interface_id = tmp->global.interface_id;
out:
- kfree(in);
- kfree(out);
+ kvfree(in);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid);
@@ -621,8 +607,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
out_sz += nout * MLX5_ST_SZ_BYTES(pkey);
- in = kzalloc(in_sz, GFP_KERNEL);
- out = kzalloc(out_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!in || !out) {
err = -ENOMEM;
goto out;
@@ -652,8 +638,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
*pkey = MLX5_GET_PR(pkey, pkarr, pkey);
out:
- kfree(in);
- kfree(out);
+ kvfree(in);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey);
@@ -664,7 +650,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *rep)
{
int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
- int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0};
+ int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {};
int is_group_manager;
void *out;
void *ctx;
@@ -672,7 +658,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
- out = kzalloc(out_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -691,7 +677,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
if (MLX5_CAP_GEN(dev, num_ports) == 2)
MLX5_SET(query_hca_vport_context_in, in, port_num, port_num);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ err = mlx5_cmd_exec_inout(dev, query_hca_vport_context, in, out);
if (err)
goto ex;
@@ -731,7 +717,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
system_image_guid);
ex:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context);
@@ -742,7 +728,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *rep;
int err;
- rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep)
return -ENOMEM;
@@ -750,7 +736,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
if (!err)
*sys_image_guid = rep->sys_image_guid;
- kfree(rep);
+ kvfree(rep);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid);
@@ -761,7 +747,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *rep;
int err;
- rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep)
return -ENOMEM;
@@ -769,7 +755,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
if (!err)
*node_guid = rep->node_guid;
- kfree(rep);
+ kvfree(rep);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
@@ -784,11 +770,11 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
int err;
- out = kzalloc(outlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
- err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ err = mlx5_query_nic_vport_context(mdev, vport, out);
if (err)
goto out;
@@ -800,7 +786,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
nic_vport_context.promisc_all);
out:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
@@ -825,8 +811,10 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
nic_vport_context.promisc_mc, promisc_mc);
MLX5_SET(modify_nic_vport_context_in, in,
nic_vport_context.promisc_all, promisc_all);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
kvfree(in);
@@ -865,8 +853,10 @@ int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable)
if (MLX5_CAP_GEN(mdev, disable_local_lb_uc))
MLX5_SET(modify_nic_vport_context_in, in,
field_select.disable_uc_local_lb, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
if (!err)
mlx5_core_dbg(mdev, "%s local_lb\n",
@@ -884,11 +874,11 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
int value;
int err;
- out = kzalloc(outlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
- err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
if (err)
goto out;
@@ -901,7 +891,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
*status = !value;
out:
- kfree(out);
+ kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
@@ -925,8 +915,10 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
state);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
kvfree(in);
@@ -965,16 +957,15 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
mutex_unlock(&mlx5_roce_en_lock);
return err;
}
-EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
+EXPORT_SYMBOL(mlx5_nic_vport_disable_roce);
int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
- int vf, u8 port_num, void *out,
- size_t out_sz)
+ int vf, u8 port_num, void *out)
{
- int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
- int is_group_manager;
- void *in;
- int err;
+ int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
+ int is_group_manager;
+ void *in;
+ int err;
is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
in = kvzalloc(in_sz, GFP_KERNEL);
@@ -997,7 +988,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
if (MLX5_CAP_GEN(dev, num_ports) == 2)
MLX5_SET(query_vport_counter_in, in, port_num, port_num);
- err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+ err = mlx5_cmd_exec_inout(dev, query_vport_counter, in, out);
free:
kvfree(in);
return err;
@@ -1008,8 +999,8 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
u8 other_vport, u64 *rx_discard_vport_down,
u64 *tx_discard_vport_down)
{
- u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
int err;
MLX5_SET(query_vnic_env_in, in, opcode,
@@ -1018,7 +1009,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
MLX5_SET(query_vnic_env_in, in, vport_number, vport);
MLX5_SET(query_vnic_env_in, in, other_vport, other_vport);
- err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out);
if (err)
return err;
@@ -1035,19 +1026,17 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *req)
{
int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
- u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)];
int is_group_manager;
+ void *ctx;
void *in;
int err;
- void *ctx;
mlx5_core_dbg(dev, "vf %d\n", vf);
is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
- in = kzalloc(in_sz, GFP_KERNEL);
+ in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
- memset(out, 0, sizeof(out));
MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
if (other_vport) {
if (is_group_manager) {
@@ -1071,9 +1060,12 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID)
MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
- err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select,
+ req->cap_mask1_perm);
+ err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in);
ex:
- kfree(in);
+ kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
@@ -1094,14 +1086,24 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
goto free;
MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
- MLX5_SET(modify_nic_vport_context_in, in,
- nic_vport_context.affiliated_vhca_id,
- MLX5_CAP_GEN(master_mdev, vhca_id));
+ if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) {
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN_2(master_mdev, sw_vhca_id));
+ } else {
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN(master_mdev, vhca_id));
+ }
MLX5_SET(modify_nic_vport_context_in, in,
nic_vport_context.affiliation_criteria,
MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in);
if (err)
mlx5_nic_vport_disable_roce(port_mdev);
@@ -1126,8 +1128,10 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
nic_vport_context.affiliated_vhca_id, 0);
MLX5_SET(modify_nic_vport_context_in, in,
nic_vport_context.affiliation_criteria, 0);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
- err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in);
if (!err)
mlx5_nic_vport_disable_roce(port_mdev);
@@ -1139,32 +1143,31 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
{
int port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- u64 tmp = 0;
+ u64 tmp;
+ int err;
if (mdev->sys_image_guid)
return mdev->sys_image_guid;
if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
- mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
else
- mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
+ err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
- mdev->sys_image_guid = tmp;
+ mdev->sys_image_guid = err ? 0 : tmp;
- return tmp;
+ return mdev->sys_image_guid;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
-/**
- * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
- *
- * @dev: Pointer to core device
- *
- * mlx5_eswitch_get_total_vports returns total number of vports for
- * the eswitch.
- */
-u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
{
- return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev);
+ u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ MLX5_SET(query_hca_cap_in, in, function_id, function_id);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
}
-EXPORT_SYMBOL(mlx5_eswitch_get_total_vports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 01f075fac276..3091dd014650 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -34,11 +34,6 @@
#include "wq.h"
#include "mlx5_core.h"
-static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
-{
- return ((u32)1 << log_sz) << log_stride;
-}
-
int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 4cadc336593f..4d629e5ddbc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -123,7 +123,7 @@ static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq)
wq->cur_sz++;
}
-static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n)
+static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n)
{
wq->wqe_ctr += n;
wq->cur_sz += n;
@@ -172,6 +172,11 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
return !equal && !smaller;
}
+static inline u16 mlx5_wq_cyc_get_counter(struct mlx5_wq_cyc *wq)
+{
+ return wq->wqe_ctr;
+}
+
static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
{
return wq->fbc.sz_m1 + 1;
@@ -290,4 +295,14 @@ static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq)
*wq->db = cpu_to_be32(wq->wqe_ctr);
}
+static inline u16 mlx5_wq_ll_get_head(struct mlx5_wq_ll *wq)
+{
+ return wq->head;
+}
+
+static inline u16 mlx5_wq_ll_get_counter(struct mlx5_wq_ll *wq)
+{
+ return wq->wqe_ctr;
+}
+
#endif /* __MLX5_WQ_H__ */