aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig15
-rw-r--r--drivers/infiniband/core/addr.c1
-rw-r--r--drivers/infiniband/core/cache.c14
-rw-r--r--drivers/infiniband/core/cm.c113
-rw-r--r--drivers/infiniband/core/cma.c406
-rw-r--r--drivers/infiniband/core/cma_configfs.c3
-rw-r--r--drivers/infiniband/core/cma_priv.h1
-rw-r--r--drivers/infiniband/core/cq.c1
-rw-r--r--drivers/infiniband/core/device.c43
-rw-r--r--drivers/infiniband/core/iwpm_util.h1
-rw-r--r--drivers/infiniband/core/lag.c5
-rw-r--r--drivers/infiniband/core/nldev.c7
-rw-r--r--drivers/infiniband/core/rdma_core.c2
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c2
-rw-r--r--drivers/infiniband/core/rw.c46
-rw-r--r--drivers/infiniband/core/sa_query.c252
-rw-r--r--drivers/infiniband/core/sysfs.c3
-rw-r--r--drivers/infiniband/core/ucma.c44
-rw-r--r--drivers/infiniband/core/umem_dmabuf.c13
-rw-r--r--drivers/infiniband/core/umem_odp.c9
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c8
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c8
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c2
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c383
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c3
-rw-r--r--drivers/infiniband/core/verbs.c21
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h2
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c5
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h1
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c50
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h7
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c99
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h9
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c26
-rw-r--r--drivers/infiniband/hw/cxgb4/id_table.c21
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c16
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c1
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h6
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c5
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h3
-rw-r--r--drivers/infiniband/hw/efa/efa_io_defs.h289
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c4
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c11
-rw-r--r--drivers/infiniband/hw/erdma/Kconfig12
-rw-r--r--drivers/infiniband/hw/erdma/Makefile4
-rw-r--r--drivers/infiniband/hw/erdma/erdma.h289
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cm.c1422
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cm.h167
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cmdq.c487
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cq.c201
-rw-r--r--drivers/infiniband/hw/erdma/erdma_eq.c320
-rw-r--r--drivers/infiniband/hw/erdma/erdma_hw.h514
-rw-r--r--drivers/infiniband/hw/erdma/erdma_main.c605
-rw-r--r--drivers/infiniband/hw/erdma/erdma_qp.c555
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.c1459
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.h335
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c5
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c2
-rw-r--r--drivers/infiniband/hw/hfi1/common.h55
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c1
-rw-r--r--drivers/infiniband/hw/hfi1/device.c1
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c6
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.c2
-rw-r--r--drivers/infiniband/hw/hfi1/fault.c1
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c10
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c3
-rw-r--r--drivers/infiniband/hw/hfi1/init.c2
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib.h2
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_main.c27
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_rx.c5
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_tx.c42
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c6
-rw-r--r--drivers/infiniband/hw/hfi1/netdev_rx.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c3
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c2
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c12
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c2
-rw-r--r--drivers/infiniband/hw/hfi1/trace_dbg.h8
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c8
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c13
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h3
-rw-r--r--drivers/infiniband/hw/hns/Kconfig17
-rw-r--r--drivers/infiniband/hw/hns/Makefile7
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c108
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h8
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h202
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c90
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h193
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c38
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h5
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c4675
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h1147
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c1436
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h407
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c35
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c149
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c92
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c17
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c139
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c243
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c108
-rw-r--r--drivers/infiniband/hw/irdma/cm.c162
-rw-r--r--drivers/infiniband/hw/irdma/cm.h7
-rw-r--r--drivers/infiniband/hw/irdma/ctrl.c612
-rw-r--r--drivers/infiniband/hw/irdma/defs.h9
-rw-r--r--drivers/infiniband/hw/irdma/hmc.c105
-rw-r--r--drivers/infiniband/hw/irdma/hmc.h53
-rw-r--r--drivers/infiniband/hw/irdma/hw.c331
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.c2
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_if.c5
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.c1
-rw-r--r--drivers/infiniband/hw/irdma/irdma.h1
-rw-r--r--drivers/infiniband/hw/irdma/main.c36
-rw-r--r--drivers/infiniband/hw/irdma/main.h50
-rw-r--r--drivers/infiniband/hw/irdma/osdep.h41
-rw-r--r--drivers/infiniband/hw/irdma/pble.c77
-rw-r--r--drivers/infiniband/hw/irdma/pble.h27
-rw-r--r--drivers/infiniband/hw/irdma/protos.h90
-rw-r--r--drivers/infiniband/hw/irdma/puda.c139
-rw-r--r--drivers/infiniband/hw/irdma/puda.h43
-rw-r--r--drivers/infiniband/hw/irdma/status.h71
-rw-r--r--drivers/infiniband/hw/irdma/type.h114
-rw-r--r--drivers/infiniband/hw/irdma/uda.c42
-rw-r--r--drivers/infiniband/hw/irdma/uda.h46
-rw-r--r--drivers/infiniband/hw/irdma/uk.c129
-rw-r--r--drivers/infiniband/hw/irdma/user.h63
-rw-r--r--drivers/infiniband/hw/irdma/utils.c422
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c586
-rw-r--r--drivers/infiniband/hw/irdma/verbs.h28
-rw-r--r--drivers/infiniband/hw/irdma/ws.c19
-rw-r--r--drivers/infiniband/hw/irdma/ws.h2
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c1
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c29
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c2
-rw-r--r--drivers/infiniband/hw/mlx4/main.c54
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c1
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c3
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c9
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c122
-rw-r--r--drivers/infiniband/hw/mlx5/dm.c53
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c1
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c407
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c2
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c1
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c54
-rw-r--r--drivers/infiniband/hw/mlx5/main.c212
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c3
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h247
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c1084
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c95
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c9
-rw-r--r--drivers/infiniband/hw/mlx5/qpc.c2
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c1
-rw-r--r--drivers/infiniband/hw/mlx5/umr.c761
-rw-r--r--drivers/infiniband/hw/mlx5/umr.h97
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c377
-rw-r--r--drivers/infiniband/hw/mlx5/wr.h60
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c15
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c25
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c20
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c18
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c17
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c20
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h1
-rw-r--r--drivers/infiniband/hw/qedr/main.c10
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h1
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c17
-rw-r--r--drivers/infiniband/hw/qib/qib.h6
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c27
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c11
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c13
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c20
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c1
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c10
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c10
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c14
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c4
-rw-r--r--drivers/infiniband/sw/rxe/Makefile1
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c115
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c66
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c14
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c12
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h62
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c536
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c244
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c119
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c47
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.c741
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c555
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h134
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c193
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c23
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c133
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c224
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c396
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c131
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c119
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c37
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c181
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h72
-rw-r--r--drivers/infiniband/sw/siw/Kconfig5
-rw-r--r--drivers/infiniband/sw/siw/siw.h8
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c14
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c12
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c47
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c18
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c13
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c1
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c83
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h38
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c169
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c117
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c153
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c131
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h4
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c4
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c1
-rw-r--r--drivers/infiniband/ulp/rtrs/Makefile10
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c22
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c148
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c15
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h86
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c1232
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.h42
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h38
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c32
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c123
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c16
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h88
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c736
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.h34
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c101
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.h37
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c86
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h14
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c162
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h18
281 files changed, 17013 insertions, 16126 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 33d3ce9c888e..aa36ac618e72 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -78,20 +78,21 @@ config INFINIBAND_VIRT_DMA
def_bool !HIGHMEM
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
-source "drivers/infiniband/hw/mthca/Kconfig"
-source "drivers/infiniband/hw/qib/Kconfig"
+source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
+source "drivers/infiniband/hw/erdma/Kconfig"
+source "drivers/infiniband/hw/hfi1/Kconfig"
+source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
+source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
-source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
-source "drivers/infiniband/hw/usnic/Kconfig"
-source "drivers/infiniband/hw/hns/Kconfig"
-source "drivers/infiniband/hw/bnxt_re/Kconfig"
-source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
+source "drivers/infiniband/hw/qib/Kconfig"
+source "drivers/infiniband/hw/usnic/Kconfig"
+source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/sw/siw/Kconfig"
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 65e3e7df8a4b..f253295795f0 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -37,7 +37,6 @@
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 0c98dd3dee67..4084d05a4510 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -33,7 +33,7 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/if_vlan.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
@@ -955,7 +955,7 @@ int rdma_query_gid(struct ib_device *device, u32 port_num,
{
struct ib_gid_table *table;
unsigned long flags;
- int res = -EINVAL;
+ int res;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
@@ -963,9 +963,15 @@ int rdma_query_gid(struct ib_device *device, u32 port_num,
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
- if (index < 0 || index >= table->sz ||
- !is_gid_entry_valid(table->data_vec[index]))
+ if (index < 0 || index >= table->sz) {
+ res = -EINVAL;
goto done;
+ }
+
+ if (!is_gid_entry_valid(table->data_vec[index])) {
+ res = -ENOENT;
+ goto done;
+ }
memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
res = 0;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c903b74f46a4..1f9938a2c475 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -175,6 +175,7 @@ struct cm_device {
struct cm_av {
struct cm_port *port;
struct rdma_ah_attr ah_attr;
+ u16 dlid_datapath;
u16 pkey_index;
u8 timeout;
};
@@ -617,7 +618,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv->id.service_id;
- __be64 service_mask = cm_id_priv->id.service_mask;
unsigned long flags;
spin_lock_irqsave(&cm.lock, flags);
@@ -625,9 +625,16 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
- if ((cur_cm_id_priv->id.service_mask & service_id) ==
- (service_mask & cur_cm_id_priv->id.service_id) &&
- (cm_id_priv->id.device == cur_cm_id_priv->id.device)) {
+
+ if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+ link = &(*link)->rb_left;
+ else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+ link = &(*link)->rb_right;
+ else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
+ link = &(*link)->rb_left;
+ else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
+ link = &(*link)->rb_right;
+ else {
/*
* Sharing an ib_cm_id with different handlers is not
* supported
@@ -643,17 +650,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
spin_unlock_irqrestore(&cm.lock, flags);
return cur_cm_id_priv;
}
-
- if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
- link = &(*link)->rb_left;
- else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
- link = &(*link)->rb_right;
- else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
- link = &(*link)->rb_left;
- else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
- link = &(*link)->rb_right;
- else
- link = &(*link)->rb_right;
}
cm_id_priv->listen_sharecount++;
rb_link_node(&cm_id_priv->service_node, parent, link);
@@ -670,12 +666,7 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device,
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
- if ((cm_id_priv->id.service_mask & service_id) ==
- cm_id_priv->id.service_id &&
- (cm_id_priv->id.device == device)) {
- refcount_inc(&cm_id_priv->refcount);
- return cm_id_priv;
- }
+
if (device < cm_id_priv->id.device)
node = node->rb_left;
else if (device > cm_id_priv->id.device)
@@ -684,8 +675,10 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device,
node = node->rb_left;
else if (be64_gt(service_id, cm_id_priv->id.service_id))
node = node->rb_right;
- else
- node = node->rb_right;
+ else {
+ refcount_inc(&cm_id_priv->refcount);
+ return cm_id_priv;
+ }
}
return NULL;
}
@@ -1158,22 +1151,17 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
}
EXPORT_SYMBOL(ib_destroy_cm_id);
-static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id,
- __be64 service_mask)
+static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id)
{
- service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
- service_id &= service_mask;
if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
(service_id != IB_CM_ASSIGN_SERVICE_ID))
return -EINVAL;
- if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
+ if (service_id == IB_CM_ASSIGN_SERVICE_ID)
cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
- } else {
+ else
cm_id_priv->id.service_id = service_id;
- cm_id_priv->id.service_mask = service_mask;
- }
+
return 0;
}
@@ -1185,12 +1173,8 @@ static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id,
* and service ID resolution requests. The service ID should be specified
* network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
- * @service_mask: Mask applied to service ID used to listen across a
- * range of service IDs. If set to 0, the service ID is matched
- * exactly. This parameter is ignored if %service_id is set to
- * IB_CM_ASSIGN_SERVICE_ID.
*/
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id)
{
struct cm_id_private *cm_id_priv =
container_of(cm_id, struct cm_id_private, id);
@@ -1203,7 +1187,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
goto out;
}
- ret = cm_init_listen(cm_id_priv, service_id, service_mask);
+ ret = cm_init_listen(cm_id_priv, service_id);
if (ret)
goto out;
@@ -1251,9 +1235,11 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
if (IS_ERR(cm_id_priv))
return ERR_CAST(cm_id_priv);
- err = cm_init_listen(cm_id_priv, service_id, 0);
- if (err)
+ err = cm_init_listen(cm_id_priv, service_id);
+ if (err) {
+ ib_destroy_cm_id(&cm_id_priv->id);
return ERR_PTR(err);
+ }
spin_lock_irq(&cm_id_priv->lock);
listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
@@ -1319,6 +1305,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct sa_path_rec *pri_path = param->primary_path;
struct sa_path_rec *alt_path = param->alternate_path;
bool pri_ext = false;
+ __be16 lid;
if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
@@ -1378,9 +1365,16 @@ static void cm_format_req(struct cm_req_msg *req_msg,
htons(ntohl(sa_path_get_dlid(
pri_path)))));
} else {
+
+ if (param->primary_path_inbound) {
+ lid = param->primary_path_inbound->ib.dlid;
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(lid));
+ } else
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+
/* Work-around until there's a way to obtain remote LID info */
- IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
- be16_to_cpu(IB_LID_PERMISSIVE));
IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
be16_to_cpu(IB_LID_PERMISSIVE));
}
@@ -1520,7 +1514,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
}
}
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = cm_convert_to_ms(
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
@@ -1536,6 +1529,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
cm_move_av_from_path(&cm_id_priv->av, &av);
+ if (param->primary_path_outbound)
+ cm_id_priv->av.dlid_datapath =
+ be16_to_cpu(param->primary_path_outbound->ib.dlid);
+
if (param->alternate_path)
cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
@@ -1630,14 +1627,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
u32 lid;
if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(primary_path,
- IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
- req_msg));
+ sa_path_set_dlid(primary_path, wc->slid);
sa_path_set_slid(primary_path,
IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
req_msg));
@@ -1674,7 +1670,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
primary_path->dgid =
*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
@@ -1732,7 +1729,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
if (sa_path_is_roce(alt_path))
alt_path->roce.route_resolved = false;
}
- cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
+ cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc);
}
static u16 cm_get_bth_pkey(struct cm_work *work)
@@ -2077,7 +2074,6 @@ static int cm_req_handler(struct cm_work *work)
cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
cm_id_priv->id.service_id =
cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_id_priv->tid = req_msg->hdr.tid;
cm_id_priv->timeout_ms = cm_convert_to_ms(
IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
@@ -2146,7 +2142,7 @@ static int cm_req_handler(struct cm_work *work)
if (cm_req_has_alt_path(req_msg))
work->path[1].rec_type = work->path[0].rec_type;
cm_format_paths_from_req(req_msg, &work->path[0],
- &work->path[1]);
+ &work->path[1], work->mad_recv_wc->wc);
if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
sa_path_set_dmac(&work->path[0],
cm_id_priv->av.ah_attr.roce.dmac);
@@ -2171,6 +2167,10 @@ static int cm_req_handler(struct cm_work *work)
NULL, 0);
goto rejected;
}
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB)
+ cm_id_priv->av.dlid_datapath =
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg);
+
if (cm_req_has_alt_path(req_msg)) {
ret = cm_init_av_by_path(&work->path[1], NULL,
&cm_id_priv->alt_av);
@@ -2824,6 +2824,7 @@ static int cm_dreq_handler(struct cm_work *work)
switch (cm_id_priv->id.state) {
case IB_CM_REP_SENT:
case IB_CM_DREQ_SENT:
+ case IB_CM_MRA_REP_RCVD:
ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
@@ -2831,8 +2832,6 @@ static int cm_dreq_handler(struct cm_work *work)
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
ib_cancel_mad(cm_id_priv->msg);
break;
- case IB_CM_MRA_REP_RCVD:
- break;
case IB_CM_TIMEWAIT:
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
[CM_DREQ_COUNTER]);
@@ -3322,7 +3321,7 @@ static int cm_lap_handler(struct cm_work *work)
ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
if (ret) {
rdma_destroy_ah_attr(&ah_attr);
- return -EINVAL;
+ goto deref;
}
spin_lock_irq(&cm_id_priv->lock);
@@ -3485,7 +3484,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
cm_move_av_from_path(&cm_id_priv->av, &av);
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = param->timeout_ms;
cm_id_priv->max_cm_retries = param->max_cm_retries;
if (cm_id->state != IB_CM_IDLE) {
@@ -3560,7 +3558,6 @@ static int cm_sidr_req_handler(struct cm_work *work)
cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
cm_id_priv->id.service_id =
cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_id_priv->tid = sidr_req_msg->hdr.tid;
wc = work->mad_recv_wc->wc;
@@ -4133,6 +4130,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) &&
+ cm_id_priv->av.dlid_datapath &&
+ (cm_id_priv->av.dlid_datapath != 0xffff))
+ qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath;
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 835ac54d4a24..26d1772179b8 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -20,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -67,8 +69,8 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
-static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
- union ib_gid *mgid);
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
@@ -168,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
@@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
@@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
return hdr->ip_version >> 4;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
+}
+
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
struct in_device *in_dev = NULL;
@@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET)
+ return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
+ (char *)&((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
@@ -766,6 +887,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
unsigned int p;
u16 pkey, index;
enum ib_port_state port_state;
+ int ret;
int i;
cma_dev = NULL;
@@ -784,9 +906,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
continue;
- for (i = 0; !rdma_query_gid(cur_dev->device,
- p, i, &gid);
- i++) {
+
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
+ ++i) {
+ ret = rdma_query_gid(cur_dev->device, p, i,
+ &gid);
+ if (ret)
+ continue;
+
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -855,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
@@ -1428,7 +1556,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
return false;
memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_iif = net_dev->ifindex;
+ fl4.flowi4_oif = net_dev->ifindex;
fl4.daddr = daddr;
fl4.saddr = saddr;
@@ -1713,8 +1841,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
- (struct sockaddr *)&req->listen_addr_storage,
- (struct sockaddr *)&req->src_addr_storage)) {
+ (struct sockaddr *)&req->src_addr_storage,
+ (struct sockaddr *)&req->listen_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
@@ -1840,17 +1968,19 @@ static void destroy_mc(struct rdma_id_private *id_priv,
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net,
dev_addr->bound_dev_if);
- if (ndev) {
+ if (ndev && !send_only) {
+ enum ib_gid_type gid_type;
union ib_gid mgid;
- cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
- &mgid);
-
- if (!send_only)
- cma_igmp_send(ndev, &mgid, false);
-
- dev_put(ndev);
+ gid_type = id_priv->cma_dev->default_gid_type
+ [id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+ gid_type);
+ cma_igmp_send(ndev, &mgid, false);
}
+ dev_put(ndev);
cancel_work_sync(&mc->iboe_join.work);
}
@@ -1875,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_cancel_operation(id_priv, state);
rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -1895,6 +2026,8 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_id_put(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
+ kfree(id_priv->id.route.path_rec_inbound);
+ kfree(id_priv->id.route.path_rec_outbound);
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
@@ -2110,14 +2243,14 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
goto err;
rt = &id->route;
- rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
- rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
- GFP_KERNEL);
+ rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+ rt->path_rec = kmalloc_array(rt->num_pri_alt_paths,
+ sizeof(*rt->path_rec), GFP_KERNEL);
if (!rt->path_rec)
goto err;
rt->path_rec[0] = *path;
- if (rt->num_paths == 2)
+ if (rt->num_pri_alt_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
@@ -2634,7 +2767,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
{
struct rdma_id_private *id_priv;
- if (id->qp_type != IB_QPT_RC)
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
id_priv = container_of(id, struct rdma_id_private, id);
@@ -2686,26 +2819,72 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
}
EXPORT_SYMBOL(rdma_set_min_rnr_timer);
+static void route_set_path_rec_inbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_inbound) {
+ route->path_rec_inbound =
+ kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL);
+ if (!route->path_rec_inbound)
+ return;
+ }
+
+ *route->path_rec_inbound = *path_rec;
+}
+
+static void route_set_path_rec_outbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_outbound) {
+ route->path_rec_outbound =
+ kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL);
+ if (!route->path_rec_outbound)
+ return;
+ }
+
+ *route->path_rec_outbound = *path_rec;
+}
+
static void cma_query_handler(int status, struct sa_path_rec *path_rec,
- void *context)
+ int num_prs, void *context)
{
struct cma_work *work = context;
struct rdma_route *route;
+ int i;
route = &work->id->id.route;
- if (!status) {
- route->num_paths = 1;
- *route->path_rec = *path_rec;
- } else {
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
- work->event.status = status;
- pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
- status);
+ if (status)
+ goto fail;
+
+ for (i = 0; i < num_prs; i++) {
+ if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP))
+ *route->path_rec = path_rec[i];
+ else if (path_rec[i].flags & IB_PATH_INBOUND)
+ route_set_path_rec_inbound(work, &path_rec[i]);
+ else if (path_rec[i].flags & IB_PATH_OUTBOUND)
+ route_set_path_rec_outbound(work, &path_rec[i]);
+ }
+ if (!route->path_rec) {
+ status = -EINVAL;
+ goto fail;
}
+ route->num_pri_alt_paths = 1;
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
queue_work(cma_wq, &work->work);
}
@@ -2950,7 +3129,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id,
dev_put(ndev);
}
- id->route.num_paths = 1;
+ id->route.num_pri_alt_paths = 1;
return 0;
err_free:
@@ -3083,7 +3262,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err1;
}
- route->num_paths = 1;
+ route->num_pri_alt_paths = 1;
ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
@@ -3143,7 +3322,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
err2:
kfree(route->path_rec);
route->path_rec = NULL;
- route->num_paths = 0;
+ route->num_pri_alt_paths = 0;
err1:
kfree(work);
return ret;
@@ -3164,8 +3343,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
@@ -3362,22 +3544,30 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
const struct sockaddr *dst_addr)
{
- if (!src_addr || !src_addr->sa_family) {
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- src_addr->sa_family = dst_addr->sa_family;
- if (IS_ENABLED(CONFIG_IPV6) &&
- dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
- }
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr(id, src_addr);
+
+ /*
+ * When the src_addr is not specified, automatically supply an any addr
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
}
- return rdma_bind_addr(id, src_addr);
+ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
}
/*
@@ -3617,7 +3807,7 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rover = prandom_u32() % remaining + low;
+ rover = prandom_u32_max(remaining) + low;
retry:
if (last_used_port != rover) {
struct rdma_bind_list *bind_list;
@@ -4033,8 +4223,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -4093,8 +4282,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -4125,7 +4313,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
}
req.primary_path = &route->path_rec[0];
- if (route->num_paths == 2)
+ req.primary_path_inbound = route->path_rec_inbound;
+ req.primary_path_outbound = route->path_rec_outbound;
+ if (route->num_pri_alt_paths == 2)
req.alternate_path = &route->path_rec[1];
req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
@@ -4908,10 +5098,87 @@ out:
return ret;
}
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
+
+ mutex_lock(&id_priv->handler_mutex);
+
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ return;
+ }
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
+{
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
+
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
+
+ if (!ips_node)
+ goto out;
+
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
+ cma_id_get(current_id);
+ queue_work(cma_wq, &current_id->id.net_work);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -5134,6 +5401,7 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
@@ -5148,6 +5416,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -5160,6 +5429,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 9ac16e0db761..7b68b3ea979f 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -293,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 757a0ef79872..b7354c94cf1b 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -64,6 +64,7 @@ struct rdma_id_private {
struct list_head listen_item;
struct list_head listen_list;
};
+ struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 433b426729d4..a70876a0a231 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -2,7 +2,6 @@
/*
* Copyright (c) 2015 HGST, a Western Digital Company.
*/
-#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 22a4adda7981..b69e2c4e4d2a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
+static struct workqueue_struct *ib_unreg_wq;
/*
* Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -421,7 +422,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
return ret;
}
- strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+ strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
ret = rename_compat_devs(ibdev);
downgrade_write(&devices_rwsem);
@@ -1216,7 +1217,7 @@ static int assign_name(struct ib_device *device, const char *name)
ret = -ENFILE;
goto out;
}
- strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+ strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
&last_id, GFP_KERNEL);
@@ -1602,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
WARN_ON(!refcount_read(&ib_dev->refcount));
WARN_ON(!ib_dev->ops.dealloc_driver);
get_device(&ib_dev->dev);
- if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+ if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -2461,7 +2462,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
++i) {
ret = rdma_query_gid(device, port, i, &tmp_gid);
if (ret)
- return ret;
+ continue;
+
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
@@ -2612,7 +2614,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
SET_DEVICE_OP(dev_ops, create_flow);
- SET_DEVICE_OP(dev_ops, create_flow_action_esp);
SET_DEVICE_OP(dev_ops, create_qp);
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
SET_DEVICE_OP(dev_ops, create_srq);
@@ -2675,7 +2676,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_ah);
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
- SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
@@ -2752,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
static int __init ib_core_init(void)
{
- int ret;
+ int ret = -ENOMEM;
ib_wq = alloc_workqueue("infiniband", 0, 0);
if (!ib_wq)
return -ENOMEM;
+ ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_unreg_wq)
+ goto err;
+
ib_comp_wq = alloc_workqueue("ib-comp-wq",
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
- if (!ib_comp_wq) {
- ret = -ENOMEM;
- goto err;
- }
+ if (!ib_comp_wq)
+ goto err_unbound;
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
- if (!ib_comp_unbound_wq) {
- ret = -ENOMEM;
+ if (!ib_comp_unbound_wq)
goto err_comp;
- }
ret = class_register(&ib_class);
if (ret) {
@@ -2814,10 +2815,18 @@ static int __init ib_core_init(void)
nldev_init();
rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
- roce_gid_mgmt_init();
+ ret = roce_gid_mgmt_init();
+ if (ret) {
+ pr_warn("Couldn't init RoCE GID management\n");
+ goto err_parent;
+ }
return 0;
+err_parent:
+ rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
+ unregister_pernet_device(&rdma_dev_net_ops);
err_compat:
unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
@@ -2832,6 +2841,8 @@ err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
+err_unbound:
+ destroy_workqueue(ib_unreg_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -2853,7 +2864,7 @@ static void __exit ib_core_cleanup(void)
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
- flush_workqueue(system_unbound_wq);
+ destroy_workqueue(ib_unreg_wq);
WARN_ON(!xa_empty(&clients));
WARN_ON(!xa_empty(&devices));
}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 3a42ad43056e..d6fc8402158a 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -33,7 +33,6 @@
#ifndef _IWPM_UTIL_H
#define _IWPM_UTIL_H
-#include <linux/module.h>
#include <linux/io.h>
#include <linux/in.h>
#include <linux/in6.h>
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
index 7063e41eaf26..c77d7d2559a1 100644
--- a/drivers/infiniband/core/lag.c
+++ b/drivers/infiniband/core/lag.c
@@ -7,8 +7,7 @@
#include <rdma/ib_cache.h>
#include <rdma/lag.h>
-static struct sk_buff *rdma_build_skb(struct ib_device *device,
- struct net_device *netdev,
+static struct sk_buff *rdma_build_skb(struct net_device *netdev,
struct rdma_ah_attr *ah_attr,
gfp_t flags)
{
@@ -86,7 +85,7 @@ static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
struct net_device *slave;
struct sk_buff *skb;
- skb = rdma_build_skb(device, master, ah_attr, flags);
+ skb = rdma_build_skb(master, ah_attr, flags);
if (!skb)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index f5aacaf7fb8e..12dc97067ed2 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1739,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!device)
return -EINVAL;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
+ if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
ib_device_put(device);
return -EINVAL;
}
@@ -1951,9 +1951,10 @@ static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
u32 port)
{
struct rdma_hw_stats *stats;
- int rem, i, index, ret = 0;
struct nlattr *entry_attr;
unsigned long *target;
+ int rem, i, ret = 0;
+ u32 index;
stats = ib_get_hw_stats_port(device, port);
if (!stats)
@@ -2536,7 +2537,7 @@ void __init nldev_init(void)
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
-void __exit nldev_exit(void)
+void nldev_exit(void)
{
rdma_nl_unregister(RDMA_NL_NLDEV);
}
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 94d83b665a2f..29b1ab1d5f93 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
- * access locks, since only a single one of them is is allowed
+ * access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 68197e576433..e958c43dd28f 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -250,7 +250,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
/**
* is_upper_ndev_bond_master_filter - Check if a given netdevice
- * is bond master device of netdevice of the the RDMA device of port.
+ * is bond master device of netdevice of the RDMA device of port.
* @ib_dev: IB device to check
* @port: Port to consider for adding default GID
* @rdma_ndev: Pointer to rdma netdevice
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 5a3bd41b331c..8367974b7998 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -2,6 +2,7 @@
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
*/
+#include <linux/memremap.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
@@ -273,33 +274,6 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
return 1;
}
-static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
- u32 sg_cnt, enum dma_data_direction dir)
-{
- if (is_pci_p2pdma_page(sg_page(sg)))
- pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
- else
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
-}
-
-static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt,
- enum dma_data_direction dir)
-{
- int nents;
-
- if (is_pci_p2pdma_page(sg_page(sgt->sgl))) {
- if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
- return 0;
- nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl,
- sgt->orig_nents, dir);
- if (!nents)
- return -EIO;
- sgt->nents = nents;
- return 0;
- }
- return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0);
-}
-
/**
* rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
* @ctx: context to initialize
@@ -326,7 +300,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
};
int ret;
- ret = rdma_rw_map_sgtable(dev, &sgt, dir);
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
if (ret)
return ret;
sg_cnt = sgt.nents;
@@ -365,7 +339,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
return ret;
out_unmap_sg:
- rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
@@ -413,12 +387,12 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
return -EINVAL;
}
- ret = rdma_rw_map_sgtable(dev, &sgt, dir);
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
if (ret)
return ret;
if (prot_sg_cnt) {
- ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir);
+ ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0);
if (ret)
goto out_unmap_sg;
}
@@ -485,9 +459,9 @@ out_free_ctx:
kfree(ctx->reg);
out_unmap_prot_sg:
if (prot_sgt.nents)
- rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0);
out_unmap_sg:
- rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
@@ -620,7 +594,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
break;
}
- rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
@@ -648,8 +622,8 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
kfree(ctx->reg);
if (prot_sg_cnt)
- rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
- rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 74ecd7456a11..0de83d9a4985 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -32,7 +32,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
@@ -51,6 +50,7 @@
#include <rdma/ib_marshall.h>
#include <rdma/ib_addr.h>
#include <rdma/opa_addr.h>
+#include <rdma/rdma_cm.h>
#include "sa.h"
#include "core_priv.h"
@@ -105,7 +105,8 @@ struct ib_sa_device {
};
struct ib_sa_query {
- void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
+ void (*callback)(struct ib_sa_query *sa_query, int status,
+ int num_prs, struct ib_sa_mad *mad);
void (*release)(struct ib_sa_query *);
struct ib_sa_client *client;
struct ib_sa_port *port;
@@ -117,6 +118,12 @@ struct ib_sa_query {
u32 seq; /* Local svc request sequence number */
unsigned long timeout; /* Local svc timeout */
u8 path_use; /* How will the pathrecord be used */
+
+ /* A separate buffer to save pathrecords of a response, as in cases
+ * like IB/netlink, mulptiple pathrecords are supported, so that
+ * mad->data is not large enough to hold them
+ */
+ void *resp_pr_data;
};
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
@@ -124,7 +131,8 @@ struct ib_sa_query {
#define IB_SA_QUERY_OPA 0x00000004
struct ib_sa_path_query {
- void (*callback)(int, struct sa_path_rec *, void *);
+ void (*callback)(int status, struct sa_path_rec *rec,
+ int num_paths, void *context);
void *context;
struct ib_sa_query sa_query;
struct sa_path_rec *conv_pr;
@@ -713,7 +721,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
sa_rec->reversible != 0)
- query->path_use = LS_RESOLVE_PATH_USE_GMP;
+ query->path_use = LS_RESOLVE_PATH_USE_ALL;
else
query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
header->path_use = query->path_use;
@@ -866,50 +874,81 @@ static void send_handler(struct ib_mad_agent *agent,
static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
const struct nlmsghdr *nlh)
{
+ struct ib_path_rec_data *srec, *drec;
+ struct ib_sa_path_query *path_query;
struct ib_mad_send_wc mad_send_wc;
- struct ib_sa_mad *mad = NULL;
const struct nlattr *head, *curr;
- struct ib_path_rec_data *rec;
- int len, rem;
+ struct ib_sa_mad *mad = NULL;
+ int len, rem, num_prs = 0;
u32 mask = 0;
int status = -EIO;
- if (query->callback) {
- head = (const struct nlattr *) nlmsg_data(nlh);
- len = nlmsg_len(nlh);
- switch (query->path_use) {
- case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
- mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
- break;
+ if (!query->callback)
+ goto out;
- case LS_RESOLVE_PATH_USE_ALL:
- case LS_RESOLVE_PATH_USE_GMP:
- default:
- mask = IB_PATH_PRIMARY | IB_PATH_GMP |
- IB_PATH_BIDIRECTIONAL;
- break;
+ path_query = container_of(query, struct ib_sa_path_query, sa_query);
+ mad = query->mad_buf->mad;
+ if (!path_query->conv_pr &&
+ (be16_to_cpu(mad->mad_hdr.attr_id) == IB_SA_ATTR_PATH_REC)) {
+ /* Need a larger buffer for possible multiple PRs */
+ query->resp_pr_data = kvcalloc(RDMA_PRIMARY_PATH_MAX_REC_NUM,
+ sizeof(*drec), GFP_KERNEL);
+ if (!query->resp_pr_data) {
+ query->callback(query, -ENOMEM, 0, NULL);
+ return;
}
- nla_for_each_attr(curr, head, len, rem) {
- if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
- rec = nla_data(curr);
- /*
- * Get the first one. In the future, we may
- * need to get up to 6 pathrecords.
- */
- if ((rec->flags & mask) == mask) {
- mad = query->mad_buf->mad;
- mad->mad_hdr.method |=
- IB_MGMT_METHOD_RESP;
- memcpy(mad->data, rec->path_rec,
- sizeof(rec->path_rec));
- status = 0;
- break;
- }
- }
+ }
+
+ head = (const struct nlattr *) nlmsg_data(nlh);
+ len = nlmsg_len(nlh);
+ switch (query->path_use) {
+ case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
+ mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
+ break;
+
+ case LS_RESOLVE_PATH_USE_ALL:
+ mask = IB_PATH_PRIMARY;
+ break;
+
+ case LS_RESOLVE_PATH_USE_GMP:
+ default:
+ mask = IB_PATH_PRIMARY | IB_PATH_GMP |
+ IB_PATH_BIDIRECTIONAL;
+ break;
+ }
+
+ drec = (struct ib_path_rec_data *)query->resp_pr_data;
+ nla_for_each_attr(curr, head, len, rem) {
+ if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD)
+ continue;
+
+ srec = nla_data(curr);
+ if ((srec->flags & mask) != mask)
+ continue;
+
+ status = 0;
+ if (!drec) {
+ memcpy(mad->data, srec->path_rec,
+ sizeof(srec->path_rec));
+ num_prs = 1;
+ break;
}
- query->callback(query, status, mad);
+
+ memcpy(drec, srec, sizeof(*drec));
+ drec++;
+ num_prs++;
+ if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM)
+ break;
}
+ if (!status)
+ mad->mad_hdr.method |= IB_MGMT_METHOD_RESP;
+
+ query->callback(query, status, num_prs, mad);
+ kvfree(query->resp_pr_data);
+ query->resp_pr_data = NULL;
+
+out:
mad_send_wc.send_buf = query->mad_buf;
mad_send_wc.status = IB_WC_SUCCESS;
send_handler(query->mad_buf->mad_agent, &mad_send_wc);
@@ -1035,10 +1074,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
unsigned long flags;
- struct ib_sa_query *query;
+ struct ib_sa_query *query = NULL, *iter;
struct ib_mad_send_buf *send_buf;
struct ib_mad_send_wc mad_send_wc;
- int found = 0;
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
@@ -1046,20 +1084,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_for_each_entry(query, &ib_nl_request_list, list) {
+ list_for_each_entry(iter, &ib_nl_request_list, list) {
/*
* If the query is cancelled, let the timeout routine
* take care of it.
*/
- if (nlh->nlmsg_seq == query->seq) {
- found = !ib_sa_query_cancelled(query);
- if (found)
- list_del(&query->list);
+ if (nlh->nlmsg_seq == iter->seq) {
+ if (!ib_sa_query_cancelled(iter)) {
+ list_del(&iter->list);
+ query = iter;
+ }
break;
}
}
- if (!found) {
+ if (!query) {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
goto resp_out;
}
@@ -1412,41 +1451,90 @@ static int opa_pr_query_possible(struct ib_sa_client *client,
return PR_IB_SUPPORTED;
}
+static void ib_sa_pr_callback_single(struct ib_sa_path_query *query,
+ int status, struct ib_sa_mad *mad)
+{
+ struct sa_path_rec rec = {};
+
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(&rec);
+
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, 1, query->context);
+ } else {
+ query->callback(status, &rec, 1, query->context);
+ }
+}
+
+/**
+ * ib_sa_pr_callback_multiple() - Parse path records then do callback.
+ *
+ * In a multiple-PR case the PRs are saved in "query->resp_pr_data"
+ * (instead of"mad->data") and with "ib_path_rec_data" structure format,
+ * so that rec->flags can be set to indicate the type of PR.
+ * This is valid only in IB fabric.
+ */
+static void ib_sa_pr_callback_multiple(struct ib_sa_path_query *query,
+ int status, int num_prs,
+ struct ib_path_rec_data *rec_data)
+{
+ struct sa_path_rec *rec;
+ int i;
+
+ rec = kvcalloc(num_prs, sizeof(*rec), GFP_KERNEL);
+ if (!rec) {
+ query->callback(-ENOMEM, NULL, 0, query->context);
+ return;
+ }
+
+ for (i = 0; i < num_prs; i++) {
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec_data[i].path_rec, rec + i);
+ rec[i].rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(rec + i);
+ rec[i].flags = rec_data[i].flags;
+ }
+
+ query->callback(status, rec, num_prs, query->context);
+ kvfree(rec);
+}
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_prs,
struct ib_sa_mad *mad)
{
struct ib_sa_path_query *query =
container_of(sa_query, struct ib_sa_path_query, sa_query);
+ struct sa_path_rec rec;
- if (mad) {
- struct sa_path_rec rec;
-
- if (sa_query->flags & IB_SA_QUERY_OPA) {
- ib_unpack(opa_path_rec_table,
- ARRAY_SIZE(opa_path_rec_table),
- mad->data, &rec);
- rec.rec_type = SA_PATH_REC_TYPE_OPA;
- query->callback(status, &rec, query->context);
- } else {
- ib_unpack(path_rec_table,
- ARRAY_SIZE(path_rec_table),
- mad->data, &rec);
- rec.rec_type = SA_PATH_REC_TYPE_IB;
- sa_path_set_dmac_zero(&rec);
-
- if (query->conv_pr) {
- struct sa_path_rec opa;
+ if (!mad || !num_prs) {
+ query->callback(status, NULL, 0, query->context);
+ return;
+ }
- memset(&opa, 0, sizeof(struct sa_path_rec));
- sa_convert_path_ib_to_opa(&opa, &rec);
- query->callback(status, &opa, query->context);
- } else {
- query->callback(status, &rec, query->context);
- }
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ if (num_prs != 1) {
+ query->callback(-EINVAL, NULL, 0, query->context);
+ return;
}
- } else
- query->callback(status, NULL, query->context);
+
+ ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, num_prs, query->context);
+ } else {
+ if (!sa_query->resp_pr_data)
+ ib_sa_pr_callback_single(query, status, mad);
+ else
+ ib_sa_pr_callback_multiple(query, status, num_prs,
+ sa_query->resp_pr_data);
+ }
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
@@ -1490,7 +1578,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct sa_path_rec *resp,
- void *context),
+ int num_paths, void *context),
void *context,
struct ib_sa_query **sa_query)
{
@@ -1589,7 +1677,7 @@ err1:
EXPORT_SYMBOL(ib_sa_path_rec_get);
static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_prs,
struct ib_sa_mad *mad)
{
struct ib_sa_mcmember_query *query =
@@ -1681,7 +1769,7 @@ err1:
/* Support GuidInfoRecord */
static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_paths,
struct ib_sa_mad *mad)
{
struct ib_sa_guidinfo_query *query =
@@ -1791,7 +1879,7 @@ static void ib_classportinfo_cb(void *context)
}
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_prs,
struct ib_sa_mad *mad)
{
unsigned long flags;
@@ -1967,13 +2055,13 @@ static void send_handler(struct ib_mad_agent *agent,
/* No callback -- already got recv */
break;
case IB_WC_RESP_TIMEOUT_ERR:
- query->callback(query, -ETIMEDOUT, NULL);
+ query->callback(query, -ETIMEDOUT, 0, NULL);
break;
case IB_WC_WR_FLUSH_ERR:
- query->callback(query, -EINTR, NULL);
+ query->callback(query, -EINTR, 0, NULL);
break;
default:
- query->callback(query, -EIO, NULL);
+ query->callback(query, -EIO, 0, NULL);
break;
}
@@ -2001,10 +2089,10 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
mad_recv_wc->recv_buf.mad->mad_hdr.status ?
- -EINVAL : 0,
+ -EINVAL : 0, 1,
(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
else
- query->callback(query, -EIO, NULL);
+ query->callback(query, -EIO, 0, NULL);
}
ib_free_recv_mad(mad_recv_wc);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index a3f84b50c46a..84c53bd2a52d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -433,6 +433,7 @@ static struct attribute *port_default_attrs[] = {
&ib_port_attr_link_layer.attr,
NULL
};
+ATTRIBUTE_GROUPS(port_default);
static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
@@ -774,7 +775,7 @@ static void ib_port_gid_attr_release(struct kobject *kobj)
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
- .default_attrs = port_default_attrs
+ .default_groups = port_default_groups,
};
static struct kobj_type gid_attr_type = {
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 2b72c4fa9550..bf42650f125b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -95,6 +95,7 @@ struct ucma_context {
u64 uid;
struct list_head list;
+ struct list_head mc_list;
struct work_struct close_work;
};
@@ -105,6 +106,7 @@ struct ucma_multicast {
u64 uid;
u8 join_state;
+ struct list_head list;
struct sockaddr_storage addr;
};
@@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
INIT_WORK(&ctx->close_work, ucma_close_id);
init_completion(&ctx->comp);
+ INIT_LIST_HEAD(&ctx->mc_list);
/* So list_del() will work if we don't do ucma_finish_ctx() */
INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
@@ -484,19 +487,19 @@ err1:
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
- struct ucma_multicast *mc;
- unsigned long index;
+ struct ucma_multicast *mc, *tmp;
- xa_for_each(&multicast_table, index, mc) {
- if (mc->ctx != ctx)
- continue;
+ xa_lock(&multicast_table);
+ list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+ list_del(&mc->list);
/*
* At this point mc->ctx->ref is 0 so the mc cannot leave the
* lock on the reader and this is enough serialization
*/
- xa_erase(&multicast_table, index);
+ __xa_erase(&multicast_table, mc->id);
kfree(mc);
}
+ xa_unlock(&multicast_table);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
@@ -751,8 +754,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
{
struct rdma_dev_addr *dev_addr;
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
rdma_addr_get_dgid(dev_addr,
@@ -778,8 +781,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
(union ib_gid *)&resp->ib_route[0].dgid);
@@ -918,7 +921,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
if (!resp)
return -ENOMEM;
- resp->num_paths = ctx->cm_id->route.num_paths;
+ resp->num_paths = ctx->cm_id->route.num_pri_alt_paths;
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
@@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file,
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
- if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ xa_lock(&multicast_table);
+ if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
GFP_KERNEL)) {
ret = -ENOMEM;
goto err_free_mc;
}
+ list_add_tail(&mc->list, &ctx->mc_list);
+ xa_unlock(&multicast_table);
+
mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
@@ -1500,8 +1507,11 @@ err_leave_multicast:
mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
err_xa_erase:
- xa_erase(&multicast_table, mc->id);
+ xa_lock(&multicast_table);
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
err_free_mc:
+ xa_unlock(&multicast_table);
kfree(mc);
err_put_ctx:
ucma_put_ctx(ctx);
@@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
mc = ERR_PTR(-EINVAL);
else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
- else
- __xa_erase(&multicast_table, mc->id);
- xa_unlock(&multicast_table);
if (IS_ERR(mc)) {
+ xa_unlock(&multicast_table);
ret = PTR_ERR(mc);
goto out;
}
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+ xa_unlock(&multicast_table);
+
mutex_lock(&mc->ctx->mutex);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
mutex_unlock(&mc->ctx->mutex);
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index f0760741f281..04c04e6d24c3 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -16,9 +16,9 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
{
struct sg_table *sgt;
struct scatterlist *sg;
- struct dma_fence *fence;
unsigned long start, end, cur = 0;
unsigned int nmap = 0;
+ long ret;
int i;
dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
@@ -68,10 +68,13 @@ wait_fence:
* may be not up-to-date. Wait for the exporter to finish
* the migration.
*/
- fence = dma_resv_excl_fence(umem_dmabuf->attach->dmabuf->resv);
- if (fence)
- return dma_fence_wait(fence, false);
-
+ ret = dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
+ DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -ETIMEDOUT;
return 0;
}
EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 7a47343d11f9..e9fa22d31c23 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -43,8 +43,6 @@
#include <linux/hmm.h>
#include <linux/pagemap.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
@@ -227,7 +225,6 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
const struct mmu_interval_notifier_ops *ops)
{
struct ib_umem_odp *umem_odp;
- struct mm_struct *mm;
int ret;
if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
@@ -241,7 +238,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
umem_odp->umem.length = size;
umem_odp->umem.address = addr;
umem_odp->umem.writable = ib_access_writable(access);
- umem_odp->umem.owning_mm = mm = current->mm;
+ umem_odp->umem.owning_mm = current->mm;
umem_odp->notifier.ops = ops;
umem_odp->page_shift = PAGE_SHIFT;
@@ -456,14 +453,14 @@ retry:
break;
}
}
- /* upon sucesss lock should stay on hold for the callee */
+ /* upon success lock should stay on hold for the callee */
if (!ret)
ret = dma_index - start_idx;
else
mutex_unlock(&umem_odp->umem_mutex);
out_put_mm:
- mmput(owning_mm);
+ mmput_async(owning_mm);
out_put_task:
if (owning_process)
put_task_struct(owning_process);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index d1345d76d9b1..4796f6a8828c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -337,7 +337,7 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
- resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
+ resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
@@ -739,6 +739,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
rdma_restrack_set_name(&mr->res, NULL);
@@ -861,8 +862,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
mr->pd = new_pd;
atomic_inc(&new_pd->usecnt);
}
- if (cmd.flags & IB_MR_REREG_TRANS)
+ if (cmd.flags & IB_MR_REREG_TRANS) {
mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
+ }
}
memset(&resp, 0, sizeof(resp));
@@ -1399,7 +1402,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
IB_SIGNAL_REQ_WR;
attr.qp_type = cmd->qp_type;
- attr.create_flags = 0;
attr.cap.max_send_wr = cmd->max_send_wr;
attr.cap.max_recv_wr = cmd->max_recv_wr;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 990f0724acc6..d9799706c58e 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -337,6 +337,14 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
break;
+ case UVERBS_ATTR_TYPE_RAW_FD:
+ if (uattr->attr_data.reserved || uattr->len != 0 ||
+ uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX)
+ return -EINVAL;
+ /* _uverbs_get_const_signed() is the accessor */
+ e->ptr_attr.data = uattr->data_s64;
+ break;
+
case UVERBS_ATTR_TYPE_IDRS_ARRAY:
return uverbs_process_idrs_array(pbundle, attr_uapi,
&e->objs_arr_attr, uattr,
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index b8d715c68ca4..11a080646916 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device,
struct rdma_ah_attr *src = ah_attr;
struct rdma_ah_attr conv_ah;
- memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
+ memset(&dst->grh, 0, sizeof(dst->grh));
if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
(rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) &&
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index d42ed7ff223e..0ddcf6da66c4 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -46,385 +46,6 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
return action->device->ops.destroy_flow_action(action);
}
-static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
- u32 flags, bool is_modify)
-{
- u64 verbs_flags = flags;
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
-
- if (is_modify && uverbs_attr_is_valid(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
-
- return verbs_flags;
-};
-
-static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
-{
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
- &keymat->keymat.aes_gcm;
-
- if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return -EOPNOTSUPP;
-
- if (aes_gcm->key_len != 32 &&
- aes_gcm->key_len != 24 &&
- aes_gcm->key_len != 16)
- return -EINVAL;
-
- if (aes_gcm->icv_len != 16 &&
- aes_gcm->icv_len != 8 &&
- aes_gcm->icv_len != 12)
- return -EINVAL;
-
- return 0;
-}
-
-static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
-};
-
-static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
- /* This is used in order to modify an esp flow action with an enabled
- * replay protection to a disabled one. This is only supported via
- * modify, as in create verb we can simply drop the REPLAY attribute and
- * achieve the same thing.
- */
- return is_modify ? 0 : -EINVAL;
-}
-
-static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
- /* Some replay protections could always be enabled without validating
- * anything.
- */
- return 0;
-}
-
-static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify) = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
-};
-
-static int parse_esp_ip(enum ib_flow_spec_type proto,
- const void __user *val_ptr,
- size_t len, union ib_flow_spec *out)
-{
- int ret;
- const struct ib_uverbs_flow_ipv4_filter ipv4 = {
- .src_ip = cpu_to_be32(0xffffffffUL),
- .dst_ip = cpu_to_be32(0xffffffffUL),
- .proto = 0xff,
- .tos = 0xff,
- .ttl = 0xff,
- .flags = 0xff,
- };
- const struct ib_uverbs_flow_ipv6_filter ipv6 = {
- .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .flow_label = cpu_to_be32(0xffffffffUL),
- .next_hdr = 0xff,
- .traffic_class = 0xff,
- .hop_limit = 0xff,
- };
- union {
- struct ib_uverbs_flow_ipv4_filter ipv4;
- struct ib_uverbs_flow_ipv6_filter ipv6;
- } user_val = {};
- const void *user_pmask;
- size_t val_len;
-
- /* If the flow IPv4/IPv6 flow specifications are extended, the mask
- * should be changed as well.
- */
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
- sizeof(ipv4.flags) != sizeof(ipv4));
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
- sizeof(ipv6.reserved) != sizeof(ipv6));
-
- switch (proto) {
- case IB_FLOW_SPEC_IPV4:
- if (len > sizeof(user_val.ipv4) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
- len - sizeof(user_val.ipv4)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv4));
- ret = copy_from_user(&user_val.ipv4, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv4;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (len > sizeof(user_val.ipv6) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
- len - sizeof(user_val.ipv6)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv6));
- ret = copy_from_user(&user_val.ipv6, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv6;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
- &user_val,
- val_len, out);
-}
-
-static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uverbs_flow_action_esp_encap uverbs_encap;
- int ret;
-
- ret = uverbs_copy_from(&uverbs_encap, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
- if (ret)
- return ret;
-
- /* We currently support only one encap */
- if (uverbs_encap.next_ptr)
- return -EOPNOTSUPP;
-
- if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
- uverbs_encap.type != IB_FLOW_SPEC_IPV6)
- return -EOPNOTSUPP;
-
- return parse_esp_ip(uverbs_encap.type,
- u64_to_user_ptr(uverbs_encap.val_ptr),
- uverbs_encap.len,
- &out->spec);
-}
-
-struct ib_flow_action_esp_attr {
- struct ib_flow_action_attrs_esp hdr;
- struct ib_flow_action_attrs_esp_keymats keymat;
- struct ib_flow_action_attrs_esp_replays replay;
- /* We currently support only one spec */
- struct ib_flow_spec_list encap;
-};
-
-#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
-static int parse_flow_action_esp(struct ib_device *ib_dev,
- struct uverbs_attr_bundle *attrs,
- struct ib_flow_action_esp_attr *esp_attr,
- bool is_modify)
-{
- struct ib_uverbs_flow_action_esp uverbs_esp = {};
- int ret;
-
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
- if (IS_UVERBS_COPY_ERR(ret))
- return ret;
-
- /* This can be called from FLOW_ACTION_ESP_MODIFY where
- * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
- */
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
- ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
- if (ret)
- return ret;
-
- if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
- return -EOPNOTSUPP;
-
- esp_attr->hdr.spi = uverbs_esp.spi;
- esp_attr->hdr.seq = uverbs_esp.seq;
- esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
- esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
- }
- esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
- is_modify);
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
- esp_attr->keymat.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- if (ret)
- return ret;
-
- ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
- if (ret)
- return ret;
-
- esp_attr->hdr.keymat = &esp_attr->keymat;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
- esp_attr->replay.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
-
- ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
- if (ret)
- return ret;
-
- ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
- is_modify);
- if (ret)
- return ret;
-
- esp_attr->hdr.replay = &esp_attr->replay;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
- ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
- if (ret)
- return ret;
-
- esp_attr->hdr.encap = &esp_attr->encap;
- }
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
- struct ib_device *ib_dev = attrs->context->device;
- int ret;
- struct ib_flow_action *action;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!ib_dev->ops.create_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
- if (ret)
- return ret;
-
- /* No need to check as this attribute is marked as MANDATORY */
- action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr,
- attrs);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, ib_dev,
- IB_FLOW_ACTION_ESP);
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
- struct ib_flow_action *action = uobj->object;
- int ret;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!action->device->ops.modify_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
- if (ret)
- return ret;
-
- if (action->type != IB_FLOW_ACTION_ESP)
- return -EINVAL;
-
- return action->device->ops.modify_flow_action_esp(action,
- &esp_attr.hdr,
- attrs);
-}
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
- aes_key),
- },
-};
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
- size),
- },
-};
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
- UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_WRITE,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_FLOW_ACTION_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
@@ -435,9 +56,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_FLOW_ACTION,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY));
const struct uapi_definition uverbs_def_obj_flow_action[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 2f2c7646fce1..a02916a3a79c 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -447,6 +447,9 @@ static int uapi_finalize(struct uverbs_api *uapi)
uapi->num_write_ex = max_write_ex + 1;
data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
sizeof(*uapi->write_methods), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
data[i] = &uapi->notsupp_method;
uapi->write_methods = data;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c18634bec212..26b021f43ba4 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -268,9 +268,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
return ERR_PTR(-ENOMEM);
pd->device = device;
- pd->uobject = NULL;
- pd->__internal_mr = NULL;
- atomic_set(&pd->usecnt, 0);
pd->flags = flags;
rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
@@ -284,7 +281,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
}
rdma_restrack_add(&pd->res);
- if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
@@ -311,7 +308,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
pd->__internal_mr = mr;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
+ if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
pd->local_dma_lkey = pd->__internal_mr->lkey;
if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -341,11 +338,6 @@ int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
pd->__internal_mr = NULL;
}
- /* uverbs manipulates usecnt with proper locking, while the kabi
- * requires the caller to guarantee we can't race here.
- */
- WARN_ON(atomic_read(&pd->usecnt));
-
ret = pd->device->ops.dealloc_pd(pd, udata);
if (ret)
return ret;
@@ -1046,7 +1038,7 @@ struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
ret = pd->device->ops.create_srq(srq, srq_init_attr, udata);
if (ret) {
rdma_restrack_put(&srq->res);
- atomic_dec(&srq->pd->usecnt);
+ atomic_dec(&pd->usecnt);
if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
atomic_dec(&srq->ext.xrc.xrcd->usecnt);
if (ib_srq_has_cq(srq->srq_type))
@@ -2139,8 +2131,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mr *mr;
if (access_flags & IB_ACCESS_ON_DEMAND) {
- if (!(pd->device->attrs.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
+ if (!(pd->device->attrs.kernel_cap_flags &
+ IBK_ON_DEMAND_PAGING)) {
pr_debug("ODP support not available\n");
return ERR_PTR(-EINVAL);
}
@@ -2153,9 +2145,12 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return mr;
mr->device = pd->device;
+ mr->type = IB_MR_TYPE_USER;
mr->pd = pd;
mr->dm = NULL;
atomic_inc(&pd->usecnt);
+ mr->iova = virt_addr;
+ mr->length = length;
rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
rdma_restrack_parent_name(&mr->res, &pd->res);
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index fba0b3be903e..6b3a88046125 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_HNS) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
+obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 79401e6c6aa9..785c37cae3c0 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -173,7 +173,7 @@ struct bnxt_re_dev {
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
- /* QP for for handling QP1 packets */
+ /* QP for handling QP1 packets */
struct bnxt_re_gsi_context gsi_ctx;
struct bnxt_re_stats stats;
atomic_t nq_alloc_cnt;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 29cc0d14399a..989edc789633 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -146,13 +146,13 @@ int bnxt_re_query_device(struct ib_device *ibdev,
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SHUTDOWN_PORT
| IB_DEVICE_SYS_IMAGE_GUID
- | IB_DEVICE_LOCAL_DMA_LKEY
| IB_DEVICE_RESIZE_MAX_WR
| IB_DEVICE_PORT_ACTIVE_EVENT
| IB_DEVICE_N_NOTIFY_CQ
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_MEM_WINDOW_TYPE_2B
| IB_DEVICE_MEM_MGT_EXTENSIONS;
+ ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
ib_attr->max_send_sge = dev_attr->max_qp_sges;
ib_attr->max_recv_sge = dev_attr->max_qp_sges;
ib_attr->max_sge_rd = dev_attr->max_qp_sges;
@@ -262,13 +262,12 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str)
int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num,
u16 index, u16 *pkey)
{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ if (index > 0)
+ return -EINVAL;
- /* Ignore port_num */
+ *pkey = IB_DEFAULT_PKEY_FULL;
- memset(pkey, 0, sizeof(*pkey));
- return bnxt_qplib_get_pkey(&rdev->qplib_res,
- &rdev->qplib_res.pkey_tbl, index, pkey);
+ return 0;
}
int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b44944fb9b24..8c0c80a8d338 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -725,7 +725,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
/* ib device init */
ibdev->node_type = RDMA_NODE_IB_CA;
- strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
+ strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
strlen(BNXT_RE_DESC) + 5);
ibdev->phys_port_cnt = 1;
@@ -893,7 +893,6 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
qplib_srq);
struct ib_event ib_event;
- int rc = 0;
ib_event.device = &srq->rdev->ibdev;
ib_event.element.srq = &srq->ib_srq;
@@ -907,7 +906,7 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
(*srq->ib_srq.event_handler)(&ib_event,
srq->ib_srq.srq_context);
}
- return rc;
+ return 0;
}
static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index ca88849559bf..96e581ced50e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -46,6 +46,7 @@
#include <linux/delay.h>
#include <linux/prefetch.h>
#include <linux/if_ether.h>
+#include <rdma/ib_mad.h>
#include "roce_hsi.h"
@@ -1232,7 +1233,7 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_modify_qp req;
struct creq_modify_qp_resp resp;
- u16 cmd_flags = 0, pkey;
+ u16 cmd_flags = 0;
u32 temp32[4];
u32 bmask;
int rc;
@@ -1255,11 +1256,9 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS)
req.access = qp->access;
- if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) {
- if (!bnxt_qplib_get_pkey(res, &res->pkey_tbl,
- qp->pkey_index, &pkey))
- req.pkey = cpu_to_le16(pkey);
- }
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY)
+ req.pkey = cpu_to_le16(IB_DEFAULT_PKEY_FULL);
+
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY)
req.qkey = cpu_to_le32(qp->qkey);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 3de854727460..061b2895dd9b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -555,7 +555,7 @@ skip_ctx_setup:
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- kfree(rcfw->cmdq.cmdq_bitmap);
+ bitmap_free(rcfw->cmdq.cmdq_bitmap);
kfree(rcfw->qp_tbl);
kfree(rcfw->crsqe_tbl);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
@@ -572,7 +572,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_sg_info sginfo = {};
struct bnxt_qplib_cmdq_ctx *cmdq;
struct bnxt_qplib_creq_ctx *creq;
- u32 bmap_size = 0;
rcfw->pdev = res->pdev;
cmdq = &rcfw->cmdq;
@@ -613,13 +612,10 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
if (!rcfw->crsqe_tbl)
goto fail;
- bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long);
- cmdq->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
+ cmdq->cmdq_bitmap = bitmap_zalloc(rcfw->cmdq_depth, GFP_KERNEL);
if (!cmdq->cmdq_bitmap)
goto fail;
- cmdq->bmap_size = bmap_size;
-
/* Allocate one extra to hold the QP1 entries */
rcfw->qp_tbl_size = qp_tbl_sz + 1;
rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
@@ -667,8 +663,8 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
iounmap(cmdq->cmdq_mbox.reg.bar_reg);
iounmap(creq->creq_db.reg.bar_reg);
- indx = find_first_bit(cmdq->cmdq_bitmap, cmdq->bmap_size);
- if (indx != cmdq->bmap_size)
+ indx = find_first_bit(cmdq->cmdq_bitmap, rcfw->cmdq_depth);
+ if (indx != rcfw->cmdq_depth)
dev_err(&rcfw->pdev->dev,
"disabling RCFW with pending cmd-bit %lx\n", indx);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 82faa4e4cda8..0a3d8e7da3d4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -152,7 +152,6 @@ struct bnxt_qplib_cmdq_ctx {
wait_queue_head_t waitq;
unsigned long flags;
unsigned long *cmdq_bitmap;
- u32 bmap_size;
u32 seq_num;
};
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index bc1ba4b51ba4..126d4f26f75a 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -649,31 +649,6 @@ static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
}
-static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl)
-{
- if (!pkey_tbl->tbl)
- dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
- else
- kfree(pkey_tbl->tbl);
-
- pkey_tbl->tbl = NULL;
- pkey_tbl->max = 0;
- pkey_tbl->active = 0;
-}
-
-static int bnxt_qplib_alloc_pkey_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl,
- u16 max)
-{
- pkey_tbl->tbl = kcalloc(max, sizeof(u16), GFP_KERNEL);
- if (!pkey_tbl->tbl)
- return -ENOMEM;
-
- pkey_tbl->max = max;
- return 0;
-};
-
/* PDs */
int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd)
{
@@ -843,24 +818,6 @@ unmap_io:
return -ENOMEM;
}
-/* PKEYs */
-static void bnxt_qplib_cleanup_pkey_tbl(struct bnxt_qplib_pkey_tbl *pkey_tbl)
-{
- memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
- pkey_tbl->active = 0;
-}
-
-static void bnxt_qplib_init_pkey_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl)
-{
- u16 pkey = 0xFFFF;
-
- memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
-
- /* pkey default = 0xFFFF */
- bnxt_qplib_add_pkey(res, pkey_tbl, &pkey, false);
-}
-
/* Stats */
static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
struct bnxt_qplib_stats *stats)
@@ -891,21 +848,18 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res)
{
- bnxt_qplib_cleanup_pkey_tbl(&res->pkey_tbl);
bnxt_qplib_cleanup_sgid_tbl(res, &res->sgid_tbl);
}
int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
{
bnxt_qplib_init_sgid_tbl(&res->sgid_tbl, res->netdev);
- bnxt_qplib_init_pkey_tbl(res, &res->pkey_tbl);
return 0;
}
void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
{
- bnxt_qplib_free_pkey_tbl(res, &res->pkey_tbl);
bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
bnxt_qplib_free_pd_tbl(&res->pd_tbl);
bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
@@ -924,10 +878,6 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
if (rc)
goto fail;
- rc = bnxt_qplib_alloc_pkey_tbl(res, &res->pkey_tbl, dev_attr->max_pkey);
- if (rc)
- goto fail;
-
rc = bnxt_qplib_alloc_pd_tbl(res, &res->pd_tbl, dev_attr->max_pd);
if (rc)
goto fail;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index e1411a2352a7..982e2c96dac2 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -185,12 +185,6 @@ struct bnxt_qplib_sgid_tbl {
u8 *vlan;
};
-struct bnxt_qplib_pkey_tbl {
- u16 *tbl;
- u16 max;
- u16 active;
-};
-
struct bnxt_qplib_dpi {
u32 dpi;
void __iomem *dbr;
@@ -258,7 +252,6 @@ struct bnxt_qplib_res {
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
struct bnxt_qplib_sgid_tbl sgid_tbl;
- struct bnxt_qplib_pkey_tbl pkey_tbl;
struct bnxt_qplib_dpi_tbl dpi_tbl;
bool prio;
bool is_vf;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 379e715ebd30..b802981b7171 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -146,17 +146,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_srq = le16_to_cpu(sb->max_srq);
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge;
- attr->max_pkey = le32_to_cpu(sb->max_pkeys);
- /*
- * Some versions of FW reports more than 0xFFFF.
- * Restrict it for now to 0xFFFF to avoid
- * reporting trucated value
- */
- if (attr->max_pkey > 0xFFFF) {
- /* ib_port_attr::pkey_tbl_len is u16 */
- attr->max_pkey = 0xFFFF;
- }
-
+ attr->max_pkey = 1;
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
attr->l2_db_size = (sb->l2_db_space_size + 1) *
(0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
@@ -414,93 +404,6 @@ int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
return rc;
}
-/* pkeys */
-int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
- u16 *pkey)
-{
- if (index == 0xFFFF) {
- *pkey = 0xFFFF;
- return 0;
- }
- if (index >= pkey_tbl->max) {
- dev_err(&res->pdev->dev,
- "Index %d exceeded PKEY table max (%d)\n",
- index, pkey_tbl->max);
- return -EINVAL;
- }
- memcpy(pkey, &pkey_tbl->tbl[index], sizeof(*pkey));
- return 0;
-}
-
-int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update)
-{
- int i, rc = 0;
-
- if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "PKEY table not allocated\n");
- return -EINVAL;
- }
-
- /* Do we need a pkey_lock here? */
- if (!pkey_tbl->active) {
- dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
- return -ENOMEM;
- }
- for (i = 0; i < pkey_tbl->max; i++) {
- if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
- break;
- }
- if (i == pkey_tbl->max) {
- dev_err(&res->pdev->dev,
- "PKEY 0x%04x not found in the pkey table\n", *pkey);
- return -ENOMEM;
- }
- memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
- pkey_tbl->active--;
-
- /* unlock */
- return rc;
-}
-
-int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update)
-{
- int i, free_idx, rc = 0;
-
- if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "PKEY table not allocated\n");
- return -EINVAL;
- }
-
- /* Do we need a pkey_lock here? */
- if (pkey_tbl->active == pkey_tbl->max) {
- dev_err(&res->pdev->dev, "PKEY table is full\n");
- return -ENOMEM;
- }
- free_idx = pkey_tbl->max;
- for (i = 0; i < pkey_tbl->max; i++) {
- if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
- return -EALREADY;
- else if (!pkey_tbl->tbl[i] && free_idx == pkey_tbl->max)
- free_idx = i;
- }
- if (free_idx == pkey_tbl->max) {
- dev_err(&res->pdev->dev,
- "PKEY table is FULL but count is not MAX??\n");
- return -ENOMEM;
- }
- /* Add PKEY to the pkey_tbl */
- memcpy(&pkey_tbl->tbl[free_idx], pkey, sizeof(*pkey));
- pkey_tbl->active++;
-
- /* unlock */
- return rc;
-}
-
/* AH */
int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
bool block)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index a18f568cb23e..5939e8fc8353 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -255,15 +255,6 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, u16 gid_idx,
const u8 *smac);
-int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
- u16 *pkey);
-int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update);
-int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update);
int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_dev_attr *attr, bool vf);
int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 913f39ee4416..499a425a3379 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -734,7 +734,7 @@ static int send_connect(struct c4iw_ep *ep)
&ep->com.remote_addr;
int ret;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
- u32 isn = (prandom_u32() & ~7UL) - 1;
+ u32 isn = (get_random_u32() & ~7UL) - 1;
struct net_device *netdev;
u64 params;
@@ -2468,30 +2468,24 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
opt2 |= CCTRL_ECN_V(1);
}
- skb_get(skb);
- rpl = cplhdr(skb);
if (!is_t4(adapter_type)) {
- skb_trim(skb, roundup(sizeof(*rpl5), 16));
- rpl5 = (void *)rpl;
- INIT_TP_WR(rpl5, ep->hwtid);
- } else {
- skb_trim(skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- }
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
+ u32 isn = (get_random_u32() & ~7UL) - 1;
- if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
- u32 isn = (prandom_u32() & ~7UL) - 1;
+ skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL);
+ rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16));
+ rpl = (void *)rpl5;
+ INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid);
opt2 |= T5_OPT_2_VALID_F;
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
opt2 |= T5_ISS_F;
- rpl5 = (void *)rpl;
- memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
if (peer2peer)
isn += 4;
rpl5->iss = cpu_to_be32(isn);
pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
+ } else {
+ skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
+ rpl = __skb_put_zero(skb, sizeof(*rpl));
+ INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid);
}
rpl->opt0 = cpu_to_be64(opt0);
diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c
index 724d23297b35..280d61466855 100644
--- a/drivers/infiniband/hw/cxgb4/id_table.c
+++ b/drivers/infiniband/hw/cxgb4/id_table.c
@@ -54,12 +54,12 @@ u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
if (obj < alloc->max) {
if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last += prandom_u32() % RANDOM_SKIP;
+ alloc->last += prandom_u32_max(RANDOM_SKIP);
else
alloc->last = obj + 1;
if (alloc->last >= alloc->max)
alloc->last = 0;
- set_bit(obj, alloc->table);
+ __set_bit(obj, alloc->table);
obj += alloc->start;
} else
obj = -1;
@@ -75,37 +75,32 @@ void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj)
obj -= alloc->start;
spin_lock_irqsave(&alloc->lock, flags);
- clear_bit(obj, alloc->table);
+ __clear_bit(obj, alloc->table);
spin_unlock_irqrestore(&alloc->lock, flags);
}
int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
u32 reserved, u32 flags)
{
- int i;
-
alloc->start = start;
alloc->flags = flags;
if (flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last = prandom_u32() % RANDOM_SKIP;
+ alloc->last = prandom_u32_max(RANDOM_SKIP);
else
alloc->last = 0;
- alloc->max = num;
+ alloc->max = num;
spin_lock_init(&alloc->lock);
- alloc->table = kmalloc_array(BITS_TO_LONGS(num), sizeof(long),
- GFP_KERNEL);
+ alloc->table = bitmap_zalloc(num, GFP_KERNEL);
if (!alloc->table)
return -ENOMEM;
- bitmap_zero(alloc->table, num);
if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY))
- for (i = 0; i < reserved; ++i)
- set_bit(i, alloc->table);
+ bitmap_set(alloc->table, 0, reserved);
return 0;
}
void c4iw_id_table_free(struct c4iw_id_table *alloc)
{
- kfree(alloc->table);
+ bitmap_free(alloc->table);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 12f33467c672..50cb2259bf87 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -314,7 +314,6 @@ enum db_state {
struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
- u32 device_cap_flags;
struct xarray cqs;
struct xarray qps;
struct xarray mrs;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 0c8fd5a85fcb..246b739ddb2b 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -41,6 +41,7 @@
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>
+#include <net/addrconf.h>
#include <linux/io.h>
#include <asm/irq.h>
@@ -264,10 +265,14 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
return -EINVAL;
dev = to_c4iw_dev(ibdev);
- memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
+ addrconf_addr_eui48((u8 *)&props->sys_image_guid,
+ dev->rdev.lldi.ports[0]->dev_addr);
props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
props->fw_ver = dev->rdev.lldi.fw_vers;
- props->device_cap_flags = dev->device_cap_flags;
+ props->device_cap_flags = IB_DEVICE_MEM_WINDOW;
+ props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ if (fastreg_support)
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->page_size_cap = T4_PAGESIZE_MASK;
props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
@@ -525,11 +530,8 @@ void c4iw_register_device(struct work_struct *work)
struct c4iw_dev *dev = ctx->dev;
pr_debug("c4iw_dev %p\n", dev);
- memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
- memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
- dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
- if (fastreg_support)
- dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid,
+ dev->rdev.lldi.ports[0]->dev_addr);
dev->ibdev.local_dma_lkey = 0;
dev->ibdev.node_type = RDMA_NODE_RNIC;
BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d20b4ef2c853..ffbd9a89981e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2460,6 +2460,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
memset(attr, 0, sizeof(*attr));
memset(init_attr, 0, sizeof(*init_attr));
attr->qp_state = to_ib_qp_state(qhp->attr.state);
+ attr->cur_qp_state = to_ib_qp_state(qhp->attr.state);
init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 0b0b93b529f3..d4b9226088bd 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -444,7 +444,10 @@ struct efa_admin_create_cq_cmd {
/*
* 4:0 : cq_entry_size_words - size of CQ entry in
* 32-bit words, valid values: 4, 8.
- * 7:5 : reserved7 - MBZ
+ * 5 : set_src_addr - If set, source address will be
+ * filled on RX completions from unknown senders.
+ * Requires 8 words CQ entry size.
+ * 7:6 : reserved7 - MBZ
*/
u8 cq_caps_2;
@@ -980,6 +983,7 @@ struct efa_admin_host_info {
#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5)
/* create_cq_resp */
#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0)
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index fb405da4e1db..8f8885e002ba 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -168,7 +168,10 @@ int efa_com_create_cq(struct efa_com_dev *edev,
EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1);
create_cmd.eqn = params->eqn;
}
-
+ if (params->set_src_addr) {
+ EFA_SET(&create_cmd.cq_caps_2,
+ EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1);
+ }
efa_com_set_dma_addr(params->dma_addr,
&create_cmd.cq_ba.mem_addr_high,
&create_cmd.cq_ba.mem_addr_low);
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index c33010bbf9e8..0898ad5bc340 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -75,7 +75,8 @@ struct efa_com_create_cq_params {
u16 uarn;
u16 eqn;
u8 entry_size_in_bytes;
- bool interrupt_mode_enabled;
+ u8 interrupt_mode_enabled : 1;
+ u8 set_src_addr : 1;
};
struct efa_com_create_cq_result {
diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h
new file mode 100644
index 000000000000..17ba8984b11e
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_io_defs.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_IO_H_
+#define _EFA_IO_H_
+
+#define EFA_IO_TX_DESC_NUM_BUFS 2
+#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
+#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
+#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
+
+enum efa_io_queue_type {
+ /* send queue (of a QP) */
+ EFA_IO_SEND_QUEUE = 1,
+ /* recv queue (of a QP) */
+ EFA_IO_RECV_QUEUE = 2,
+};
+
+enum efa_io_send_op_type {
+ /* send message */
+ EFA_IO_SEND = 0,
+ /* RDMA read */
+ EFA_IO_RDMA_READ = 1,
+};
+
+enum efa_io_comp_status {
+ /* Successful completion */
+ EFA_IO_COMP_STATUS_OK = 0,
+ /* Flushed during QP destroy */
+ EFA_IO_COMP_STATUS_FLUSHED = 1,
+ /* Internal QP error */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2,
+ /* Bad operation type */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3,
+ /* Bad AH */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4,
+ /* LKEY not registered or does not match IOVA */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5,
+ /* Message too long */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6,
+ /* Destination ENI is down or does not run EFA */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7,
+ /* Connection was reset by remote side */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8,
+ /* Bad dest QP number (QP does not exist or is in error state) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9,
+ /* Destination resource not ready (no WQEs posted on RQ) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10,
+ /* Receiver SGL too short */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11,
+ /* Unexpected status returned by responder */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12,
+ /* Unresponsive remote - detected locally */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13,
+};
+
+struct efa_io_tx_meta_desc {
+ /* Verbs-generated Request ID */
+ u16 req_id;
+
+ /*
+ * control flags
+ * 3:0 : op_type - operation type: send/rdma/fast mem
+ * ops/etc
+ * 4 : has_imm - immediate_data field carries valid
+ * data.
+ * 5 : inline_msg - inline mode - inline message data
+ * follows this descriptor (no buffer descriptors).
+ * Note that it is different from immediate data
+ * 6 : meta_extension - Extended metadata. MBZ
+ * 7 : meta_desc - Indicates metadata descriptor.
+ * Must be set.
+ */
+ u8 ctrl1;
+
+ /*
+ * control flags
+ * 0 : phase
+ * 1 : reserved25 - MBZ
+ * 2 : first - Indicates first descriptor in
+ * transaction. Must be set.
+ * 3 : last - Indicates last descriptor in
+ * transaction. Must be set.
+ * 4 : comp_req - Indicates whether completion should
+ * be posted, after packet is transmitted. Valid only
+ * for the first descriptor
+ * 7:5 : reserved29 - MBZ
+ */
+ u8 ctrl2;
+
+ u16 dest_qp_num;
+
+ /*
+ * If inline_msg bit is set, length of inline message in bytes,
+ * otherwise length of SGL (number of buffers).
+ */
+ u16 length;
+
+ /*
+ * immediate data: if has_imm is set, then this field is included
+ * within Tx message and reported in remote Rx completion.
+ */
+ u32 immediate_data;
+
+ u16 ah;
+
+ u16 reserved;
+
+ /* Queue key */
+ u32 qkey;
+
+ u8 reserved2[12];
+};
+
+/*
+ * Tx queue buffer descriptor, for any transport type. Preceded by metadata
+ * descriptor.
+ */
+struct efa_io_tx_buf_desc {
+ /* length in bytes */
+ u32 length;
+
+ /*
+ * 23:0 : lkey - local memory translation key
+ * 31:24 : reserved - MBZ
+ */
+ u32 lkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_remote_mem_addr {
+ /* length in bytes */
+ u32 length;
+
+ /* remote memory translation key */
+ u32 rkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_rdma_req {
+ /* Remote memory address */
+ struct efa_io_remote_mem_addr remote_mem;
+
+ /* Local memory address */
+ struct efa_io_tx_buf_desc local_mem[1];
+};
+
+/*
+ * Tx WQE, composed of tx meta descriptors followed by either tx buffer
+ * descriptors or inline data
+ */
+struct efa_io_tx_wqe {
+ /* TX meta */
+ struct efa_io_tx_meta_desc meta;
+
+ union {
+ /* Send buffer descriptors */
+ struct efa_io_tx_buf_desc sgl[2];
+
+ u8 inline_data[32];
+
+ /* RDMA local and remote memory addresses */
+ struct efa_io_rdma_req rdma_req;
+ } data;
+};
+
+/*
+ * Rx buffer descriptor; RX WQE is composed of one or more RX buffer
+ * descriptors.
+ */
+struct efa_io_rx_desc {
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer Pointer[63:32] */
+ u32 buf_addr_hi;
+
+ /* Verbs-generated request id. */
+ u16 req_id;
+
+ /* Length in bytes. */
+ u16 length;
+
+ /*
+ * LKey and control flags
+ * 23:0 : lkey
+ * 29:24 : reserved - MBZ
+ * 30 : first - Indicates first descriptor in WQE
+ * 31 : last - Indicates last descriptor in WQE
+ */
+ u32 lkey_ctrl;
+};
+
+/* Common IO completion descriptor */
+struct efa_io_cdesc_common {
+ /*
+ * verbs-generated request ID, as provided in the completed tx or rx
+ * descriptor.
+ */
+ u16 req_id;
+
+ u8 status;
+
+ /*
+ * flags
+ * 0 : phase - Phase bit
+ * 2:1 : q_type - enum efa_io_queue_type: send/recv
+ * 3 : has_imm - indicates that immediate data is
+ * present - for RX completions only
+ * 7:4 : reserved28 - MBZ
+ */
+ u8 flags;
+
+ /* local QP number */
+ u16 qp_num;
+
+ /* Transferred length */
+ u16 length;
+};
+
+/* Tx completion descriptor */
+struct efa_io_tx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+};
+
+/* Rx Completion Descriptor */
+struct efa_io_rx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+
+ /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */
+ u16 ah;
+
+ u16 src_qp_num;
+
+ /* Immediate data */
+ u32 imm;
+};
+
+/* Extended Rx Completion Descriptor */
+struct efa_io_rx_cdesc_ex {
+ /* Base RX completion info */
+ struct efa_io_rx_cdesc rx_cdesc_base;
+
+ /*
+ * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is
+ * enabled.
+ */
+ u8 src_addr[16];
+};
+
+/* tx_meta_desc */
+#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0)
+#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4)
+#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5)
+#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6)
+#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7)
+#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0)
+#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2)
+#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3)
+#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4)
+
+/* tx_buf_desc */
+#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
+
+/* rx_desc */
+#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
+#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
+#define EFA_IO_RX_DESC_LAST_MASK BIT(31)
+
+/* cdesc_common */
+#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0)
+#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
+#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
+
+#endif /* _EFA_IO_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 94b94cca4870..15ee92081118 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/module.h>
@@ -14,10 +14,12 @@
#define PCI_DEV_ID_EFA0_VF 0xefa0
#define PCI_DEV_ID_EFA1_VF 0xefa1
+#define PCI_DEV_ID_EFA2_VF 0xefa2
static const struct pci_device_id efa_pci_tbl[] = {
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) },
{ }
};
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index ecfe70eb5efb..31454643f8c5 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/dma-buf.h>
@@ -15,6 +15,7 @@
#include <rdma/uverbs_ioctl.h>
#include "efa.h"
+#include "efa_io_defs.h"
enum {
EFA_MMAP_DMA_PAGE = 0,
@@ -242,6 +243,7 @@ int efa_query_device(struct ib_device *ibdev,
resp.max_rq_wr = dev_attr->max_rq_depth;
resp.max_rdma_size = dev_attr->max_rdma_size;
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
if (EFA_DEV_CAP(dev, RDMA_READ))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
@@ -1064,6 +1066,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct efa_ibv_create_cq cmd = {};
struct efa_cq *cq = to_ecq(ibcq);
int entries = attr->cqe;
+ bool set_src_addr;
int err;
ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
@@ -1109,7 +1112,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_out;
}
- if (!cmd.cq_entry_size) {
+ set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID);
+ if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) &&
+ (set_src_addr ||
+ cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) {
ibdev_dbg(ibdev,
"Invalid entry size [%u]\n", cmd.cq_entry_size);
err = -EINVAL;
@@ -1138,6 +1144,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
params.dma_addr = cq->dma_addr;
params.entry_size_in_bytes = cmd.cq_entry_size;
params.num_sub_cqs = cmd.num_sub_cqs;
+ params.set_src_addr = set_src_addr;
if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
cq->eq = efa_vec2eq(dev, attr->comp_vector);
params.eqn = cq->eq->eeq.eqn;
diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig
new file mode 100644
index 000000000000..169038e3ceb1
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_ERDMA
+ tristate "Alibaba Elastic RDMA Adapter (ERDMA) support"
+ depends on PCI_MSI && 64BIT
+ depends on INFINIBAND_ADDR_TRANS
+ depends on INFINIBAND_USER_ACCESS
+ help
+ This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA),
+ which supports RDMA features in Alibaba cloud environment.
+
+ To compile this driver as module, choose M here. The module will be
+ called erdma.
diff --git a/drivers/infiniband/hw/erdma/Makefile b/drivers/infiniband/hw/erdma/Makefile
new file mode 100644
index 000000000000..51d2ef91905a
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INFINIBAND_ERDMA) := erdma.o
+
+erdma-y := erdma_cm.o erdma_main.o erdma_cmdq.o erdma_cq.o erdma_verbs.o erdma_qp.o erdma_eq.o
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
new file mode 100644
index 000000000000..730783fbc894
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_H__
+#define __ERDMA_H__
+
+#include <linux/bitfield.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/xarray.h>
+#include <rdma/ib_verbs.h>
+
+#include "erdma_hw.h"
+
+#define DRV_MODULE_NAME "erdma"
+#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack"
+
+struct erdma_eq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+
+ u16 ci;
+ u16 rsvd;
+
+ atomic64_t event_num;
+ atomic64_t notify_num;
+
+ u64 __iomem *db_addr;
+ u64 *db_record;
+};
+
+struct erdma_cmdq_sq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+ u16 ci;
+ u16 pi;
+
+ u16 wqebb_cnt;
+
+ u64 *db_record;
+};
+
+struct erdma_cmdq_cq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+ u32 ci;
+ u32 cmdsn;
+
+ u64 *db_record;
+
+ atomic64_t armed_num;
+};
+
+enum {
+ ERDMA_CMD_STATUS_INIT,
+ ERDMA_CMD_STATUS_ISSUED,
+ ERDMA_CMD_STATUS_FINISHED,
+ ERDMA_CMD_STATUS_TIMEOUT
+};
+
+struct erdma_comp_wait {
+ struct completion wait_event;
+ u32 cmd_status;
+ u32 ctx_id;
+ u16 sq_pi;
+ u8 comp_status;
+ u8 rsvd;
+ u32 comp_data[4];
+};
+
+enum {
+ ERDMA_CMDQ_STATE_OK_BIT = 0,
+ ERDMA_CMDQ_STATE_TIMEOUT_BIT = 1,
+ ERDMA_CMDQ_STATE_CTX_ERR_BIT = 2,
+};
+
+#define ERDMA_CMDQ_TIMEOUT_MS 15000
+#define ERDMA_REG_ACCESS_WAIT_MS 20
+#define ERDMA_WAIT_DEV_DONE_CNT 500
+
+struct erdma_cmdq {
+ unsigned long *comp_wait_bitmap;
+ struct erdma_comp_wait *wait_pool;
+ spinlock_t lock;
+
+ bool use_event;
+
+ struct erdma_cmdq_sq sq;
+ struct erdma_cmdq_cq cq;
+ struct erdma_eq eq;
+
+ unsigned long state;
+
+ struct semaphore credits;
+ u16 max_outstandings;
+};
+
+#define COMPROMISE_CC ERDMA_CC_CUBIC
+enum erdma_cc_alg {
+ ERDMA_CC_NEWRENO = 0,
+ ERDMA_CC_CUBIC,
+ ERDMA_CC_HPCC_RTT,
+ ERDMA_CC_HPCC_ECN,
+ ERDMA_CC_HPCC_INT,
+ ERDMA_CC_METHODS_NUM
+};
+
+struct erdma_devattr {
+ u32 fw_version;
+
+ unsigned char peer_addr[ETH_ALEN];
+
+ int numa_node;
+ enum erdma_cc_alg cc;
+ u32 grp_num;
+ u32 irq_num;
+
+ bool disable_dwqe;
+ u16 dwqe_pages;
+ u16 dwqe_entries;
+
+ u32 max_qp;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_ord;
+ u32 max_ird;
+
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_sge_rd;
+ u32 max_cq;
+ u32 max_cqe;
+ u64 max_mr_size;
+ u32 max_mr;
+ u32 max_pd;
+ u32 max_mw;
+ u32 local_dma_key;
+};
+
+#define ERDMA_IRQNAME_SIZE 50
+
+struct erdma_irq {
+ char name[ERDMA_IRQNAME_SIZE];
+ u32 msix_vector;
+ cpumask_t affinity_hint_mask;
+};
+
+struct erdma_eq_cb {
+ bool ready;
+ void *dev; /* All EQs use this fields to get erdma_dev struct */
+ struct erdma_irq irq;
+ struct erdma_eq eq;
+ struct tasklet_struct tasklet;
+};
+
+struct erdma_resource_cb {
+ unsigned long *bitmap;
+ spinlock_t lock;
+ u32 next_alloc_idx;
+ u32 max_cap;
+};
+
+enum {
+ ERDMA_RES_TYPE_PD = 0,
+ ERDMA_RES_TYPE_STAG_IDX = 1,
+ ERDMA_RES_CNT = 2,
+};
+
+#define ERDMA_EXTRA_BUFFER_SIZE ERDMA_DB_SIZE
+#define WARPPED_BUFSIZE(size) ((size) + ERDMA_EXTRA_BUFFER_SIZE)
+
+struct erdma_dev {
+ struct ib_device ibdev;
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ struct notifier_block netdev_nb;
+
+ resource_size_t func_bar_addr;
+ resource_size_t func_bar_len;
+ u8 __iomem *func_bar;
+
+ struct erdma_devattr attrs;
+ /* physical port state (only one port per device) */
+ enum ib_port_state state;
+ u32 mtu;
+
+ /* cmdq and aeq use the same msix vector */
+ struct erdma_irq comm_irq;
+ struct erdma_cmdq cmdq;
+ struct erdma_eq aeq;
+ struct erdma_eq_cb ceqs[ERDMA_NUM_MSIX_VEC - 1];
+
+ spinlock_t lock;
+ struct erdma_resource_cb res_cb[ERDMA_RES_CNT];
+ struct xarray qp_xa;
+ struct xarray cq_xa;
+
+ u32 next_alloc_qpn;
+ u32 next_alloc_cqn;
+
+ spinlock_t db_bitmap_lock;
+ /* We provide max 64 uContexts that each has one SQ doorbell Page. */
+ DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT);
+ /*
+ * We provide max 496 uContexts that each has one SQ normal Db,
+ * and one directWQE db。
+ */
+ DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT);
+
+ atomic_t num_ctx;
+ struct list_head cep_list;
+};
+
+static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
+{
+ idx &= (depth - 1);
+
+ return qbuf + (idx << shift);
+}
+
+static inline struct erdma_dev *to_edev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct erdma_dev, ibdev);
+}
+
+static inline u32 erdma_reg_read32(struct erdma_dev *dev, u32 reg)
+{
+ return readl(dev->func_bar + reg);
+}
+
+static inline u64 erdma_reg_read64(struct erdma_dev *dev, u32 reg)
+{
+ return readq(dev->func_bar + reg);
+}
+
+static inline void erdma_reg_write32(struct erdma_dev *dev, u32 reg, u32 value)
+{
+ writel(value, dev->func_bar + reg);
+}
+
+static inline void erdma_reg_write64(struct erdma_dev *dev, u32 reg, u64 value)
+{
+ writeq(value, dev->func_bar + reg);
+}
+
+static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg,
+ u32 filed_mask)
+{
+ u32 val = erdma_reg_read32(dev, reg);
+
+ return FIELD_GET(filed_mask, val);
+}
+
+int erdma_cmdq_init(struct erdma_dev *dev);
+void erdma_finish_cmdq_init(struct erdma_dev *dev);
+void erdma_cmdq_destroy(struct erdma_dev *dev);
+
+void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op);
+int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
+ u64 *resp0, u64 *resp1);
+void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq);
+
+int erdma_ceqs_init(struct erdma_dev *dev);
+void erdma_ceqs_uninit(struct erdma_dev *dev);
+void notify_eq(struct erdma_eq *eq);
+void *get_next_valid_eqe(struct erdma_eq *eq);
+
+int erdma_aeq_init(struct erdma_dev *dev);
+void erdma_aeq_destroy(struct erdma_dev *dev);
+
+void erdma_aeq_event_handler(struct erdma_dev *dev);
+void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb);
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c
new file mode 100644
index 000000000000..74f6348f240a
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cm.c
@@ -0,0 +1,1422 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Fredy Neeser */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#include <linux/workqueue.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+static struct workqueue_struct *erdma_cm_wq;
+
+static void erdma_cm_llp_state_change(struct sock *sk);
+static void erdma_cm_llp_data_ready(struct sock *sk);
+static void erdma_cm_llp_error_report(struct sock *sk);
+
+static void erdma_sk_assign_cm_upcalls(struct sock *sk)
+{
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_state_change = erdma_cm_llp_state_change;
+ sk->sk_data_ready = erdma_cm_llp_data_ready;
+ sk->sk_error_report = erdma_cm_llp_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void erdma_sk_save_upcalls(struct sock *sk)
+{
+ struct erdma_cep *cep = sk_to_cep(sk);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ cep->sk_state_change = sk->sk_state_change;
+ cep->sk_data_ready = sk->sk_data_ready;
+ cep->sk_error_report = sk->sk_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
+{
+ sk->sk_state_change = cep->sk_state_change;
+ sk->sk_data_ready = cep->sk_data_ready;
+ sk->sk_error_report = cep->sk_error_report;
+ sk->sk_user_data = NULL;
+}
+
+static void erdma_socket_disassoc(struct socket *s)
+{
+ struct sock *sk = s->sk;
+ struct erdma_cep *cep;
+
+ if (sk) {
+ write_lock_bh(&sk->sk_callback_lock);
+ cep = sk_to_cep(sk);
+ if (cep) {
+ erdma_sk_restore_upcalls(sk, cep);
+ erdma_cep_put(cep);
+ } else {
+ WARN_ON_ONCE(1);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+ } else {
+ WARN_ON_ONCE(1);
+ }
+}
+
+static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
+{
+ cep->sock = s;
+ erdma_cep_get(cep);
+ s->sk->sk_user_data = cep;
+
+ erdma_sk_save_upcalls(s->sk);
+ erdma_sk_assign_cm_upcalls(s->sk);
+}
+
+static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
+{
+ if (cep->listen_cep) {
+ erdma_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ }
+}
+
+static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
+{
+ struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+ unsigned long flags;
+
+ if (!cep)
+ return NULL;
+
+ INIT_LIST_HEAD(&cep->listenq);
+ INIT_LIST_HEAD(&cep->devq);
+ INIT_LIST_HEAD(&cep->work_freelist);
+
+ kref_init(&cep->ref);
+ cep->state = ERDMA_EPSTATE_IDLE;
+ init_waitqueue_head(&cep->waitq);
+ spin_lock_init(&cep->lock);
+ cep->dev = dev;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_add_tail(&cep->devq, &dev->cep_list);
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ return cep;
+}
+
+static void erdma_cm_free_work(struct erdma_cep *cep)
+{
+ struct list_head *w, *tmp;
+ struct erdma_cm_work *work;
+
+ list_for_each_safe(w, tmp, &cep->work_freelist) {
+ work = list_entry(w, struct erdma_cm_work, list);
+ list_del(&work->list);
+ kfree(work);
+ }
+}
+
+static void erdma_cancel_mpatimer(struct erdma_cep *cep)
+{
+ spin_lock_bh(&cep->lock);
+ if (cep->mpa_timer) {
+ if (cancel_delayed_work(&cep->mpa_timer->work)) {
+ erdma_cep_put(cep);
+ kfree(cep->mpa_timer);
+ }
+ cep->mpa_timer = NULL;
+ }
+ spin_unlock_bh(&cep->lock);
+}
+
+static void erdma_put_work(struct erdma_cm_work *work)
+{
+ INIT_LIST_HEAD(&work->list);
+ spin_lock_bh(&work->cep->lock);
+ list_add(&work->list, &work->cep->work_freelist);
+ spin_unlock_bh(&work->cep->lock);
+}
+
+static void erdma_cep_set_inuse(struct erdma_cep *cep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cep->lock, flags);
+ while (cep->in_use) {
+ spin_unlock_irqrestore(&cep->lock, flags);
+ wait_event_interruptible(cep->waitq, !cep->in_use);
+ if (signal_pending(current))
+ flush_signals(current);
+
+ spin_lock_irqsave(&cep->lock, flags);
+ }
+
+ cep->in_use = 1;
+ spin_unlock_irqrestore(&cep->lock, flags);
+}
+
+static void erdma_cep_set_free(struct erdma_cep *cep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cep->lock, flags);
+ cep->in_use = 0;
+ spin_unlock_irqrestore(&cep->lock, flags);
+
+ wake_up(&cep->waitq);
+}
+
+static void __erdma_cep_dealloc(struct kref *ref)
+{
+ struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
+ struct erdma_dev *dev = cep->dev;
+ unsigned long flags;
+
+ WARN_ON(cep->listen_cep);
+
+ kfree(cep->private_data);
+ kfree(cep->mpa.pdata);
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist))
+ erdma_cm_free_work(cep);
+ spin_unlock_bh(&cep->lock);
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_del(&cep->devq);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ kfree(cep);
+}
+
+static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
+{
+ struct erdma_cm_work *work = NULL;
+
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist)) {
+ work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
+ list);
+ list_del_init(&work->list);
+ }
+
+ spin_unlock_bh(&cep->lock);
+ return work;
+}
+
+static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
+{
+ struct erdma_cm_work *work;
+
+ while (num--) {
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ if (!(list_empty(&cep->work_freelist)))
+ erdma_cm_free_work(cep);
+ return -ENOMEM;
+ }
+ work->cep = cep;
+ INIT_LIST_HEAD(&work->list);
+ list_add(&work->list, &cep->work_freelist);
+ }
+
+ return 0;
+}
+
+static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
+ int status)
+{
+ struct iw_cm_event event;
+ struct iw_cm_id *cm_id;
+
+ memset(&event, 0, sizeof(event));
+ event.status = status;
+ event.event = reason;
+
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
+ event.provider_data = cep;
+ cm_id = cep->listen_cep->cm_id;
+
+ event.ird = cep->dev->attrs.max_ird;
+ event.ord = cep->dev->attrs.max_ord;
+ } else {
+ cm_id = cep->cm_id;
+ }
+
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
+ reason == IW_CM_EVENT_CONNECT_REPLY) {
+ u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);
+
+ if (pd_len && cep->mpa.pdata) {
+ event.private_data_len = pd_len;
+ event.private_data = cep->mpa.pdata;
+ }
+
+ getname_local(cep->sock, &event.local_addr);
+ getname_peer(cep->sock, &event.remote_addr);
+ }
+
+ return cm_id->event_handler(cm_id, &event);
+}
+
+void erdma_qp_cm_drop(struct erdma_qp *qp)
+{
+ struct erdma_cep *cep = qp->cep;
+
+ if (!qp->cep)
+ return;
+
+ erdma_cep_set_inuse(cep);
+
+ /* already closed. */
+ if (cep->state == ERDMA_EPSTATE_CLOSED)
+ goto out;
+
+ if (cep->cm_id) {
+ switch (cep->state) {
+ case ERDMA_EPSTATE_AWAIT_MPAREP:
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -EINVAL);
+ break;
+ case ERDMA_EPSTATE_RDMA_MODE:
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ break;
+ case ERDMA_EPSTATE_IDLE:
+ case ERDMA_EPSTATE_LISTENING:
+ case ERDMA_EPSTATE_CONNECTING:
+ case ERDMA_EPSTATE_AWAIT_MPAREQ:
+ case ERDMA_EPSTATE_RECVD_MPAREQ:
+ case ERDMA_EPSTATE_CLOSED:
+ default:
+ break;
+ }
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ erdma_cep_put(cep);
+ }
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+
+ if (cep->qp) {
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+ }
+out:
+ erdma_cep_set_free(cep);
+}
+
+void erdma_cep_put(struct erdma_cep *cep)
+{
+ WARN_ON(kref_read(&cep->ref) < 1);
+ kref_put(&cep->ref, __erdma_cep_dealloc);
+}
+
+void erdma_cep_get(struct erdma_cep *cep)
+{
+ kref_get(&cep->ref);
+}
+
+static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
+ u8 pd_len)
+{
+ struct socket *s = cep->sock;
+ struct mpa_rr *rr = &cep->mpa.hdr;
+ struct kvec iov[3];
+ struct msghdr msg;
+ int iovec_num = 0;
+ int ret;
+ int mpa_len;
+
+ memset(&msg, 0, sizeof(msg));
+
+ rr->params.pd_len = cpu_to_be16(pd_len);
+
+ iov[iovec_num].iov_base = rr;
+ iov[iovec_num].iov_len = sizeof(*rr);
+ iovec_num++;
+ mpa_len = sizeof(*rr);
+
+ iov[iovec_num].iov_base = &cep->mpa.ext_data;
+ iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
+ iovec_num++;
+ mpa_len += sizeof(cep->mpa.ext_data);
+
+ if (pd_len) {
+ iov[iovec_num].iov_base = (char *)pdata;
+ iov[iovec_num].iov_len = pd_len;
+ mpa_len += pd_len;
+ iovec_num++;
+ }
+
+ ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);
+
+ return ret < 0 ? ret : 0;
+}
+
+static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
+ int flags)
+{
+ struct kvec iov = { buf, size };
+ struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };
+
+ return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
+}
+
+static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
+ int hdr_size, int *rcvd_out)
+{
+ struct socket *s = cep->sock;
+ int rcvd;
+
+ *rcvd_out = 0;
+ if (hdr_rcvd < hdr_size) {
+ rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
+ MSG_DONTWAIT);
+ if (rcvd == -EAGAIN)
+ return -EAGAIN;
+
+ if (rcvd <= 0)
+ return -ECONNABORTED;
+
+ hdr_rcvd += rcvd;
+ *rcvd_out = rcvd;
+
+ if (hdr_rcvd < hdr_size)
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
+{
+ *bits = (*bits & ~MPA_RR_MASK_REVISION) |
+ (cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
+}
+
+static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
+{
+ __be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;
+
+ return (u8)be16_to_cpu(rev);
+}
+
+static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
+{
+ *bits = (*bits & ~MPA_EXT_FLAG_CC) |
+ (cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
+}
+
+static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
+{
+ __be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;
+
+ return (u8)be32_to_cpu(cc);
+}
+
+/*
+ * Receive MPA Request/Reply header.
+ *
+ * Returns 0 if complete MPA Request/Reply haeder including
+ * eventual private data was received. Returns -EAGAIN if
+ * header was partially received or negative error code otherwise.
+ *
+ * Context: May be called in process context only
+ */
+static int erdma_recv_mpa_rr(struct erdma_cep *cep)
+{
+ struct mpa_rr *hdr = &cep->mpa.hdr;
+ struct socket *s = cep->sock;
+ u16 pd_len;
+ int rcvd, to_rcv, ret, pd_rcvd;
+
+ if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
+ ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
+ (char *)&cep->mpa.hdr,
+ sizeof(struct mpa_rr), &rcvd);
+ cep->mpa.bytes_rcvd += rcvd;
+ if (ret)
+ return ret;
+ }
+
+ if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
+ __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
+ return -EPROTO;
+
+ if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
+ sizeof(struct erdma_mpa_ext)) {
+ ret = __recv_mpa_hdr(
+ cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
+ (char *)&cep->mpa.ext_data,
+ sizeof(struct erdma_mpa_ext), &rcvd);
+ cep->mpa.bytes_rcvd += rcvd;
+ if (ret)
+ return ret;
+ }
+
+ pd_len = be16_to_cpu(hdr->params.pd_len);
+ pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
+ sizeof(struct erdma_mpa_ext);
+ to_rcv = pd_len - pd_rcvd;
+
+ if (!to_rcv) {
+ /*
+ * We have received the whole MPA Request/Reply message.
+ * Check against peer protocol violation.
+ */
+ u32 word;
+
+ ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
+ &rcvd);
+ if (ret == -EAGAIN && rcvd == 0)
+ return 0;
+
+ if (ret)
+ return ret;
+
+ return -EPROTO;
+ }
+
+ /*
+ * At this point, MPA header has been fully received, and pd_len != 0.
+ * So, begin to receive private data.
+ */
+ if (!cep->mpa.pdata) {
+ cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
+ if (!cep->mpa.pdata)
+ return -ENOMEM;
+ }
+
+ rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
+ MSG_DONTWAIT);
+ if (rcvd < 0)
+ return rcvd;
+
+ if (rcvd > to_rcv)
+ return -EPROTO;
+
+ cep->mpa.bytes_rcvd += rcvd;
+
+ if (to_rcv == rcvd)
+ return 0;
+
+ return -EAGAIN;
+}
+
+/*
+ * erdma_proc_mpareq()
+ *
+ * Read MPA Request from socket and signal new connection to IWCM
+ * if success. Caller must hold lock on corresponding listening CEP.
+ */
+static int erdma_proc_mpareq(struct erdma_cep *cep)
+{
+ struct mpa_rr *req;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret)
+ return ret;
+
+ req = &cep->mpa.hdr;
+
+ if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
+ return -EPROTO;
+
+ memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);
+
+ /* Currently does not support marker and crc. */
+ if (req->params.bits & MPA_RR_FLAG_MARKERS ||
+ req->params.bits & MPA_RR_FLAG_CRC)
+ goto reject_conn;
+
+ cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;
+
+ /* Keep reference until IWCM accepts/rejects */
+ erdma_cep_get(cep);
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
+ if (ret)
+ erdma_cep_put(cep);
+
+ return ret;
+
+reject_conn:
+ req->params.bits &= ~MPA_RR_FLAG_MARKERS;
+ req->params.bits |= MPA_RR_FLAG_REJECT;
+ req->params.bits &= ~MPA_RR_FLAG_CRC;
+
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ erdma_send_mpareqrep(cep, NULL, 0);
+
+ return -EOPNOTSUPP;
+}
+
+static int erdma_proc_mpareply(struct erdma_cep *cep)
+{
+ struct erdma_qp_attrs qp_attrs;
+ struct erdma_qp *qp = cep->qp;
+ struct mpa_rr *rep;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret)
+ goto out_err;
+
+ erdma_cancel_mpatimer(cep);
+
+ rep = &cep->mpa.hdr;
+
+ if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
+ ret = -EPROTO;
+ goto out_err;
+ }
+
+ if (rep->params.bits & MPA_RR_FLAG_REJECT) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
+ return -ECONNRESET;
+ }
+
+ /* Currently does not support marker and crc. */
+ if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
+ (rep->params.bits & MPA_RR_FLAG_CRC)) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
+ return -EINVAL;
+ }
+
+ memset(&qp_attrs, 0, sizeof(qp_attrs));
+ qp_attrs.irq_size = cep->ird;
+ qp_attrs.orq_size = cep->ord;
+ qp_attrs.state = ERDMA_QP_STATE_RTS;
+
+ down_write(&qp->state_lock);
+ if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto out_err;
+ }
+
+ qp->attrs.qp_type = ERDMA_QP_ACTIVE;
+ if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
+ qp->attrs.cc = COMPROMISE_CC;
+
+ ret = erdma_modify_qp_internal(qp, &qp_attrs,
+ ERDMA_QP_ATTR_STATE |
+ ERDMA_QP_ATTR_LLP_HANDLE |
+ ERDMA_QP_ATTR_MPA);
+
+ up_write(&qp->state_lock);
+
+ if (!ret) {
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
+ if (!ret)
+ cep->state = ERDMA_EPSTATE_RDMA_MODE;
+
+ return 0;
+ }
+
+out_err:
+ if (ret != -EAGAIN)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+
+ return ret;
+}
+
+static void erdma_accept_newconn(struct erdma_cep *cep)
+{
+ struct socket *s = cep->sock;
+ struct socket *new_s = NULL;
+ struct erdma_cep *new_cep = NULL;
+ int ret = 0;
+
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ goto error;
+
+ new_cep = erdma_cep_alloc(cep->dev);
+ if (!new_cep)
+ goto error;
+
+ /*
+ * 4: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout.
+ */
+ if (erdma_cm_alloc_work(new_cep, 4) != 0)
+ goto error;
+
+ /*
+ * Copy saved socket callbacks from listening CEP
+ * and assign new socket with new CEP
+ */
+ new_cep->sk_state_change = cep->sk_state_change;
+ new_cep->sk_data_ready = cep->sk_data_ready;
+ new_cep->sk_error_report = cep->sk_error_report;
+
+ ret = kernel_accept(s, &new_s, O_NONBLOCK);
+ if (ret != 0)
+ goto error;
+
+ new_cep->sock = new_s;
+ erdma_cep_get(new_cep);
+ new_s->sk->sk_user_data = new_cep;
+
+ tcp_sock_set_nodelay(new_s->sk);
+ new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;
+
+ ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
+ if (ret)
+ goto error;
+
+ new_cep->listen_cep = cep;
+ erdma_cep_get(cep);
+
+ if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
+ /* MPA REQ already queued */
+ erdma_cep_set_inuse(new_cep);
+ ret = erdma_proc_mpareq(new_cep);
+ if (ret != -EAGAIN) {
+ erdma_cep_put(cep);
+ new_cep->listen_cep = NULL;
+ if (ret) {
+ erdma_cep_set_free(new_cep);
+ goto error;
+ }
+ }
+ erdma_cep_set_free(new_cep);
+ }
+ return;
+
+error:
+ if (new_cep) {
+ new_cep->state = ERDMA_EPSTATE_CLOSED;
+ erdma_cancel_mpatimer(new_cep);
+
+ erdma_cep_put(new_cep);
+ new_cep->sock = NULL;
+ }
+
+ if (new_s) {
+ erdma_socket_disassoc(new_s);
+ sock_release(new_s);
+ }
+}
+
+static int erdma_newconn_connected(struct erdma_cep *cep)
+{
+ int ret = 0;
+
+ cep->mpa.hdr.params.bits = 0;
+ __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
+
+ memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
+ __mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
+
+ ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
+ cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (ret >= 0)
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);
+
+ return ret;
+}
+
+static void erdma_cm_work_handler(struct work_struct *w)
+{
+ struct erdma_cm_work *work;
+ struct erdma_cep *cep;
+ int release_cep = 0, ret = 0;
+
+ work = container_of(w, struct erdma_cm_work, work.work);
+ cep = work->cep;
+
+ erdma_cep_set_inuse(cep);
+
+ switch (work->type) {
+ case ERDMA_CM_WORK_CONNECTED:
+ erdma_cancel_mpatimer(cep);
+ if (cep->state == ERDMA_EPSTATE_CONNECTING) {
+ ret = erdma_newconn_connected(cep);
+ if (ret) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -EIO);
+ release_cep = 1;
+ }
+ }
+ break;
+ case ERDMA_CM_WORK_CONNECTTIMEOUT:
+ if (cep->state == ERDMA_EPSTATE_CONNECTING) {
+ cep->mpa_timer = NULL;
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+ }
+ break;
+ case ERDMA_CM_WORK_ACCEPT:
+ erdma_accept_newconn(cep);
+ break;
+ case ERDMA_CM_WORK_READ_MPAHDR:
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ if (cep->listen_cep) {
+ erdma_cep_set_inuse(cep->listen_cep);
+
+ if (cep->listen_cep->state ==
+ ERDMA_EPSTATE_LISTENING)
+ ret = erdma_proc_mpareq(cep);
+ else
+ ret = -EFAULT;
+
+ erdma_cep_set_free(cep->listen_cep);
+
+ if (ret != -EAGAIN) {
+ erdma_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ if (ret)
+ erdma_cep_put(cep);
+ }
+ }
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ ret = erdma_proc_mpareply(cep);
+ }
+
+ if (ret && ret != -EAGAIN)
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_CLOSE_LLP:
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_PEER_CLOSE:
+ if (cep->cm_id) {
+ if (cep->state == ERDMA_EPSTATE_CONNECTING ||
+ cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA reply not received, but connection drop
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ } else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
+ /*
+ * NOTE: IW_CM_EVENT_DISCONNECT is given just
+ * to transition IWCM into CLOSING.
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ }
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* Socket close before MPA request received. */
+ erdma_disassoc_listen_cep(cep);
+ erdma_cep_put(cep);
+ }
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_MPATIMEOUT:
+ cep->mpa_timer = NULL;
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA request timed out:
+ * Hide any partially received private data and signal
+ * timeout
+ */
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* No MPA req received after peer TCP stream setup. */
+ erdma_disassoc_listen_cep(cep);
+
+ erdma_cep_put(cep);
+ release_cep = 1;
+ }
+ break;
+ default:
+ WARN(1, "Undefined CM work type: %d\n", work->type);
+ }
+
+ if (release_cep) {
+ erdma_cancel_mpatimer(cep);
+ cep->state = ERDMA_EPSTATE_CLOSED;
+ if (cep->qp) {
+ struct erdma_qp *qp = cep->qp;
+ /*
+ * Serialize a potential race with application
+ * closing the QP and calling erdma_qp_cm_drop()
+ */
+ erdma_qp_get(qp);
+ erdma_cep_set_free(cep);
+
+ erdma_qp_llp_close(qp);
+ erdma_qp_put(qp);
+
+ erdma_cep_set_inuse(cep);
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+ }
+
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ erdma_cep_put(cep);
+ }
+ }
+ erdma_cep_set_free(cep);
+ erdma_put_work(work);
+ erdma_cep_put(cep);
+}
+
+int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
+{
+ struct erdma_cm_work *work = erdma_get_work(cep);
+ unsigned long delay = 0;
+
+ if (!work)
+ return -ENOMEM;
+
+ work->type = type;
+ work->cep = cep;
+
+ erdma_cep_get(cep);
+
+ INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);
+
+ if (type == ERDMA_CM_WORK_MPATIMEOUT) {
+ cep->mpa_timer = work;
+
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
+ delay = MPAREP_TIMEOUT;
+ else
+ delay = MPAREQ_TIMEOUT;
+ } else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
+ cep->mpa_timer = work;
+
+ delay = CONNECT_TIMEOUT;
+ }
+
+ queue_delayed_work(erdma_cm_wq, &work->work, delay);
+
+ return 0;
+}
+
+static void erdma_cm_llp_data_ready(struct sock *sk)
+{
+ struct erdma_cep *cep;
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep)
+ goto out;
+
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
+ cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);
+
+out:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+static void erdma_cm_llp_error_report(struct sock *sk)
+{
+ struct erdma_cep *cep = sk_to_cep(sk);
+
+ if (cep)
+ cep->sk_error_report(sk);
+}
+
+static void erdma_cm_llp_state_change(struct sock *sk)
+{
+ struct erdma_cep *cep;
+ void (*orig_state_change)(struct sock *sk);
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep) {
+ read_unlock(&sk->sk_callback_lock);
+ return;
+ }
+ orig_state_change = cep->sk_state_change;
+
+ switch (sk->sk_state) {
+ case TCP_ESTABLISHED:
+ if (cep->state == ERDMA_EPSTATE_CONNECTING)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
+ else
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
+ break;
+ case TCP_CLOSE:
+ case TCP_CLOSE_WAIT:
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
+ break;
+ default:
+ break;
+ }
+ read_unlock(&sk->sk_callback_lock);
+ orig_state_change(sk);
+}
+
+static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
+ int laddrlen, struct sockaddr *raddr,
+ int raddrlen, int flags)
+{
+ int ret;
+
+ sock_set_reuseaddr(s->sk);
+ ret = s->ops->bind(s, laddr, laddrlen);
+ if (ret)
+ return ret;
+ ret = s->ops->connect(s, raddr, raddrlen, flags);
+ return ret < 0 ? ret : 0;
+}
+
+int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct erdma_dev *dev = to_edev(id->device);
+ struct erdma_qp *qp;
+ struct erdma_cep *cep = NULL;
+ struct socket *s = NULL;
+ struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
+ u16 pd_len = params->private_data_len;
+ int ret;
+
+ if (pd_len > MPA_MAX_PRIVDATA)
+ return -EINVAL;
+
+ if (params->ird > dev->attrs.max_ird ||
+ params->ord > dev->attrs.max_ord)
+ return -EINVAL;
+
+ if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ qp = find_qp_by_qpn(dev, params->qpn);
+ if (!qp)
+ return -ENOENT;
+ erdma_qp_get(qp);
+
+ ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (ret < 0)
+ goto error_put_qp;
+
+ cep = erdma_cep_alloc(dev);
+ if (!cep) {
+ ret = -ENOMEM;
+ goto error_release_sock;
+ }
+
+ erdma_cep_set_inuse(cep);
+
+ /* Associate QP with CEP */
+ erdma_cep_get(cep);
+ qp->cep = cep;
+ cep->qp = qp;
+
+ /* Associate cm_id with CEP */
+ id->add_ref(id);
+ cep->cm_id = id;
+
+ /*
+ * 6: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout, connected event
+ * and connect timeout.
+ */
+ ret = erdma_cm_alloc_work(cep, 6);
+ if (ret != 0) {
+ ret = -ENOMEM;
+ goto error_release_cep;
+ }
+
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+ cep->state = ERDMA_EPSTATE_CONNECTING;
+
+ erdma_cep_socket_assoc(cep, s);
+
+ if (pd_len) {
+ cep->pd_len = pd_len;
+ cep->private_data = kmalloc(pd_len, GFP_KERNEL);
+ if (!cep->private_data) {
+ ret = -ENOMEM;
+ goto error_disassoc;
+ }
+
+ memcpy(cep->private_data, params->private_data,
+ params->private_data_len);
+ }
+
+ ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
+ sizeof(*raddr), O_NONBLOCK);
+ if (ret != -EINPROGRESS && ret != 0) {
+ goto error_disassoc;
+ } else if (ret == 0) {
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
+ if (ret)
+ goto error_disassoc;
+ } else {
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
+ if (ret)
+ goto error_disassoc;
+ }
+
+ erdma_cep_set_free(cep);
+ return 0;
+
+error_disassoc:
+ kfree(cep->private_data);
+ cep->private_data = NULL;
+ cep->pd_len = 0;
+
+ erdma_socket_disassoc(s);
+
+error_release_cep:
+ /* disassoc with cm_id */
+ cep->cm_id = NULL;
+ id->rem_ref(id);
+
+ /* disassoc with qp */
+ qp->cep = NULL;
+ erdma_cep_put(cep);
+ cep->qp = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+
+ /* release the cep. */
+ erdma_cep_put(cep);
+
+error_release_sock:
+ if (s)
+ sock_release(s);
+error_put_qp:
+ erdma_qp_put(qp);
+
+ return ret;
+}
+
+int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct erdma_dev *dev = to_edev(id->device);
+ struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+ struct erdma_qp *qp;
+ struct erdma_qp_attrs qp_attrs;
+ int ret;
+
+ erdma_cep_set_inuse(cep);
+ erdma_cep_put(cep);
+
+ /* Free lingering inbound private data */
+ if (cep->mpa.hdr.params.pd_len) {
+ cep->mpa.hdr.params.pd_len = 0;
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ }
+ erdma_cancel_mpatimer(cep);
+
+ if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return -ECONNRESET;
+ }
+
+ qp = find_qp_by_qpn(dev, params->qpn);
+ if (!qp)
+ return -ENOENT;
+ erdma_qp_get(qp);
+
+ down_write(&qp->state_lock);
+ if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ if (params->ord > dev->attrs.max_ord ||
+ params->ird > dev->attrs.max_ord) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ if (params->private_data_len > MPA_MAX_PRIVDATA) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ memset(&qp_attrs, 0, sizeof(qp_attrs));
+ qp_attrs.orq_size = params->ord;
+ qp_attrs.irq_size = params->ird;
+
+ qp_attrs.state = ERDMA_QP_STATE_RTS;
+
+ /* Associate QP with CEP */
+ erdma_cep_get(cep);
+ qp->cep = cep;
+ cep->qp = qp;
+
+ cep->state = ERDMA_EPSTATE_RDMA_MODE;
+
+ qp->attrs.qp_type = ERDMA_QP_PASSIVE;
+ qp->attrs.pd_len = params->private_data_len;
+
+ if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
+ qp->attrs.cc = COMPROMISE_CC;
+
+ /* move to rts */
+ ret = erdma_modify_qp_internal(qp, &qp_attrs,
+ ERDMA_QP_ATTR_STATE |
+ ERDMA_QP_ATTR_ORD |
+ ERDMA_QP_ATTR_LLP_HANDLE |
+ ERDMA_QP_ATTR_IRD |
+ ERDMA_QP_ATTR_MPA);
+ up_write(&qp->state_lock);
+
+ if (ret)
+ goto error;
+
+ cep->mpa.ext_data.bits = 0;
+ __mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
+
+ ret = erdma_send_mpareqrep(cep, params->private_data,
+ params->private_data_len);
+ if (!ret) {
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
+ if (ret)
+ goto error;
+
+ erdma_cep_set_free(cep);
+
+ return 0;
+ }
+
+error:
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(id);
+ cep->cm_id = NULL;
+ }
+
+ if (qp->cep) {
+ erdma_cep_put(cep);
+ qp->cep = NULL;
+ }
+
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return ret;
+}
+
+int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
+{
+ struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+
+ erdma_cep_set_inuse(cep);
+ erdma_cep_put(cep);
+
+ erdma_cancel_mpatimer(cep);
+
+ if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return -ECONNRESET;
+ }
+
+ if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
+ cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
+ erdma_send_mpareqrep(cep, pdata, plen);
+ }
+
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return 0;
+}
+
+int erdma_create_listen(struct iw_cm_id *id, int backlog)
+{
+ struct socket *s;
+ struct erdma_cep *cep = NULL;
+ int ret = 0;
+ struct erdma_dev *dev = to_edev(id->device);
+ int addr_family = id->local_addr.ss_family;
+ struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
+
+ if (addr_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (ret < 0)
+ return ret;
+
+ sock_set_reuseaddr(s->sk);
+
+ /* For wildcard addr, limit binding to current device only */
+ if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
+ s->sk->sk_bound_dev_if = dev->netdev->ifindex;
+
+ ret = s->ops->bind(s, (struct sockaddr *)laddr,
+ sizeof(struct sockaddr_in));
+ if (ret)
+ goto error;
+
+ cep = erdma_cep_alloc(dev);
+ if (!cep) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ erdma_cep_socket_assoc(cep, s);
+
+ ret = erdma_cm_alloc_work(cep, backlog);
+ if (ret)
+ goto error;
+
+ ret = s->ops->listen(s, backlog);
+ if (ret)
+ goto error;
+
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ if (!id->provider_data) {
+ id->provider_data =
+ kmalloc(sizeof(struct list_head), GFP_KERNEL);
+ if (!id->provider_data) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ INIT_LIST_HEAD((struct list_head *)id->provider_data);
+ }
+
+ list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
+ cep->state = ERDMA_EPSTATE_LISTENING;
+
+ return 0;
+
+error:
+ if (cep) {
+ erdma_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ cep->sock = NULL;
+ erdma_socket_disassoc(s);
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+ }
+ sock_release(s);
+
+ return ret;
+}
+
+static void erdma_drop_listeners(struct iw_cm_id *id)
+{
+ struct list_head *p, *tmp;
+ /*
+ * In case of a wildcard rdma_listen on a multi-homed device,
+ * a listener's IWCM id is associated with more than one listening CEP.
+ */
+ list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
+ struct erdma_cep *cep =
+ list_entry(p, struct erdma_cep, listenq);
+
+ list_del(p);
+
+ erdma_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+ cep->state = ERDMA_EPSTATE_CLOSED;
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+ }
+}
+
+int erdma_destroy_listen(struct iw_cm_id *id)
+{
+ if (!id->provider_data)
+ return 0;
+
+ erdma_drop_listeners(id);
+ kfree(id->provider_data);
+ id->provider_data = NULL;
+
+ return 0;
+}
+
+int erdma_cm_init(void)
+{
+ erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
+ if (!erdma_cm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void erdma_cm_exit(void)
+{
+ if (erdma_cm_wq)
+ destroy_workqueue(erdma_cm_wq);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h
new file mode 100644
index 000000000000..8a3f998fec9b
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cm.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#ifndef __ERDMA_CM_H__
+#define __ERDMA_CM_H__
+
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <rdma/iw_cm.h>
+
+/* iWarp MPA protocol defs */
+#define MPA_REVISION_EXT_1 129
+#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+#define MPA_KEY_SIZE 16
+#define MPA_DEFAULT_HDR_LEN 28
+
+struct mpa_rr_params {
+ __be16 bits;
+ __be16 pd_len;
+};
+
+/*
+ * MPA request/response Hdr bits & fields
+ */
+enum {
+ MPA_RR_FLAG_MARKERS = __cpu_to_be16(0x8000),
+ MPA_RR_FLAG_CRC = __cpu_to_be16(0x4000),
+ MPA_RR_FLAG_REJECT = __cpu_to_be16(0x2000),
+ MPA_RR_RESERVED = __cpu_to_be16(0x1f00),
+ MPA_RR_MASK_REVISION = __cpu_to_be16(0x00ff)
+};
+
+/*
+ * MPA request/reply header
+ */
+struct mpa_rr {
+ u8 key[16];
+ struct mpa_rr_params params;
+};
+
+struct erdma_mpa_ext {
+ __be32 cookie;
+ __be32 bits;
+};
+
+enum {
+ MPA_EXT_FLAG_CC = cpu_to_be32(0x0000000f),
+};
+
+struct erdma_mpa_info {
+ struct mpa_rr hdr; /* peer mpa hdr in host byte order */
+ struct erdma_mpa_ext ext_data;
+ char *pdata;
+ int bytes_rcvd;
+};
+
+struct erdma_sk_upcalls {
+ void (*sk_state_change)(struct sock *sk);
+ void (*sk_data_ready)(struct sock *sk, int bytes);
+ void (*sk_error_report)(struct sock *sk);
+};
+
+struct erdma_dev;
+
+enum erdma_cep_state {
+ ERDMA_EPSTATE_IDLE = 1,
+ ERDMA_EPSTATE_LISTENING,
+ ERDMA_EPSTATE_CONNECTING,
+ ERDMA_EPSTATE_AWAIT_MPAREQ,
+ ERDMA_EPSTATE_RECVD_MPAREQ,
+ ERDMA_EPSTATE_AWAIT_MPAREP,
+ ERDMA_EPSTATE_RDMA_MODE,
+ ERDMA_EPSTATE_CLOSED
+};
+
+struct erdma_cep {
+ struct iw_cm_id *cm_id;
+ struct erdma_dev *dev;
+ struct list_head devq;
+ spinlock_t lock;
+ struct kref ref;
+ int in_use;
+ wait_queue_head_t waitq;
+ enum erdma_cep_state state;
+
+ struct list_head listenq;
+ struct erdma_cep *listen_cep;
+
+ struct erdma_qp *qp;
+ struct socket *sock;
+
+ struct erdma_cm_work *mpa_timer;
+ struct list_head work_freelist;
+
+ struct erdma_mpa_info mpa;
+ int ord;
+ int ird;
+
+ int pd_len;
+ /* hold user's private data. */
+ void *private_data;
+
+ /* Saved upcalls of socket llp.sock */
+ void (*sk_state_change)(struct sock *sk);
+ void (*sk_data_ready)(struct sock *sk);
+ void (*sk_error_report)(struct sock *sk);
+};
+
+#define MPAREQ_TIMEOUT (HZ * 20)
+#define MPAREP_TIMEOUT (HZ * 10)
+#define CONNECT_TIMEOUT (HZ * 10)
+
+enum erdma_work_type {
+ ERDMA_CM_WORK_ACCEPT = 1,
+ ERDMA_CM_WORK_READ_MPAHDR,
+ ERDMA_CM_WORK_CLOSE_LLP, /* close socket */
+ ERDMA_CM_WORK_PEER_CLOSE, /* socket indicated peer close */
+ ERDMA_CM_WORK_MPATIMEOUT,
+ ERDMA_CM_WORK_CONNECTED,
+ ERDMA_CM_WORK_CONNECTTIMEOUT
+};
+
+struct erdma_cm_work {
+ struct delayed_work work;
+ struct list_head list;
+ enum erdma_work_type type;
+ struct erdma_cep *cep;
+};
+
+#define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a)))
+
+static inline int getname_peer(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 1);
+}
+
+static inline int getname_local(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 0);
+}
+
+int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *param);
+int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *param);
+int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen);
+int erdma_create_listen(struct iw_cm_id *id, int backlog);
+int erdma_destroy_listen(struct iw_cm_id *id);
+
+void erdma_cep_get(struct erdma_cep *ceq);
+void erdma_cep_put(struct erdma_cep *ceq);
+int erdma_cm_queue_work(struct erdma_cep *ceq, enum erdma_work_type type);
+
+int erdma_cm_init(void);
+void erdma_cm_exit(void);
+
+#define sk_to_cep(sk) ((struct erdma_cep *)((sk)->sk_user_data))
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c
new file mode 100644
index 000000000000..6ebfa6989b11
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma.h"
+
+static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
+{
+ struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
+ u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
+ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
+ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
+ FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);
+
+ *cmdq->cq.db_record = db_data;
+ writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);
+
+ atomic64_inc(&cmdq->cq.armed_num);
+}
+
+static void kick_cmdq_db(struct erdma_cmdq *cmdq)
+{
+ struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
+ u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);
+
+ *cmdq->sq.db_record = db_data;
+ writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
+}
+
+static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
+{
+ int comp_idx;
+
+ spin_lock(&cmdq->lock);
+ comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
+ cmdq->max_outstandings);
+ if (comp_idx == cmdq->max_outstandings) {
+ spin_unlock(&cmdq->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ __set_bit(comp_idx, cmdq->comp_wait_bitmap);
+ spin_unlock(&cmdq->lock);
+
+ return &cmdq->wait_pool[comp_idx];
+}
+
+static void put_comp_wait(struct erdma_cmdq *cmdq,
+ struct erdma_comp_wait *comp_wait)
+{
+ int used;
+
+ cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
+ spin_lock(&cmdq->lock);
+ used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
+ spin_unlock(&cmdq->lock);
+
+ WARN_ON(!used);
+}
+
+static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
+ struct erdma_cmdq *cmdq)
+{
+ int i;
+
+ cmdq->wait_pool =
+ devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
+ sizeof(struct erdma_comp_wait), GFP_KERNEL);
+ if (!cmdq->wait_pool)
+ return -ENOMEM;
+
+ spin_lock_init(&cmdq->lock);
+ cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
+ &dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
+ if (!cmdq->comp_wait_bitmap)
+ return -ENOMEM;
+
+ for (i = 0; i < cmdq->max_outstandings; i++) {
+ init_completion(&cmdq->wait_pool[i].wait_event);
+ cmdq->wait_pool[i].ctx_id = i;
+ }
+
+ return 0;
+}
+
+static int erdma_cmdq_sq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_cmdq_sq *sq = &cmdq->sq;
+ u32 buf_size;
+
+ sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
+ sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;
+
+ buf_size = sq->depth << SQEBB_SHIFT;
+
+ sq->qbuf =
+ dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
+ &sq->qbuf_dma_addr, GFP_KERNEL);
+ if (!sq->qbuf)
+ return -ENOMEM;
+
+ sq->db_record = (u64 *)(sq->qbuf + buf_size);
+
+ spin_lock_init(&sq->lock);
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
+ upper_32_bits(sq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
+ lower_32_bits(sq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
+ erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
+ sq->qbuf_dma_addr + buf_size);
+
+ return 0;
+}
+
+static int erdma_cmdq_cq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_cmdq_cq *cq = &cmdq->cq;
+ u32 buf_size;
+
+ cq->depth = cmdq->sq.depth;
+ buf_size = cq->depth << CQE_SHIFT;
+
+ cq->qbuf =
+ dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
+ &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
+ if (!cq->qbuf)
+ return -ENOMEM;
+
+ spin_lock_init(&cq->lock);
+
+ cq->db_record = (u64 *)(cq->qbuf + buf_size);
+
+ atomic64_set(&cq->armed_num, 0);
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
+ upper_32_bits(cq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
+ lower_32_bits(cq->qbuf_dma_addr));
+ erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
+ cq->qbuf_dma_addr + buf_size);
+
+ return 0;
+}
+
+static int erdma_cmdq_eq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_eq *eq = &cmdq->eq;
+ u32 buf_size;
+
+ eq->depth = cmdq->max_outstandings;
+ buf_size = eq->depth << EQE_SHIFT;
+
+ eq->qbuf =
+ dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
+ &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
+ if (!eq->qbuf)
+ return -ENOMEM;
+
+ spin_lock_init(&eq->lock);
+ atomic64_set(&eq->event_num, 0);
+
+ eq->db_addr =
+ (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
+ eq->db_record = (u64 *)(eq->qbuf + buf_size);
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
+ upper_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
+ lower_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
+ erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
+ eq->qbuf_dma_addr + buf_size);
+
+ return 0;
+}
+
+int erdma_cmdq_init(struct erdma_dev *dev)
+{
+ int err, i;
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ u32 sts, ctrl;
+
+ cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
+ cmdq->use_event = false;
+
+ sema_init(&cmdq->credits, cmdq->max_outstandings);
+
+ err = erdma_cmdq_wait_res_init(dev, cmdq);
+ if (err)
+ return err;
+
+ err = erdma_cmdq_sq_init(dev);
+ if (err)
+ return err;
+
+ err = erdma_cmdq_cq_init(dev);
+ if (err)
+ goto err_destroy_sq;
+
+ err = erdma_cmdq_eq_init(dev);
+ if (err)
+ goto err_destroy_cq;
+
+ ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
+ erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
+
+ for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
+ sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
+ ERDMA_REG_DEV_ST_INIT_DONE_MASK);
+ if (sts)
+ break;
+
+ msleep(ERDMA_REG_ACCESS_WAIT_MS);
+ }
+
+ if (i == ERDMA_WAIT_DEV_DONE_CNT) {
+ dev_err(&dev->pdev->dev, "wait init done failed.\n");
+ err = -ETIMEDOUT;
+ goto err_destroy_eq;
+ }
+
+ set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+
+ return 0;
+
+err_destroy_eq:
+ dma_free_coherent(&dev->pdev->dev,
+ (cmdq->eq.depth << EQE_SHIFT) +
+ ERDMA_EXTRA_BUFFER_SIZE,
+ cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
+
+err_destroy_cq:
+ dma_free_coherent(&dev->pdev->dev,
+ (cmdq->cq.depth << CQE_SHIFT) +
+ ERDMA_EXTRA_BUFFER_SIZE,
+ cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+
+err_destroy_sq:
+ dma_free_coherent(&dev->pdev->dev,
+ (cmdq->sq.depth << SQEBB_SHIFT) +
+ ERDMA_EXTRA_BUFFER_SIZE,
+ cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
+
+ return err;
+}
+
+void erdma_finish_cmdq_init(struct erdma_dev *dev)
+{
+ /* after device init successfully, change cmdq to event mode. */
+ dev->cmdq.use_event = true;
+ arm_cmdq_cq(&dev->cmdq);
+}
+
+void erdma_cmdq_destroy(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+
+ dma_free_coherent(&dev->pdev->dev,
+ (cmdq->eq.depth << EQE_SHIFT) +
+ ERDMA_EXTRA_BUFFER_SIZE,
+ cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
+ dma_free_coherent(&dev->pdev->dev,
+ (cmdq->sq.depth << SQEBB_SHIFT) +
+ ERDMA_EXTRA_BUFFER_SIZE,
+ cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
+ dma_free_coherent(&dev->pdev->dev,
+ (cmdq->cq.depth << CQE_SHIFT) +
+ ERDMA_EXTRA_BUFFER_SIZE,
+ cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+}
+
+static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
+{
+ __be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
+ cmdq->cq.depth, CQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ __be32_to_cpu(READ_ONCE(*cqe)));
+
+ return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
+}
+
+static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
+ struct erdma_comp_wait *comp_wait)
+{
+ __le64 *wqe;
+ u64 hdr = *req;
+
+ comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
+ reinit_completion(&comp_wait->wait_event);
+ comp_wait->sq_pi = cmdq->sq.pi;
+
+ wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
+ SQEBB_SHIFT);
+ memcpy(wqe, req, req_len);
+
+ cmdq->sq.pi += cmdq->sq.wqebb_cnt;
+ hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
+ FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
+ comp_wait->ctx_id) |
+ FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
+ *wqe = cpu_to_le64(hdr);
+
+ kick_cmdq_db(cmdq);
+}
+
+static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
+{
+ struct erdma_comp_wait *comp_wait;
+ u32 hdr0, sqe_idx;
+ __be32 *cqe;
+ u16 ctx_id;
+ u64 *sqe;
+ int i;
+
+ cqe = get_next_valid_cmdq_cqe(cmdq);
+ if (!cqe)
+ return -EAGAIN;
+
+ cmdq->cq.ci++;
+
+ dma_rmb();
+ hdr0 = __be32_to_cpu(*cqe);
+ sqe_idx = __be32_to_cpu(*(cqe + 1));
+
+ sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
+ SQEBB_SHIFT);
+ ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
+ comp_wait = &cmdq->wait_pool[ctx_id];
+ if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
+ return -EIO;
+
+ comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
+ comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
+ cmdq->sq.ci += cmdq->sq.wqebb_cnt;
+
+ for (i = 0; i < 4; i++)
+ comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));
+
+ if (cmdq->use_event)
+ complete(&comp_wait->wait_event);
+
+ return 0;
+}
+
+static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
+{
+ unsigned long flags;
+ u16 comp_num;
+
+ spin_lock_irqsave(&cmdq->cq.lock, flags);
+
+ /* We must have less than # of max_outstandings
+ * completions at one time.
+ */
+ for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
+ if (erdma_poll_single_cmd_completion(cmdq))
+ break;
+
+ if (comp_num && cmdq->use_event)
+ arm_cmdq_cq(cmdq);
+
+ spin_unlock_irqrestore(&cmdq->cq.lock, flags);
+}
+
+void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
+{
+ int got_event = 0;
+
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
+ !cmdq->use_event)
+ return;
+
+ while (get_next_valid_eqe(&cmdq->eq)) {
+ cmdq->eq.ci++;
+ got_event++;
+ }
+
+ if (got_event) {
+ cmdq->cq.cmdsn++;
+ erdma_polling_cmd_completions(cmdq);
+ }
+
+ notify_eq(&cmdq->eq);
+}
+
+static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
+ struct erdma_cmdq *cmdq, u32 timeout)
+{
+ unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);
+
+ while (1) {
+ erdma_polling_cmd_completions(cmdq);
+ if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
+ break;
+
+ if (time_is_before_jiffies(comp_timeout))
+ return -ETIME;
+
+ msleep(20);
+ }
+
+ return 0;
+}
+
+static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
+ struct erdma_cmdq *cmdq, u32 timeout)
+{
+ unsigned long flags = 0;
+
+ wait_for_completion_timeout(&comp_ctx->wait_event,
+ msecs_to_jiffies(timeout));
+
+ if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
+ spin_lock_irqsave(&cmdq->cq.lock, flags);
+ comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
+ spin_unlock_irqrestore(&cmdq->cq.lock, flags);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
+{
+ *hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
+ FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
+}
+
+int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
+ u64 *resp0, u64 *resp1)
+{
+ struct erdma_comp_wait *comp_wait;
+ int ret;
+
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
+ return -ENODEV;
+
+ down(&cmdq->credits);
+
+ comp_wait = get_comp_wait(cmdq);
+ if (IS_ERR(comp_wait)) {
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+ set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
+ up(&cmdq->credits);
+ return PTR_ERR(comp_wait);
+ }
+
+ spin_lock(&cmdq->sq.lock);
+ push_cmdq_sqe(cmdq, req, req_size, comp_wait);
+ spin_unlock(&cmdq->sq.lock);
+
+ if (cmdq->use_event)
+ ret = erdma_wait_cmd_completion(comp_wait, cmdq,
+ ERDMA_CMDQ_TIMEOUT_MS);
+ else
+ ret = erdma_poll_cmd_completion(comp_wait, cmdq,
+ ERDMA_CMDQ_TIMEOUT_MS);
+
+ if (ret) {
+ set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+ goto out;
+ }
+
+ if (comp_wait->comp_status)
+ ret = -EIO;
+
+ if (resp0 && resp1) {
+ *resp0 = *((u64 *)&comp_wait->comp_data[0]);
+ *resp1 = *((u64 *)&comp_wait->comp_data[2]);
+ }
+ put_comp_wait(cmdq, comp_wait);
+
+out:
+ up(&cmdq->credits);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
new file mode 100644
index 000000000000..58e0dc5c75d1
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma_verbs.h"
+
+static void *get_next_valid_cqe(struct erdma_cq *cq)
+{
+ __be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci,
+ cq->depth, CQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ __be32_to_cpu(READ_ONCE(*cqe)));
+
+ return owner ^ !!(cq->kern_cq.ci & cq->depth) ? cqe : NULL;
+}
+
+static void notify_cq(struct erdma_cq *cq, u8 solcitied)
+{
+ u64 db_data =
+ FIELD_PREP(ERDMA_CQDB_IDX_MASK, (cq->kern_cq.notify_cnt)) |
+ FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->cqn) |
+ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
+ FIELD_PREP(ERDMA_CQDB_SOL_MASK, solcitied) |
+ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) |
+ FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci);
+
+ *cq->kern_cq.db_record = db_data;
+ writeq(db_data, cq->kern_cq.db);
+}
+
+int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ unsigned long irq_flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, irq_flags);
+
+ notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq))
+ ret = 1;
+
+ cq->kern_cq.notify_cnt++;
+
+ spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags);
+
+ return ret;
+}
+
+static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
+ [ERDMA_OP_WRITE] = IB_WC_RDMA_WRITE,
+ [ERDMA_OP_READ] = IB_WC_RDMA_READ,
+ [ERDMA_OP_SEND] = IB_WC_SEND,
+ [ERDMA_OP_SEND_WITH_IMM] = IB_WC_SEND,
+ [ERDMA_OP_RECEIVE] = IB_WC_RECV,
+ [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM,
+ [ERDMA_OP_RECV_INV] = IB_WC_RECV,
+ [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV,
+ [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND,
+ [ERDMA_OP_REG_MR] = IB_WC_REG_MR,
+ [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
+};
+
+static const struct {
+ enum erdma_wc_status erdma;
+ enum ib_wc_status base;
+ enum erdma_vendor_err vendor;
+} map_cqe_status[ERDMA_NUM_WC_STATUS] = {
+ { ERDMA_WC_SUCCESS, IB_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_GENERAL_ERR, IB_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_RECV_WQE_FORMAT_ERR, IB_WC_GENERAL_ERR,
+ ERDMA_WC_VENDOR_INVALID_RQE },
+ { ERDMA_WC_RECV_STAG_INVALID_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_INVALID_STAG },
+ { ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION },
+ { ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR },
+ { ERDMA_WC_RECV_PDID_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_INVALID_PD },
+ { ERDMA_WC_RECV_WARRPING_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_WRAP_ERR },
+ { ERDMA_WC_SEND_WQE_FORMAT_ERR, IB_WC_LOC_QP_OP_ERR,
+ ERDMA_WC_VENDOR_INVALID_SQE },
+ { ERDMA_WC_SEND_WQE_ORD_EXCEED, IB_WC_GENERAL_ERR,
+ ERDMA_WC_VENDOR_ZERO_ORD },
+ { ERDMA_WC_SEND_STAG_INVALID_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_INVALID_STAG },
+ { ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION },
+ { ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR },
+ { ERDMA_WC_SEND_PDID_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_INVALID_PD },
+ { ERDMA_WC_SEND_WARRPING_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_WARP_ERR },
+ { ERDMA_WC_FLUSH_ERR, IB_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
+};
+
+#define ERDMA_POLLCQ_NO_QP 1
+
+static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+ u8 opcode, syndrome, qtype;
+ struct erdma_kqp *kern_qp;
+ struct erdma_cqe *cqe;
+ struct erdma_qp *qp;
+ u16 wqe_idx, depth;
+ u32 qpn, cqe_hdr;
+ u64 *id_table;
+ u64 *wqe_hdr;
+
+ cqe = get_next_valid_cqe(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ cq->kern_cq.ci++;
+
+ /* cqbuf should be ready when we poll */
+ dma_rmb();
+
+ qpn = be32_to_cpu(cqe->qpn);
+ wqe_idx = be32_to_cpu(cqe->qe_idx);
+ cqe_hdr = be32_to_cpu(cqe->hdr);
+
+ qp = find_qp_by_qpn(dev, qpn);
+ if (!qp)
+ return ERDMA_POLLCQ_NO_QP;
+
+ kern_qp = &qp->kern_qp;
+
+ qtype = FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr);
+ syndrome = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr);
+ opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr);
+
+ if (qtype == ERDMA_CQE_QTYPE_SQ) {
+ id_table = kern_qp->swr_tbl;
+ depth = qp->attrs.sq_size;
+ wqe_hdr = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ kern_qp->sq_ci =
+ FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *wqe_hdr) +
+ wqe_idx + 1;
+ } else {
+ id_table = kern_qp->rwr_tbl;
+ depth = qp->attrs.rq_size;
+ }
+ wc->wr_id = id_table[wqe_idx & (depth - 1)];
+ wc->byte_len = be32_to_cpu(cqe->size);
+
+ wc->wc_flags = 0;
+
+ wc->opcode = wc_mapping_table[opcode];
+ if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) {
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->imm_data));
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ } else if (opcode == ERDMA_OP_RECV_INV) {
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->inv_rkey);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+
+ if (syndrome >= ERDMA_NUM_WC_STATUS)
+ syndrome = ERDMA_WC_GENERAL_ERR;
+
+ wc->status = map_cqe_status[syndrome].base;
+ wc->vendor_err = map_cqe_status[syndrome].vendor;
+ wc->qp = &qp->ibqp;
+
+ return 0;
+}
+
+int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ unsigned long flags;
+ int npolled, ret;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, flags);
+
+ for (npolled = 0; npolled < num_entries;) {
+ ret = erdma_poll_one_cqe(cq, wc + npolled);
+
+ if (ret == -EAGAIN) /* no received new CQEs. */
+ break;
+ else if (ret) /* ignore invalid CQEs. */
+ continue;
+
+ npolled++;
+ }
+
+ spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
+
+ return npolled;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c
new file mode 100644
index 000000000000..ed54130d924b
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_eq.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma_verbs.h"
+
+#define MAX_POLL_CHUNK_SIZE 16
+
+void notify_eq(struct erdma_eq *eq)
+{
+ u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) |
+ FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1);
+
+ *eq->db_record = db_data;
+ writeq(db_data, eq->db_addr);
+
+ atomic64_inc(&eq->notify_num);
+}
+
+void *get_next_valid_eqe(struct erdma_eq *eq)
+{
+ u64 *eqe = get_queue_entry(eq->qbuf, eq->ci, eq->depth, EQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CEQE_HDR_O_MASK, READ_ONCE(*eqe));
+
+ return owner ^ !!(eq->ci & eq->depth) ? eqe : NULL;
+}
+
+void erdma_aeq_event_handler(struct erdma_dev *dev)
+{
+ struct erdma_aeqe *aeqe;
+ u32 cqn, qpn;
+ struct erdma_qp *qp;
+ struct erdma_cq *cq;
+ struct ib_event event;
+ u32 poll_cnt = 0;
+
+ memset(&event, 0, sizeof(event));
+
+ while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+ aeqe = get_next_valid_eqe(&dev->aeq);
+ if (!aeqe)
+ break;
+
+ dma_rmb();
+
+ dev->aeq.ci++;
+ atomic64_inc(&dev->aeq.event_num);
+ poll_cnt++;
+
+ if (FIELD_GET(ERDMA_AEQE_HDR_TYPE_MASK,
+ le32_to_cpu(aeqe->hdr)) == ERDMA_AE_TYPE_CQ_ERR) {
+ cqn = le32_to_cpu(aeqe->event_data0);
+ cq = find_cq_by_cqn(dev, cqn);
+ if (!cq)
+ continue;
+
+ event.device = cq->ibcq.device;
+ event.element.cq = &cq->ibcq;
+ event.event = IB_EVENT_CQ_ERR;
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&event,
+ cq->ibcq.cq_context);
+ } else {
+ qpn = le32_to_cpu(aeqe->event_data0);
+ qp = find_qp_by_qpn(dev, qpn);
+ if (!qp)
+ continue;
+
+ event.device = qp->ibqp.device;
+ event.element.qp = &qp->ibqp;
+ event.event = IB_EVENT_QP_FATAL;
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&event,
+ qp->ibqp.qp_context);
+ }
+ }
+
+ notify_eq(&dev->aeq);
+}
+
+int erdma_aeq_init(struct erdma_dev *dev)
+{
+ struct erdma_eq *eq = &dev->aeq;
+ u32 buf_size;
+
+ eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
+ buf_size = eq->depth << EQE_SHIFT;
+
+ eq->qbuf =
+ dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
+ &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
+ if (!eq->qbuf)
+ return -ENOMEM;
+
+ spin_lock_init(&eq->lock);
+ atomic64_set(&eq->event_num, 0);
+ atomic64_set(&eq->notify_num, 0);
+
+ eq->db_addr = (u64 __iomem *)(dev->func_bar + ERDMA_REGS_AEQ_DB_REG);
+ eq->db_record = (u64 *)(eq->qbuf + buf_size);
+
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG,
+ upper_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG,
+ lower_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth);
+ erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG,
+ eq->qbuf_dma_addr + buf_size);
+
+ return 0;
+}
+
+void erdma_aeq_destroy(struct erdma_dev *dev)
+{
+ struct erdma_eq *eq = &dev->aeq;
+
+ dma_free_coherent(&dev->pdev->dev,
+ WARPPED_BUFSIZE(eq->depth << EQE_SHIFT), eq->qbuf,
+ eq->qbuf_dma_addr);
+}
+
+void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb)
+{
+ struct erdma_dev *dev = ceq_cb->dev;
+ struct erdma_cq *cq;
+ u32 poll_cnt = 0;
+ u64 *ceqe;
+ int cqn;
+
+ if (!ceq_cb->ready)
+ return;
+
+ while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+ ceqe = get_next_valid_eqe(&ceq_cb->eq);
+ if (!ceqe)
+ break;
+
+ dma_rmb();
+ ceq_cb->eq.ci++;
+ poll_cnt++;
+ cqn = FIELD_GET(ERDMA_CEQE_HDR_CQN_MASK, READ_ONCE(*ceqe));
+
+ cq = find_cq_by_cqn(dev, cqn);
+ if (!cq)
+ continue;
+
+ if (rdma_is_kernel_res(&cq->ibcq.res))
+ cq->kern_cq.cmdsn++;
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ }
+
+ notify_eq(&ceq_cb->eq);
+}
+
+static irqreturn_t erdma_intr_ceq_handler(int irq, void *data)
+{
+ struct erdma_eq_cb *ceq_cb = data;
+
+ tasklet_schedule(&ceq_cb->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+static void erdma_intr_ceq_task(unsigned long data)
+{
+ erdma_ceq_completion_handler((struct erdma_eq_cb *)data);
+}
+
+static int erdma_set_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+ int err;
+
+ snprintf(eqc->irq.name, ERDMA_IRQNAME_SIZE, "erdma-ceq%u@pci:%s", ceqn,
+ pci_name(dev->pdev));
+ eqc->irq.msix_vector = pci_irq_vector(dev->pdev, ceqn + 1);
+
+ tasklet_init(&dev->ceqs[ceqn].tasklet, erdma_intr_ceq_task,
+ (unsigned long)&dev->ceqs[ceqn]);
+
+ cpumask_set_cpu(cpumask_local_spread(ceqn + 1, dev->attrs.numa_node),
+ &eqc->irq.affinity_hint_mask);
+
+ err = request_irq(eqc->irq.msix_vector, erdma_intr_ceq_handler, 0,
+ eqc->irq.name, eqc);
+ if (err) {
+ dev_err(&dev->pdev->dev, "failed to request_irq(%d)\n", err);
+ return err;
+ }
+
+ irq_set_affinity_hint(eqc->irq.msix_vector,
+ &eqc->irq.affinity_hint_mask);
+
+ return 0;
+}
+
+static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+
+ irq_set_affinity_hint(eqc->irq.msix_vector, NULL);
+ free_irq(eqc->irq.msix_vector, eqc);
+}
+
+static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
+{
+ struct erdma_cmdq_create_eq_req req;
+ dma_addr_t db_info_dma_addr;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CREATE_EQ);
+ req.eqn = eqn;
+ req.depth = ilog2(eq->depth);
+ req.qbuf_addr = eq->qbuf_dma_addr;
+ req.qtype = ERDMA_EQ_TYPE_CEQ;
+ /* Vector index is the same as EQN. */
+ req.vector_idx = eqn;
+ db_info_dma_addr = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT);
+ req.db_dma_addr_l = lower_32_bits(db_info_dma_addr);
+ req.db_dma_addr_h = upper_32_bits(db_info_dma_addr);
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
+static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+ u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
+ int ret;
+
+ eq->qbuf =
+ dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
+ &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
+ if (!eq->qbuf)
+ return -ENOMEM;
+
+ spin_lock_init(&eq->lock);
+ atomic64_set(&eq->event_num, 0);
+ atomic64_set(&eq->notify_num, 0);
+
+ eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
+ eq->db_addr =
+ (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG +
+ (ceqn + 1) * ERDMA_DB_SIZE);
+ eq->db_record = (u64 *)(eq->qbuf + buf_size);
+ eq->ci = 0;
+ dev->ceqs[ceqn].dev = dev;
+
+ /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+ ret = create_eq_cmd(dev, ceqn + 1, eq);
+ dev->ceqs[ceqn].ready = ret ? false : true;
+
+ return ret;
+}
+
+static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+ u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
+ struct erdma_cmdq_destroy_eq_req req;
+ int err;
+
+ dev->ceqs[ceqn].ready = 0;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_DESTROY_EQ);
+ /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+ req.eqn = ceqn + 1;
+ req.qtype = ERDMA_EQ_TYPE_CEQ;
+ req.vector_idx = ceqn + 1;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ if (err)
+ return;
+
+ dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), eq->qbuf,
+ eq->qbuf_dma_addr);
+}
+
+int erdma_ceqs_init(struct erdma_dev *dev)
+{
+ u32 i, j;
+ int err;
+
+ for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+ err = erdma_ceq_init_one(dev, i);
+ if (err)
+ goto out_err;
+
+ err = erdma_set_ceq_irq(dev, i);
+ if (err) {
+ erdma_ceq_uninit_one(dev, i);
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ for (j = 0; j < i; j++) {
+ erdma_free_ceq_irq(dev, j);
+ erdma_ceq_uninit_one(dev, j);
+ }
+
+ return err;
+}
+
+void erdma_ceqs_uninit(struct erdma_dev *dev)
+{
+ u32 i;
+
+ for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+ erdma_free_ceq_irq(dev, i);
+ erdma_ceq_uninit_one(dev, i);
+ }
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
new file mode 100644
index 000000000000..e788887732e1
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_HW_H__
+#define __ERDMA_HW_H__
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+/* PCIe device related definition. */
+#define PCI_VENDOR_ID_ALIBABA 0x1ded
+
+#define ERDMA_PCI_WIDTH 64
+#define ERDMA_FUNC_BAR 0
+#define ERDMA_MISX_BAR 2
+
+#define ERDMA_BAR_MASK (BIT(ERDMA_FUNC_BAR) | BIT(ERDMA_MISX_BAR))
+
+/* MSI-X related. */
+#define ERDMA_NUM_MSIX_VEC 32U
+#define ERDMA_MSIX_VECTOR_CMDQ 0
+
+/* PCIe Bar0 Registers. */
+#define ERDMA_REGS_VERSION_REG 0x0
+#define ERDMA_REGS_DEV_CTRL_REG 0x10
+#define ERDMA_REGS_DEV_ST_REG 0x14
+#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18
+#define ERDMA_REGS_NETDEV_MAC_H_REG 0x1C
+#define ERDMA_REGS_CMDQ_SQ_ADDR_L_REG 0x20
+#define ERDMA_REGS_CMDQ_SQ_ADDR_H_REG 0x24
+#define ERDMA_REGS_CMDQ_CQ_ADDR_L_REG 0x28
+#define ERDMA_REGS_CMDQ_CQ_ADDR_H_REG 0x2C
+#define ERDMA_REGS_CMDQ_DEPTH_REG 0x30
+#define ERDMA_REGS_CMDQ_EQ_DEPTH_REG 0x34
+#define ERDMA_REGS_CMDQ_EQ_ADDR_L_REG 0x38
+#define ERDMA_REGS_CMDQ_EQ_ADDR_H_REG 0x3C
+#define ERDMA_REGS_AEQ_ADDR_L_REG 0x40
+#define ERDMA_REGS_AEQ_ADDR_H_REG 0x44
+#define ERDMA_REGS_AEQ_DEPTH_REG 0x48
+#define ERDMA_REGS_GRP_NUM_REG 0x4c
+#define ERDMA_REGS_AEQ_DB_REG 0x50
+#define ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG 0x60
+#define ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG 0x68
+#define ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG 0x70
+#define ERDMA_AEQ_DB_HOST_ADDR_REG 0x78
+#define ERDMA_REGS_STATS_TSO_IN_PKTS_REG 0x80
+#define ERDMA_REGS_STATS_TSO_OUT_PKTS_REG 0x88
+#define ERDMA_REGS_STATS_TSO_OUT_BYTES_REG 0x90
+#define ERDMA_REGS_STATS_TX_DROP_PKTS_REG 0x98
+#define ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG 0xa0
+#define ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG 0xa8
+#define ERDMA_REGS_STATS_RX_PKTS_REG 0xc0
+#define ERDMA_REGS_STATS_RX_BYTES_REG 0xc8
+#define ERDMA_REGS_STATS_RX_DROP_PKTS_REG 0xd0
+#define ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG 0xd8
+#define ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG 0xe0
+#define ERDMA_REGS_CEQ_DB_BASE_REG 0x100
+#define ERDMA_CMDQ_SQDB_REG 0x200
+#define ERDMA_CMDQ_CQDB_REG 0x300
+
+/* DEV_CTRL_REG details. */
+#define ERDMA_REG_DEV_CTRL_RESET_MASK 0x00000001
+#define ERDMA_REG_DEV_CTRL_INIT_MASK 0x00000002
+
+/* DEV_ST_REG details. */
+#define ERDMA_REG_DEV_ST_RESET_DONE_MASK 0x00000001U
+#define ERDMA_REG_DEV_ST_INIT_DONE_MASK 0x00000002U
+
+/* eRDMA PCIe DBs definition. */
+#define ERDMA_BAR_DB_SPACE_BASE 4096
+
+#define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE
+#define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024)
+
+#define ERDMA_BAR_RQDB_SPACE_OFFSET \
+ (ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE)
+#define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024)
+
+#define ERDMA_BAR_CQDB_SPACE_OFFSET \
+ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE)
+
+/* Doorbell page resources related. */
+/*
+ * Max # of parallelly issued directSQE is 3072 per device,
+ * hardware organizes this into 24 group, per group has 128 credits.
+ */
+#define ERDMA_DWQE_MAX_GRP_CNT 24
+#define ERDMA_DWQE_NUM_PER_GRP 128
+
+#define ERDMA_DWQE_TYPE0_CNT 64
+#define ERDMA_DWQE_TYPE1_CNT 496
+/* type1 DB contains 2 DBs, takes 256Byte. */
+#define ERDMA_DWQE_TYPE1_CNT_PER_PAGE 16
+
+#define ERDMA_SDB_SHARED_PAGE_INDEX 95
+
+/* Doorbell related. */
+#define ERDMA_DB_SIZE 8
+
+#define ERDMA_CQDB_IDX_MASK GENMASK_ULL(63, 56)
+#define ERDMA_CQDB_CQN_MASK GENMASK_ULL(55, 32)
+#define ERDMA_CQDB_ARM_MASK BIT_ULL(31)
+#define ERDMA_CQDB_SOL_MASK BIT_ULL(30)
+#define ERDMA_CQDB_CMDSN_MASK GENMASK_ULL(29, 28)
+#define ERDMA_CQDB_CI_MASK GENMASK_ULL(23, 0)
+
+#define ERDMA_EQDB_ARM_MASK BIT(31)
+#define ERDMA_EQDB_CI_MASK GENMASK_ULL(23, 0)
+
+#define ERDMA_PAGE_SIZE_SUPPORT 0x7FFFF000
+
+/* WQE related. */
+#define EQE_SIZE 16
+#define EQE_SHIFT 4
+#define RQE_SIZE 32
+#define RQE_SHIFT 5
+#define CQE_SIZE 32
+#define CQE_SHIFT 5
+#define SQEBB_SIZE 32
+#define SQEBB_SHIFT 5
+#define SQEBB_MASK (~(SQEBB_SIZE - 1))
+#define SQEBB_ALIGN(size) ((size + SQEBB_SIZE - 1) & SQEBB_MASK)
+#define SQEBB_COUNT(size) (SQEBB_ALIGN(size) >> SQEBB_SHIFT)
+
+#define ERDMA_MAX_SQE_SIZE 128
+#define ERDMA_MAX_WQEBB_PER_SQE 4
+
+/* CMDQ related. */
+#define ERDMA_CMDQ_MAX_OUTSTANDING 128
+#define ERDMA_CMDQ_SQE_SIZE 64
+
+/* cmdq sub module definition. */
+enum CMDQ_WQE_SUB_MOD {
+ CMDQ_SUBMOD_RDMA = 0,
+ CMDQ_SUBMOD_COMMON = 1
+};
+
+enum CMDQ_RDMA_OPCODE {
+ CMDQ_OPCODE_QUERY_DEVICE = 0,
+ CMDQ_OPCODE_CREATE_QP = 1,
+ CMDQ_OPCODE_DESTROY_QP = 2,
+ CMDQ_OPCODE_MODIFY_QP = 3,
+ CMDQ_OPCODE_CREATE_CQ = 4,
+ CMDQ_OPCODE_DESTROY_CQ = 5,
+ CMDQ_OPCODE_REG_MR = 8,
+ CMDQ_OPCODE_DEREG_MR = 9
+};
+
+enum CMDQ_COMMON_OPCODE {
+ CMDQ_OPCODE_CREATE_EQ = 0,
+ CMDQ_OPCODE_DESTROY_EQ = 1,
+ CMDQ_OPCODE_QUERY_FW_INFO = 2,
+ CMDQ_OPCODE_CONF_MTU = 3,
+};
+
+/* cmdq-SQE HDR */
+#define ERDMA_CMD_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
+#define ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK GENMASK_ULL(47, 32)
+#define ERDMA_CMD_HDR_SUB_MOD_MASK GENMASK_ULL(25, 24)
+#define ERDMA_CMD_HDR_OPCODE_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
+
+struct erdma_cmdq_destroy_cq_req {
+ u64 hdr;
+ u32 cqn;
+};
+
+#define ERDMA_EQ_TYPE_AEQ 0
+#define ERDMA_EQ_TYPE_CEQ 1
+
+struct erdma_cmdq_create_eq_req {
+ u64 hdr;
+ u64 qbuf_addr;
+ u8 vector_idx;
+ u8 eqn;
+ u8 depth;
+ u8 qtype;
+ u32 db_dma_addr_l;
+ u32 db_dma_addr_h;
+};
+
+struct erdma_cmdq_destroy_eq_req {
+ u64 hdr;
+ u64 rsvd0;
+ u8 vector_idx;
+ u8 eqn;
+ u8 rsvd1;
+ u8 qtype;
+};
+
+struct erdma_cmdq_config_mtu_req {
+ u64 hdr;
+ u32 mtu;
+};
+
+/* create_cq cfg0 */
+#define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24)
+#define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20)
+#define ERDMA_CMD_CREATE_CQ_CQN_MASK GENMASK(19, 0)
+
+/* create_cq cfg1 */
+#define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16)
+#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15)
+#define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0)
+
+struct erdma_cmdq_create_cq_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 qbuf_addr_l;
+ u32 qbuf_addr_h;
+ u32 cfg1;
+ u64 cq_db_info_addr;
+ u32 first_page_offset;
+};
+
+/* regmr/deregmr cfg0 */
+#define ERDMA_CMD_MR_VALID_MASK BIT(31)
+#define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20)
+#define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0)
+
+/* regmr cfg1 */
+#define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12)
+#define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6)
+#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2)
+#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0)
+
+/* regmr cfg2 */
+#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
+#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20)
+#define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_reg_mr_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u64 start_va;
+ u32 size;
+ u32 cfg2;
+ u64 phy_addr[4];
+};
+
+struct erdma_cmdq_dereg_mr_req {
+ u64 hdr;
+ u32 cfg;
+};
+
+/* modify qp cfg */
+#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24)
+#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20)
+#define ERDMA_CMD_MODIFY_QP_QPN_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_modify_qp_req {
+ u64 hdr;
+ u32 cfg;
+ u32 cookie;
+ __be32 dip;
+ __be32 sip;
+ __be16 sport;
+ __be16 dport;
+ u32 send_nxt;
+ u32 recv_nxt;
+};
+
+/* create qp cfg0 */
+#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20)
+#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0)
+
+/* create qp cfg1 */
+#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20)
+#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0)
+
+/* create qp cqn_mtt_cfg */
+#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
+#define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0)
+
+/* create qp mtt_cfg */
+#define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12)
+#define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1)
+#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0)
+
+#define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0)
+
+struct erdma_cmdq_create_qp_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u32 sq_cqn_mtt_cfg;
+ u32 rq_cqn_mtt_cfg;
+ u64 sq_buf_addr;
+ u64 rq_buf_addr;
+ u32 sq_mtt_cfg;
+ u32 rq_mtt_cfg;
+ u64 sq_db_info_dma_addr;
+ u64 rq_db_info_dma_addr;
+};
+
+struct erdma_cmdq_destroy_qp_req {
+ u64 hdr;
+ u32 qpn;
+};
+
+/* cap qword 0 definition */
+#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
+#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
+
+/* cap qword 1 definition */
+#define ERDMA_CMD_DEV_CAP_DMA_LOCAL_KEY_MASK GENMASK_ULL(63, 32)
+#define ERDMA_CMD_DEV_CAP_DEFAULT_CC_MASK GENMASK_ULL(31, 28)
+#define ERDMA_CMD_DEV_CAP_QBLOCK_MASK GENMASK_ULL(27, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_MW_MASK GENMASK_ULL(7, 0)
+
+#define ERDMA_NQP_PER_QBLOCK 1024
+
+#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
+
+/* CQE hdr */
+#define ERDMA_CQE_HDR_OWNER_MASK BIT(31)
+#define ERDMA_CQE_HDR_OPCODE_MASK GENMASK(23, 16)
+#define ERDMA_CQE_HDR_QTYPE_MASK GENMASK(15, 8)
+#define ERDMA_CQE_HDR_SYNDROME_MASK GENMASK(7, 0)
+
+#define ERDMA_CQE_QTYPE_SQ 0
+#define ERDMA_CQE_QTYPE_RQ 1
+#define ERDMA_CQE_QTYPE_CMDQ 2
+
+struct erdma_cqe {
+ __be32 hdr;
+ __be32 qe_idx;
+ __be32 qpn;
+ union {
+ __le32 imm_data;
+ __be32 inv_rkey;
+ };
+ __be32 size;
+ __be32 rsvd[3];
+};
+
+struct erdma_sge {
+ __aligned_le64 laddr;
+ __le32 length;
+ __le32 lkey;
+};
+
+/* Receive Queue Element */
+struct erdma_rqe {
+ __le16 qe_idx;
+ __le16 rsvd0;
+ __le32 qpn;
+ __le32 rsvd1;
+ __le32 rsvd2;
+ __le64 to;
+ __le32 length;
+ __le32 stag;
+};
+
+/* SQE */
+#define ERDMA_SQE_HDR_SGL_LEN_MASK GENMASK_ULL(63, 56)
+#define ERDMA_SQE_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
+#define ERDMA_SQE_HDR_QPN_MASK GENMASK_ULL(51, 32)
+#define ERDMA_SQE_HDR_OPCODE_MASK GENMASK_ULL(31, 27)
+#define ERDMA_SQE_HDR_DWQE_MASK BIT_ULL(26)
+#define ERDMA_SQE_HDR_INLINE_MASK BIT_ULL(25)
+#define ERDMA_SQE_HDR_FENCE_MASK BIT_ULL(24)
+#define ERDMA_SQE_HDR_SE_MASK BIT_ULL(23)
+#define ERDMA_SQE_HDR_CE_MASK BIT_ULL(22)
+#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
+
+/* REG MR attrs */
+#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0)
+#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2)
+#define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6)
+#define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12)
+
+struct erdma_write_sqe {
+ __le64 hdr;
+ __be32 imm_data;
+ __le32 length;
+
+ __le32 sink_stag;
+ __le32 sink_to_l;
+ __le32 sink_to_h;
+
+ __le32 rsvd;
+
+ struct erdma_sge sgl[0];
+};
+
+struct erdma_send_sqe {
+ __le64 hdr;
+ union {
+ __be32 imm_data;
+ __le32 invalid_stag;
+ };
+
+ __le32 length;
+ struct erdma_sge sgl[0];
+};
+
+struct erdma_readreq_sqe {
+ __le64 hdr;
+ __le32 invalid_stag;
+ __le32 length;
+ __le32 sink_stag;
+ __le32 sink_to_l;
+ __le32 sink_to_h;
+ __le32 rsvd;
+};
+
+struct erdma_reg_mr_sqe {
+ __le64 hdr;
+ __le64 addr;
+ __le32 length;
+ __le32 stag;
+ __le32 attrs;
+ __le32 rsvd;
+};
+
+/* EQ related. */
+#define ERDMA_DEFAULT_EQ_DEPTH 256
+
+/* ceqe */
+#define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63)
+#define ERDMA_CEQE_HDR_PI_MASK GENMASK_ULL(55, 32)
+#define ERDMA_CEQE_HDR_O_MASK BIT_ULL(31)
+#define ERDMA_CEQE_HDR_CQN_MASK GENMASK_ULL(19, 0)
+
+/* aeqe */
+#define ERDMA_AEQE_HDR_O_MASK BIT(31)
+#define ERDMA_AEQE_HDR_TYPE_MASK GENMASK(23, 16)
+#define ERDMA_AEQE_HDR_SUBTYPE_MASK GENMASK(7, 0)
+
+#define ERDMA_AE_TYPE_QP_FATAL_EVENT 0
+#define ERDMA_AE_TYPE_QP_ERQ_ERR_EVENT 1
+#define ERDMA_AE_TYPE_ACC_ERR_EVENT 2
+#define ERDMA_AE_TYPE_CQ_ERR 3
+#define ERDMA_AE_TYPE_OTHER_ERROR 4
+
+struct erdma_aeqe {
+ __le32 hdr;
+ __le32 event_data0;
+ __le32 event_data1;
+ __le32 rsvd;
+};
+
+enum erdma_opcode {
+ ERDMA_OP_WRITE = 0,
+ ERDMA_OP_READ = 1,
+ ERDMA_OP_SEND = 2,
+ ERDMA_OP_SEND_WITH_IMM = 3,
+
+ ERDMA_OP_RECEIVE = 4,
+ ERDMA_OP_RECV_IMM = 5,
+ ERDMA_OP_RECV_INV = 6,
+
+ ERDMA_OP_RSVD0 = 7,
+ ERDMA_OP_RSVD1 = 8,
+ ERDMA_OP_WRITE_WITH_IMM = 9,
+
+ ERDMA_OP_RSVD2 = 10,
+ ERDMA_OP_RSVD3 = 11,
+
+ ERDMA_OP_RSP_SEND_IMM = 12,
+ ERDMA_OP_SEND_WITH_INV = 13,
+
+ ERDMA_OP_REG_MR = 14,
+ ERDMA_OP_LOCAL_INV = 15,
+ ERDMA_OP_READ_WITH_INV = 16,
+ ERDMA_NUM_OPCODES = 17,
+ ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
+};
+
+enum erdma_wc_status {
+ ERDMA_WC_SUCCESS = 0,
+ ERDMA_WC_GENERAL_ERR = 1,
+ ERDMA_WC_RECV_WQE_FORMAT_ERR = 2,
+ ERDMA_WC_RECV_STAG_INVALID_ERR = 3,
+ ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4,
+ ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5,
+ ERDMA_WC_RECV_PDID_ERR = 6,
+ ERDMA_WC_RECV_WARRPING_ERR = 7,
+ ERDMA_WC_SEND_WQE_FORMAT_ERR = 8,
+ ERDMA_WC_SEND_WQE_ORD_EXCEED = 9,
+ ERDMA_WC_SEND_STAG_INVALID_ERR = 10,
+ ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11,
+ ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12,
+ ERDMA_WC_SEND_PDID_ERR = 13,
+ ERDMA_WC_SEND_WARRPING_ERR = 14,
+ ERDMA_WC_FLUSH_ERR = 15,
+ ERDMA_WC_RETRY_EXC_ERR = 16,
+ ERDMA_NUM_WC_STATUS
+};
+
+enum erdma_vendor_err {
+ ERDMA_WC_VENDOR_NO_ERR = 0,
+ ERDMA_WC_VENDOR_INVALID_RQE = 1,
+ ERDMA_WC_VENDOR_RQE_INVALID_STAG = 2,
+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3,
+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4,
+ ERDMA_WC_VENDOR_RQE_INVALID_PD = 5,
+ ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6,
+ ERDMA_WC_VENDOR_INVALID_SQE = 0x20,
+ ERDMA_WC_VENDOR_ZERO_ORD = 0x21,
+ ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30,
+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31,
+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32,
+ ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33,
+ ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34
+};
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
new file mode 100644
index 000000000000..49778bb294ae
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -0,0 +1,605 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include <linux/module.h>
+#include <net/addrconf.h>
+#include <rdma/erdma-abi.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
+MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
+ void *arg)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(arg);
+ struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
+
+ if (dev->netdev == NULL || dev->netdev != netdev)
+ goto done;
+
+ switch (event) {
+ case NETDEV_UP:
+ dev->state = IB_PORT_ACTIVE;
+ erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
+ break;
+ case NETDEV_DOWN:
+ dev->state = IB_PORT_DOWN;
+ erdma_port_event(dev, IB_EVENT_PORT_ERR);
+ break;
+ case NETDEV_CHANGEMTU:
+ if (dev->mtu != netdev->mtu) {
+ erdma_set_mtu(dev, netdev->mtu);
+ dev->mtu = netdev->mtu;
+ }
+ break;
+ case NETDEV_REGISTER:
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_GOING_DOWN:
+ case NETDEV_CHANGE:
+ default:
+ break;
+ }
+
+done:
+ return NOTIFY_OK;
+}
+
+static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
+{
+ struct net_device *netdev;
+ int ret = -ENODEV;
+
+ /* Already binded to a net_device, so we skip. */
+ if (dev->netdev)
+ return 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, netdev) {
+ /*
+ * In erdma, the paired netdev and ibdev should have the same
+ * MAC address. erdma can get the value from its PCIe bar
+ * registers. Since erdma can not get the paired netdev
+ * reference directly, we do a traverse here to get the paired
+ * netdev.
+ */
+ if (ether_addr_equal_unaligned(netdev->perm_addr,
+ dev->attrs.peer_addr)) {
+ ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
+ if (ret) {
+ rtnl_unlock();
+ ibdev_warn(&dev->ibdev,
+ "failed (%d) to link netdev", ret);
+ return ret;
+ }
+
+ dev->netdev = netdev;
+ break;
+ }
+ }
+
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int erdma_device_register(struct erdma_dev *dev)
+{
+ struct ib_device *ibdev = &dev->ibdev;
+ int ret;
+
+ ret = erdma_enum_and_get_netdev(dev);
+ if (ret)
+ return ret;
+
+ dev->mtu = dev->netdev->mtu;
+ addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
+
+ ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "ib_register_device failed: ret = %d\n", ret);
+ return ret;
+ }
+
+ dev->netdev_nb.notifier_call = erdma_netdev_event;
+ ret = register_netdevice_notifier(&dev->netdev_nb);
+ if (ret) {
+ ibdev_err(&dev->ibdev, "failed to register notifier.\n");
+ ib_unregister_device(ibdev);
+ }
+
+ return ret;
+}
+
+static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
+{
+ struct erdma_dev *dev = data;
+
+ erdma_cmdq_completion_handler(&dev->cmdq);
+ erdma_aeq_event_handler(dev);
+
+ return IRQ_HANDLED;
+}
+
+static void erdma_dwqe_resource_init(struct erdma_dev *dev)
+{
+ int total_pages, type0, type1;
+
+ dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
+
+ if (dev->attrs.grp_num < 4)
+ dev->attrs.disable_dwqe = true;
+ else
+ dev->attrs.disable_dwqe = false;
+
+ /* One page contains 4 goups. */
+ total_pages = dev->attrs.grp_num * 4;
+
+ if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
+ dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
+ type0 = ERDMA_DWQE_TYPE0_CNT;
+ type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
+ } else {
+ type1 = total_pages / 3;
+ type0 = total_pages - type1 - 1;
+ }
+
+ dev->attrs.dwqe_pages = type0;
+ dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
+}
+
+static int erdma_request_vectors(struct erdma_dev *dev)
+{
+ int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
+ int ret;
+
+ ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
+ ret);
+ return ret;
+ }
+ dev->attrs.irq_num = ret;
+
+ return 0;
+}
+
+static int erdma_comm_irq_init(struct erdma_dev *dev)
+{
+ snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
+ pci_name(dev->pdev));
+ dev->comm_irq.msix_vector =
+ pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
+
+ cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
+ &dev->comm_irq.affinity_hint_mask);
+ irq_set_affinity_hint(dev->comm_irq.msix_vector,
+ &dev->comm_irq.affinity_hint_mask);
+
+ return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
+ dev->comm_irq.name, dev);
+}
+
+static void erdma_comm_irq_uninit(struct erdma_dev *dev)
+{
+ irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
+ free_irq(dev->comm_irq.msix_vector, dev);
+}
+
+static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
+{
+ int ret;
+
+ erdma_dwqe_resource_init(dev);
+
+ ret = dma_set_mask_and_coherent(&pdev->dev,
+ DMA_BIT_MASK(ERDMA_PCI_WIDTH));
+ if (ret)
+ return ret;
+
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+
+ return 0;
+}
+
+static void erdma_device_uninit(struct erdma_dev *dev)
+{
+ u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
+
+ erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
+}
+
+static const struct pci_device_id erdma_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
+ {}
+};
+
+static int erdma_probe_dev(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev;
+ int bars, err;
+ u32 version;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
+ return err;
+ }
+
+ pci_set_master(pdev);
+
+ dev = ib_alloc_device(erdma_dev, ibdev);
+ if (!dev) {
+ dev_err(&pdev->dev, "ib_alloc_device failed\n");
+ err = -ENOMEM;
+ goto err_disable_device;
+ }
+
+ pci_set_drvdata(pdev, dev);
+ dev->pdev = pdev;
+ dev->attrs.numa_node = dev_to_node(&pdev->dev);
+
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ if (bars != ERDMA_BAR_MASK || err) {
+ err = err ? err : -EINVAL;
+ goto err_ib_device_release;
+ }
+
+ dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
+ dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
+
+ dev->func_bar =
+ devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
+ if (!dev->func_bar) {
+ dev_err(&pdev->dev, "devm_ioremap failed.\n");
+ err = -EFAULT;
+ goto err_release_bars;
+ }
+
+ version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
+ if (version == 0) {
+ /* we knows that it is a non-functional function. */
+ err = -ENODEV;
+ goto err_iounmap_func_bar;
+ }
+
+ err = erdma_device_init(dev, pdev);
+ if (err)
+ goto err_iounmap_func_bar;
+
+ err = erdma_request_vectors(dev);
+ if (err)
+ goto err_iounmap_func_bar;
+
+ err = erdma_comm_irq_init(dev);
+ if (err)
+ goto err_free_vectors;
+
+ err = erdma_aeq_init(dev);
+ if (err)
+ goto err_uninit_comm_irq;
+
+ err = erdma_cmdq_init(dev);
+ if (err)
+ goto err_uninit_aeq;
+
+ err = erdma_ceqs_init(dev);
+ if (err)
+ goto err_uninit_cmdq;
+
+ erdma_finish_cmdq_init(dev);
+
+ return 0;
+
+err_uninit_cmdq:
+ erdma_device_uninit(dev);
+ erdma_cmdq_destroy(dev);
+
+err_uninit_aeq:
+ erdma_aeq_destroy(dev);
+
+err_uninit_comm_irq:
+ erdma_comm_irq_uninit(dev);
+
+err_free_vectors:
+ pci_free_irq_vectors(dev->pdev);
+
+err_iounmap_func_bar:
+ devm_iounmap(&pdev->dev, dev->func_bar);
+
+err_release_bars:
+ pci_release_selected_regions(pdev, bars);
+
+err_ib_device_release:
+ ib_dealloc_device(&dev->ibdev);
+
+err_disable_device:
+ pci_disable_device(pdev);
+
+ return err;
+}
+
+static void erdma_remove_dev(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+
+ erdma_ceqs_uninit(dev);
+
+ erdma_device_uninit(dev);
+
+ erdma_cmdq_destroy(dev);
+ erdma_aeq_destroy(dev);
+ erdma_comm_irq_uninit(dev);
+ pci_free_irq_vectors(dev->pdev);
+
+ devm_iounmap(&pdev->dev, dev->func_bar);
+ pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
+
+ ib_dealloc_device(&dev->ibdev);
+
+ pci_disable_device(pdev);
+}
+
+#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
+
+static int erdma_dev_attrs_init(struct erdma_dev *dev)
+{
+ int err;
+ u64 req_hdr, cap0, cap1;
+
+ erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_QUERY_DEVICE);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
+ &cap1);
+ if (err)
+ return err;
+
+ dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
+ dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
+ dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
+ dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
+ dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
+ dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
+ dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
+ dev->attrs.max_mr = dev->attrs.max_qp << 1;
+ dev->attrs.max_cq = dev->attrs.max_qp << 1;
+
+ dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
+ dev->attrs.max_ord = ERDMA_MAX_ORD;
+ dev->attrs.max_ird = ERDMA_MAX_IRD;
+ dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
+ dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
+ dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
+ dev->attrs.max_pd = ERDMA_MAX_PD;
+
+ dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
+ dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
+
+ erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_QUERY_FW_INFO);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
+ &cap1);
+ if (!err)
+ dev->attrs.fw_version =
+ FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
+
+ return err;
+}
+
+static int erdma_res_cb_init(struct erdma_dev *dev)
+{
+ int i, j;
+
+ for (i = 0; i < ERDMA_RES_CNT; i++) {
+ dev->res_cb[i].next_alloc_idx = 1;
+ spin_lock_init(&dev->res_cb[i].lock);
+ dev->res_cb[i].bitmap =
+ bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
+ if (!dev->res_cb[i].bitmap)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (j = 0; j < i; j++)
+ bitmap_free(dev->res_cb[j].bitmap);
+
+ return -ENOMEM;
+}
+
+static void erdma_res_cb_free(struct erdma_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ERDMA_RES_CNT; i++)
+ bitmap_free(dev->res_cb[i].bitmap);
+}
+
+static const struct ib_device_ops erdma_device_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_ERDMA,
+ .uverbs_abi_ver = ERDMA_ABI_VERSION,
+
+ .alloc_mr = erdma_ib_alloc_mr,
+ .alloc_pd = erdma_alloc_pd,
+ .alloc_ucontext = erdma_alloc_ucontext,
+ .create_cq = erdma_create_cq,
+ .create_qp = erdma_create_qp,
+ .dealloc_pd = erdma_dealloc_pd,
+ .dealloc_ucontext = erdma_dealloc_ucontext,
+ .dereg_mr = erdma_dereg_mr,
+ .destroy_cq = erdma_destroy_cq,
+ .destroy_qp = erdma_destroy_qp,
+ .get_dma_mr = erdma_get_dma_mr,
+ .get_port_immutable = erdma_get_port_immutable,
+ .iw_accept = erdma_accept,
+ .iw_add_ref = erdma_qp_get_ref,
+ .iw_connect = erdma_connect,
+ .iw_create_listen = erdma_create_listen,
+ .iw_destroy_listen = erdma_destroy_listen,
+ .iw_get_qp = erdma_get_ibqp,
+ .iw_reject = erdma_reject,
+ .iw_rem_ref = erdma_qp_put_ref,
+ .map_mr_sg = erdma_map_mr_sg,
+ .mmap = erdma_mmap,
+ .mmap_free = erdma_mmap_free,
+ .modify_qp = erdma_modify_qp,
+ .post_recv = erdma_post_recv,
+ .post_send = erdma_post_send,
+ .poll_cq = erdma_poll_cq,
+ .query_device = erdma_query_device,
+ .query_gid = erdma_query_gid,
+ .query_port = erdma_query_port,
+ .query_qp = erdma_query_qp,
+ .req_notify_cq = erdma_req_notify_cq,
+ .reg_user_mr = erdma_reg_user_mr,
+
+ INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
+};
+
+static int erdma_ib_device_add(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+ struct ib_device *ibdev = &dev->ibdev;
+ u64 mac;
+ int ret;
+
+ ret = erdma_dev_attrs_init(dev);
+ if (ret)
+ return ret;
+
+ ibdev->node_type = RDMA_NODE_RNIC;
+ memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
+
+ /*
+ * Current model (one-to-one device association):
+ * One ERDMA device per net_device or, equivalently,
+ * per physical port.
+ */
+ ibdev->phys_port_cnt = 1;
+ ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
+
+ ib_set_device_ops(ibdev, &erdma_device_ops);
+
+ INIT_LIST_HEAD(&dev->cep_list);
+
+ spin_lock_init(&dev->lock);
+ xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
+ dev->next_alloc_cqn = 1;
+ dev->next_alloc_qpn = 1;
+
+ ret = erdma_res_cb_init(dev);
+ if (ret)
+ return ret;
+
+ spin_lock_init(&dev->db_bitmap_lock);
+ bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
+ bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
+
+ atomic_set(&dev->num_ctx, 0);
+
+ mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
+ mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
+
+ u64_to_ether_addr(mac, dev->attrs.peer_addr);
+
+ ret = erdma_device_register(dev);
+ if (ret)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ xa_destroy(&dev->qp_xa);
+ xa_destroy(&dev->cq_xa);
+
+ erdma_res_cb_free(dev);
+
+ return ret;
+}
+
+static void erdma_ib_device_remove(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+
+ unregister_netdevice_notifier(&dev->netdev_nb);
+ ib_unregister_device(&dev->ibdev);
+
+ erdma_res_cb_free(dev);
+ xa_destroy(&dev->qp_xa);
+ xa_destroy(&dev->cq_xa);
+}
+
+static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int ret;
+
+ ret = erdma_probe_dev(pdev);
+ if (ret)
+ return ret;
+
+ ret = erdma_ib_device_add(pdev);
+ if (ret) {
+ erdma_remove_dev(pdev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void erdma_remove(struct pci_dev *pdev)
+{
+ erdma_ib_device_remove(pdev);
+ erdma_remove_dev(pdev);
+}
+
+static struct pci_driver erdma_pci_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = erdma_pci_tbl,
+ .probe = erdma_probe,
+ .remove = erdma_remove
+};
+
+MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
+
+static __init int erdma_init_module(void)
+{
+ int ret;
+
+ ret = erdma_cm_init();
+ if (ret)
+ return ret;
+
+ ret = pci_register_driver(&erdma_pci_driver);
+ if (ret)
+ erdma_cm_exit();
+
+ return ret;
+}
+
+static void __exit erdma_exit_module(void)
+{
+ pci_unregister_driver(&erdma_pci_driver);
+
+ erdma_cm_exit();
+}
+
+module_init(erdma_init_module);
+module_exit(erdma_exit_module);
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
new file mode 100644
index 000000000000..5fe1a339a435
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2021, Alibaba Group */
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+void erdma_qp_llp_close(struct erdma_qp *qp)
+{
+ struct erdma_qp_attrs qp_attrs;
+
+ down_write(&qp->state_lock);
+
+ switch (qp->attrs.state) {
+ case ERDMA_QP_STATE_RTS:
+ case ERDMA_QP_STATE_RTR:
+ case ERDMA_QP_STATE_IDLE:
+ case ERDMA_QP_STATE_TERMINATE:
+ qp_attrs.state = ERDMA_QP_STATE_CLOSING;
+ erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
+ break;
+ case ERDMA_QP_STATE_CLOSING:
+ qp->attrs.state = ERDMA_QP_STATE_IDLE;
+ break;
+ default:
+ break;
+ }
+
+ if (qp->cep) {
+ erdma_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+
+ up_write(&qp->state_lock);
+}
+
+struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
+{
+ struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);
+
+ if (qp)
+ return &qp->ibqp;
+
+ return NULL;
+}
+
+static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
+ struct erdma_qp_attrs *attrs,
+ enum erdma_qp_attr_mask mask)
+{
+ int ret;
+ struct erdma_dev *dev = qp->dev;
+ struct erdma_cmdq_modify_qp_req req;
+ struct tcp_sock *tp;
+ struct erdma_cep *cep = qp->cep;
+ struct sockaddr_storage local_addr, remote_addr;
+
+ if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
+ return -EINVAL;
+
+ if (!(mask & ERDMA_QP_ATTR_MPA))
+ return -EINVAL;
+
+ ret = getname_local(cep->sock, &local_addr);
+ if (ret < 0)
+ return ret;
+
+ ret = getname_peer(cep->sock, &remote_addr);
+ if (ret < 0)
+ return ret;
+
+ qp->attrs.state = ERDMA_QP_STATE_RTS;
+
+ tp = tcp_sk(qp->cep->sock->sk);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
+ req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
+ req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
+ req.dport = to_sockaddr_in(remote_addr).sin_port;
+ req.sport = to_sockaddr_in(local_addr).sin_port;
+
+ req.send_nxt = tp->snd_nxt;
+ /* rsvd tcp seq for mpa-rsp in server. */
+ if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
+ req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
+ req.recv_nxt = tp->rcv_nxt;
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
+static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
+ struct erdma_qp_attrs *attrs,
+ enum erdma_qp_attr_mask mask)
+{
+ struct erdma_dev *dev = qp->dev;
+ struct erdma_cmdq_modify_qp_req req;
+
+ qp->attrs.state = attrs->state;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
+int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
+ enum erdma_qp_attr_mask mask)
+{
+ int drop_conn, ret = 0;
+
+ if (!mask)
+ return 0;
+
+ if (!(mask & ERDMA_QP_ATTR_STATE))
+ return 0;
+
+ switch (qp->attrs.state) {
+ case ERDMA_QP_STATE_IDLE:
+ case ERDMA_QP_STATE_RTR:
+ if (attrs->state == ERDMA_QP_STATE_RTS) {
+ ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
+ } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
+ qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ if (qp->cep) {
+ erdma_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+ ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ }
+ break;
+ case ERDMA_QP_STATE_RTS:
+ drop_conn = 0;
+
+ if (attrs->state == ERDMA_QP_STATE_CLOSING) {
+ ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ drop_conn = 1;
+ } else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
+ qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
+ ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ drop_conn = 1;
+ } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ drop_conn = 1;
+ }
+
+ if (drop_conn)
+ erdma_qp_cm_drop(qp);
+
+ break;
+ case ERDMA_QP_STATE_TERMINATE:
+ if (attrs->state == ERDMA_QP_STATE_ERROR)
+ qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ break;
+ case ERDMA_QP_STATE_CLOSING:
+ if (attrs->state == ERDMA_QP_STATE_IDLE) {
+ qp->attrs.state = ERDMA_QP_STATE_IDLE;
+ } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ } else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
+ return -ECONNABORTED;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void erdma_qp_safe_free(struct kref *ref)
+{
+ struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
+
+ complete(&qp->safe_free);
+}
+
+void erdma_qp_put(struct erdma_qp *qp)
+{
+ WARN_ON(kref_read(&qp->ref) < 1);
+ kref_put(&qp->ref, erdma_qp_safe_free);
+}
+
+void erdma_qp_get(struct erdma_qp *qp)
+{
+ kref_get(&qp->ref);
+}
+
+static int fill_inline_data(struct erdma_qp *qp,
+ const struct ib_send_wr *send_wr, u16 wqe_idx,
+ u32 sgl_offset, __le32 *length_field)
+{
+ u32 remain_size, copy_size, data_off, bytes = 0;
+ char *data;
+ int i = 0;
+
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+ data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
+ SQEBB_SHIFT);
+
+ while (i < send_wr->num_sge) {
+ bytes += send_wr->sg_list[i].length;
+ if (bytes > (int)ERDMA_MAX_INLINE)
+ return -EINVAL;
+
+ remain_size = send_wr->sg_list[i].length;
+ data_off = 0;
+
+ while (1) {
+ copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);
+
+ memcpy(data + sgl_offset,
+ (void *)(uintptr_t)send_wr->sg_list[i].addr +
+ data_off,
+ copy_size);
+ remain_size -= copy_size;
+ data_off += copy_size;
+ sgl_offset += copy_size;
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+
+ data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ if (!remain_size)
+ break;
+ }
+
+ i++;
+ }
+ *length_field = cpu_to_le32(bytes);
+
+ return bytes;
+}
+
+static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
+ u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
+{
+ int i = 0;
+ u32 bytes = 0;
+ char *sgl;
+
+ if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
+ return -EINVAL;
+
+ if (sgl_offset & 0xF)
+ return -EINVAL;
+
+ while (i < send_wr->num_sge) {
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+ sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+
+ bytes += send_wr->sg_list[i].length;
+ memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
+ sizeof(struct ib_sge));
+
+ sgl_offset += sizeof(struct ib_sge);
+ i++;
+ }
+
+ *length_field = cpu_to_le32(bytes);
+ return 0;
+}
+
+static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
+ const struct ib_send_wr *send_wr)
+{
+ u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
+ u32 idx = *pi & (qp->attrs.sq_size - 1);
+ enum ib_wr_opcode op = send_wr->opcode;
+ struct erdma_readreq_sqe *read_sqe;
+ struct erdma_reg_mr_sqe *regmr_sge;
+ struct erdma_write_sqe *write_sqe;
+ struct erdma_send_sqe *send_sqe;
+ struct ib_rdma_wr *rdma_wr;
+ struct erdma_mr *mr;
+ __le32 *length_field;
+ u64 wqe_hdr, *entry;
+ struct ib_sge *sge;
+ u32 attrs;
+ int ret;
+
+ entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
+ SQEBB_SHIFT);
+
+ /* Clear the SQE header section. */
+ *entry = 0;
+
+ qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
+ flags = send_wr->send_flags;
+ wqe_hdr = FIELD_PREP(
+ ERDMA_SQE_HDR_CE_MASK,
+ ((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
+ flags & IB_SEND_SOLICITED ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
+ flags & IB_SEND_FENCE ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
+ flags & IB_SEND_INLINE ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));
+
+ switch (op) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ hw_op = ERDMA_OP_WRITE;
+ if (op == IB_WR_RDMA_WRITE_WITH_IMM)
+ hw_op = ERDMA_OP_WRITE_WITH_IMM;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
+ write_sqe = (struct erdma_write_sqe *)entry;
+
+ write_sqe->imm_data = send_wr->ex.imm_data;
+ write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
+ write_sqe->sink_to_h =
+ cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
+ write_sqe->sink_to_l =
+ cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));
+
+ length_field = &write_sqe->length;
+ wqe_size = sizeof(struct erdma_write_sqe);
+ sgl_offset = wqe_size;
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ read_sqe = (struct erdma_readreq_sqe *)entry;
+ if (unlikely(send_wr->num_sge != 1))
+ return -EINVAL;
+ hw_op = ERDMA_OP_READ;
+ if (op == IB_WR_RDMA_READ_WITH_INV) {
+ hw_op = ERDMA_OP_READ_WITH_INV;
+ read_sqe->invalid_stag =
+ cpu_to_le32(send_wr->ex.invalidate_rkey);
+ }
+
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
+ read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
+ read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
+ read_sqe->sink_to_l =
+ cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
+ read_sqe->sink_to_h =
+ cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));
+
+ sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ sge->addr = rdma_wr->remote_addr;
+ sge->lkey = rdma_wr->rkey;
+ sge->length = send_wr->sg_list[0].length;
+ wqe_size = sizeof(struct erdma_readreq_sqe) +
+ send_wr->num_sge * sizeof(struct ib_sge);
+
+ goto out;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
+ send_sqe = (struct erdma_send_sqe *)entry;
+ hw_op = ERDMA_OP_SEND;
+ if (op == IB_WR_SEND_WITH_IMM) {
+ hw_op = ERDMA_OP_SEND_WITH_IMM;
+ send_sqe->imm_data = send_wr->ex.imm_data;
+ } else if (op == IB_WR_SEND_WITH_INV) {
+ hw_op = ERDMA_OP_SEND_WITH_INV;
+ send_sqe->invalid_stag =
+ cpu_to_le32(send_wr->ex.invalidate_rkey);
+ }
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ length_field = &send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe);
+ sgl_offset = wqe_size;
+
+ break;
+ case IB_WR_REG_MR:
+ wqe_hdr |=
+ FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
+ regmr_sge = (struct erdma_reg_mr_sqe *)entry;
+ mr = to_emr(reg_wr(send_wr)->mr);
+
+ mr->access = ERDMA_MR_ACC_LR |
+ to_erdma_access_flags(reg_wr(send_wr)->access);
+ regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
+ regmr_sge->length = cpu_to_le32(mr->ibmr.length);
+ regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
+ attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) |
+ FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
+ FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
+ mr->mem.mtt_nents);
+
+ if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
+ /* Copy SGLs to SQE content to accelerate */
+ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT),
+ mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents));
+ wqe_size = sizeof(struct erdma_reg_mr_sqe) +
+ MTT_SIZE(mr->mem.mtt_nents);
+ } else {
+ attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
+ wqe_size = sizeof(struct erdma_reg_mr_sqe);
+ }
+
+ regmr_sge->attrs = cpu_to_le32(attrs);
+ goto out;
+ case IB_WR_LOCAL_INV:
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_LOCAL_INV);
+ regmr_sge = (struct erdma_reg_mr_sqe *)entry;
+ regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
+ wqe_size = sizeof(struct erdma_reg_mr_sqe);
+ goto out;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (flags & IB_SEND_INLINE) {
+ ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
+ length_field);
+ if (ret < 0)
+ return -EINVAL;
+ wqe_size += ret;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
+ } else {
+ ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
+ if (ret)
+ return -EINVAL;
+ wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
+ send_wr->num_sge);
+ }
+
+out:
+ wqebb_cnt = SQEBB_COUNT(wqe_size);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
+ *pi += wqebb_cnt;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);
+
+ *entry = wqe_hdr;
+
+ return 0;
+}
+
+static void kick_sq_db(struct erdma_qp *qp, u16 pi)
+{
+ u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
+ FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);
+
+ *(u64 *)qp->kern_qp.sq_db_info = db_data;
+ writeq(db_data, qp->kern_qp.hw_sq_db);
+}
+
+int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ int ret = 0;
+ const struct ib_send_wr *wr = send_wr;
+ unsigned long flags;
+ u16 sq_pi;
+
+ if (!send_wr)
+ return -EINVAL;
+
+ spin_lock_irqsave(&qp->lock, flags);
+ sq_pi = qp->kern_qp.sq_pi;
+
+ while (wr) {
+ if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
+ ret = -ENOMEM;
+ *bad_send_wr = send_wr;
+ break;
+ }
+
+ ret = erdma_push_one_sqe(qp, &sq_pi, wr);
+ if (ret) {
+ *bad_send_wr = wr;
+ break;
+ }
+ qp->kern_qp.sq_pi = sq_pi;
+ kick_sq_db(qp, sq_pi);
+
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ return ret;
+}
+
+static int erdma_post_recv_one(struct erdma_qp *qp,
+ const struct ib_recv_wr *recv_wr)
+{
+ struct erdma_rqe *rqe =
+ get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
+ qp->attrs.rq_size, RQE_SHIFT);
+
+ rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
+ rqe->qpn = cpu_to_le32(QP_ID(qp));
+
+ if (recv_wr->num_sge == 0) {
+ rqe->length = 0;
+ } else if (recv_wr->num_sge == 1) {
+ rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
+ rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
+ rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
+ } else {
+ return -EINVAL;
+ }
+
+ *(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe;
+ writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);
+
+ qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
+ recv_wr->wr_id;
+ qp->kern_qp.rq_pi++;
+
+ return 0;
+}
+
+int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr)
+{
+ const struct ib_recv_wr *wr = recv_wr;
+ struct erdma_qp *qp = to_eqp(ibqp);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&qp->lock, flags);
+
+ while (wr) {
+ ret = erdma_post_recv_one(qp, wr);
+ if (ret) {
+ *bad_recv_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+
+ spin_unlock_irqrestore(&qp->lock, flags);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
new file mode 100644
index 000000000000..62be98e2b941
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -0,0 +1,1459 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+/* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
+
+#include <linux/vmalloc.h>
+#include <net/addrconf.h>
+#include <rdma/erdma-abi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
+{
+ struct erdma_cmdq_create_qp_req req;
+ struct erdma_pd *pd = to_epd(qp->ibqp.pd);
+ struct erdma_uqp *user_qp;
+ u64 resp0, resp1;
+ int err;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_QP);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
+ ilog2(qp->attrs.sq_size)) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
+ ilog2(qp->attrs.rq_size)) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res)) {
+ u32 pgsz_range = ilog2(SZ_1M) - PAGE_SHIFT;
+
+ req.sq_cqn_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ pgsz_range) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
+ req.rq_cqn_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ pgsz_range) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
+
+ req.sq_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
+ ERDMA_MR_INLINE_MTT);
+ req.rq_mtt_cfg = req.sq_mtt_cfg;
+
+ req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
+ req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
+ req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
+ (qp->attrs.sq_size << SQEBB_SHIFT);
+ req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
+ (qp->attrs.rq_size << RQE_SHIFT);
+ } else {
+ user_qp = &qp->user_qp;
+ req.sq_cqn_mtt_cfg = FIELD_PREP(
+ ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ ilog2(user_qp->sq_mtt.page_size) - PAGE_SHIFT);
+ req.sq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
+
+ req.rq_cqn_mtt_cfg = FIELD_PREP(
+ ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ ilog2(user_qp->rq_mtt.page_size) - PAGE_SHIFT);
+ req.rq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
+
+ req.sq_mtt_cfg = user_qp->sq_mtt.page_offset;
+ req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
+ user_qp->sq_mtt.mtt_nents) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
+ user_qp->sq_mtt.mtt_type);
+
+ req.rq_mtt_cfg = user_qp->rq_mtt.page_offset;
+ req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
+ user_qp->rq_mtt.mtt_nents) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
+ user_qp->rq_mtt.mtt_type);
+
+ req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0];
+ req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0];
+
+ req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
+ req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
+ }
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
+ &resp1);
+ if (!err)
+ qp->attrs.cookie =
+ FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
+
+ return err;
+}
+
+static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
+{
+ struct erdma_cmdq_reg_mr_req req;
+ struct erdma_pd *pd = to_epd(mr->ibmr.pd);
+ u64 *phy_addr;
+ int i;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
+ FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
+ FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
+ FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
+ FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) |
+ FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0);
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
+ ilog2(mr->mem.page_size)) |
+ FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
+ FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
+
+ if (mr->type == ERDMA_MR_TYPE_DMA)
+ goto post_cmd;
+
+ if (mr->type == ERDMA_MR_TYPE_NORMAL) {
+ req.start_va = mr->mem.va;
+ req.size = mr->mem.len;
+ }
+
+ if (mr->type == ERDMA_MR_TYPE_FRMR ||
+ mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
+ phy_addr = req.phy_addr;
+ *phy_addr = mr->mem.mtt_entry[0];
+ } else {
+ phy_addr = req.phy_addr;
+ for (i = 0; i < mr->mem.mtt_nents; i++)
+ *phy_addr++ = mr->mem.mtt_entry[i];
+ }
+
+post_cmd:
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
+static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
+{
+ struct erdma_cmdq_create_cq_req req;
+ u32 page_size;
+ struct erdma_mem *mtt;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_CQ);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
+
+ if (rdma_is_kernel_res(&cq->ibcq.res)) {
+ page_size = SZ_32M;
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
+ ilog2(page_size) - PAGE_SHIFT);
+ req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
+ req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
+
+ req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
+ ERDMA_MR_INLINE_MTT);
+
+ req.first_page_offset = 0;
+ req.cq_db_info_addr =
+ cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
+ } else {
+ mtt = &cq->user_cq.qbuf_mtt;
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
+ ilog2(mtt->page_size) - PAGE_SHIFT);
+ if (mtt->mtt_nents == 1) {
+ req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf);
+ req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf);
+ } else {
+ req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]);
+ req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]);
+ }
+ req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
+ mtt->mtt_nents);
+ req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
+ mtt->mtt_type);
+
+ req.first_page_offset = mtt->page_offset;
+ req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
+ }
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
+static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
+{
+ int idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&res_cb->lock, flags);
+ idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
+ res_cb->next_alloc_idx);
+ if (idx == res_cb->max_cap) {
+ idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
+ if (idx == res_cb->max_cap) {
+ res_cb->next_alloc_idx = 1;
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+ return -ENOSPC;
+ }
+ }
+
+ set_bit(idx, res_cb->bitmap);
+ res_cb->next_alloc_idx = idx + 1;
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+
+ return idx;
+}
+
+static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
+{
+ unsigned long flags;
+ u32 used;
+
+ spin_lock_irqsave(&res_cb->lock, flags);
+ used = __test_and_clear_bit(idx, res_cb->bitmap);
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+ WARN_ON(!used);
+}
+
+static struct rdma_user_mmap_entry *
+erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
+ u32 size, u8 mmap_flag, u64 *mmap_offset)
+{
+ struct erdma_user_mmap_entry *entry =
+ kzalloc(sizeof(*entry), GFP_KERNEL);
+ int ret;
+
+ if (!entry)
+ return NULL;
+
+ entry->address = (u64)address;
+ entry->mmap_flag = mmap_flag;
+
+ size = PAGE_ALIGN(size);
+
+ ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
+ size);
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+
+ *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
+ struct ib_udata *unused)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ memset(attr, 0, sizeof(*attr));
+
+ attr->max_mr_size = dev->attrs.max_mr_size;
+ attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
+ attr->vendor_part_id = dev->pdev->device;
+ attr->hw_ver = dev->pdev->revision;
+ attr->max_qp = dev->attrs.max_qp - 1;
+ attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
+ attr->max_qp_rd_atom = dev->attrs.max_ord;
+ attr->max_qp_init_rd_atom = dev->attrs.max_ird;
+ attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
+ attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ ibdev->local_dma_lkey = dev->attrs.local_dma_key;
+ attr->max_send_sge = dev->attrs.max_send_sge;
+ attr->max_recv_sge = dev->attrs.max_recv_sge;
+ attr->max_sge_rd = dev->attrs.max_sge_rd;
+ attr->max_cq = dev->attrs.max_cq - 1;
+ attr->max_cqe = dev->attrs.max_cqe;
+ attr->max_mr = dev->attrs.max_mr;
+ attr->max_pd = dev->attrs.max_pd;
+ attr->max_mw = dev->attrs.max_mw;
+ attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
+ attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
+ attr->fw_ver = dev->attrs.fw_version;
+
+ if (dev->netdev)
+ addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
+ dev->netdev->dev_addr);
+
+ return 0;
+}
+
+int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
+ union ib_gid *gid)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ memset(gid, 0, sizeof(*gid));
+ ether_addr_copy(gid->raw, dev->attrs.peer_addr);
+
+ return 0;
+}
+
+int erdma_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *attr)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+ struct net_device *ndev = dev->netdev;
+
+ memset(attr, 0, sizeof(*attr));
+
+ attr->gid_tbl_len = 1;
+ attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
+ attr->max_msg_sz = -1;
+
+ if (!ndev)
+ goto out;
+
+ ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
+ attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
+ attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
+ if (netif_running(ndev) && netif_carrier_ok(ndev))
+ dev->state = IB_PORT_ACTIVE;
+ else
+ dev->state = IB_PORT_DOWN;
+ attr->state = dev->state;
+
+out:
+ if (dev->state == IB_PORT_ACTIVE)
+ attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ else
+ attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+
+ return 0;
+}
+
+int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
+ struct ib_port_immutable *port_immutable)
+{
+ port_immutable->gid_tbl_len = 1;
+ port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ return 0;
+}
+
+int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct erdma_pd *pd = to_epd(ibpd);
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ int pdn;
+
+ pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
+ if (pdn < 0)
+ return pdn;
+
+ pd->pdn = pdn;
+
+ return 0;
+}
+
+int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct erdma_pd *pd = to_epd(ibpd);
+ struct erdma_dev *dev = to_edev(ibpd->device);
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
+
+ return 0;
+}
+
+static int erdma_qp_validate_cap(struct erdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
+ (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
+ (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
+ (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
+ (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
+ !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int erdma_qp_validate_attr(struct erdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if (attrs->qp_type != IB_QPT_RC)
+ return -EOPNOTSUPP;
+
+ if (attrs->srq)
+ return -EOPNOTSUPP;
+
+ if (!attrs->send_cq || !attrs->recv_cq)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void free_kernel_qp(struct erdma_qp *qp)
+{
+ struct erdma_dev *dev = qp->dev;
+
+ vfree(qp->kern_qp.swr_tbl);
+ vfree(qp->kern_qp.rwr_tbl);
+
+ if (qp->kern_qp.sq_buf)
+ dma_free_coherent(
+ &dev->pdev->dev,
+ WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
+ qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
+
+ if (qp->kern_qp.rq_buf)
+ dma_free_coherent(
+ &dev->pdev->dev,
+ WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
+ qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
+}
+
+static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
+ struct ib_qp_init_attr *attrs)
+{
+ struct erdma_kqp *kqp = &qp->kern_qp;
+ int size;
+
+ if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
+ kqp->sig_all = 1;
+
+ kqp->sq_pi = 0;
+ kqp->sq_ci = 0;
+ kqp->rq_pi = 0;
+ kqp->rq_ci = 0;
+ kqp->hw_sq_db =
+ dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
+ kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
+
+ kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64));
+ kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64));
+ if (!kqp->swr_tbl || !kqp->rwr_tbl)
+ goto err_out;
+
+ size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
+ kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
+ &kqp->sq_buf_dma_addr, GFP_KERNEL);
+ if (!kqp->sq_buf)
+ goto err_out;
+
+ size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
+ kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
+ &kqp->rq_buf_dma_addr, GFP_KERNEL);
+ if (!kqp->rq_buf)
+ goto err_out;
+
+ kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
+ kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);
+
+ return 0;
+
+err_out:
+ free_kernel_qp(qp);
+ return -ENOMEM;
+}
+
+static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
+ u64 start, u64 len, int access, u64 virt,
+ unsigned long req_page_size, u8 force_indirect_mtt)
+{
+ struct ib_block_iter biter;
+ uint64_t *phy_addr = NULL;
+ int ret = 0;
+
+ mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
+ if (IS_ERR(mem->umem)) {
+ ret = PTR_ERR(mem->umem);
+ mem->umem = NULL;
+ return ret;
+ }
+
+ mem->va = virt;
+ mem->len = len;
+ mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
+ mem->page_offset = start & (mem->page_size - 1);
+ mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
+ mem->page_cnt = mem->mtt_nents;
+
+ if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
+ force_indirect_mtt) {
+ mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
+ mem->mtt_buf =
+ alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
+ if (!mem->mtt_buf) {
+ ret = -ENOMEM;
+ goto error_ret;
+ }
+ phy_addr = mem->mtt_buf;
+ } else {
+ mem->mtt_type = ERDMA_MR_INLINE_MTT;
+ phy_addr = mem->mtt_entry;
+ }
+
+ rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
+ *phy_addr = rdma_block_iter_dma_address(&biter);
+ phy_addr++;
+ }
+
+ if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
+ mem->mtt_entry[0] =
+ dma_map_single(&dev->pdev->dev, mem->mtt_buf,
+ MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
+ free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
+ mem->mtt_buf = NULL;
+ ret = -ENOMEM;
+ goto error_ret;
+ }
+ }
+
+ return 0;
+
+error_ret:
+ if (mem->umem) {
+ ib_umem_release(mem->umem);
+ mem->umem = NULL;
+ }
+
+ return ret;
+}
+
+static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
+{
+ if (mem->mtt_buf) {
+ dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
+ MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
+ free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
+ }
+
+ if (mem->umem) {
+ ib_umem_release(mem->umem);
+ mem->umem = NULL;
+ }
+}
+
+static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
+ u64 dbrecords_va,
+ struct erdma_user_dbrecords_page **dbr_page,
+ dma_addr_t *dma_addr)
+{
+ struct erdma_user_dbrecords_page *page = NULL;
+ int rv = 0;
+
+ mutex_lock(&ctx->dbrecords_page_mutex);
+
+ list_for_each_entry(page, &ctx->dbrecords_page_list, list)
+ if (page->va == (dbrecords_va & PAGE_MASK))
+ goto found;
+
+ page = kmalloc(sizeof(*page), GFP_KERNEL);
+ if (!page) {
+ rv = -ENOMEM;
+ goto out;
+ }
+
+ page->va = (dbrecords_va & PAGE_MASK);
+ page->refcnt = 0;
+
+ page->umem = ib_umem_get(ctx->ibucontext.device,
+ dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
+ if (IS_ERR(page->umem)) {
+ rv = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &ctx->dbrecords_page_list);
+
+found:
+ *dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+ (dbrecords_va & ~PAGE_MASK);
+ *dbr_page = page;
+ page->refcnt++;
+
+out:
+ mutex_unlock(&ctx->dbrecords_page_mutex);
+ return rv;
+}
+
+static void
+erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
+ struct erdma_user_dbrecords_page **dbr_page)
+{
+ if (!ctx || !(*dbr_page))
+ return;
+
+ mutex_lock(&ctx->dbrecords_page_mutex);
+ if (--(*dbr_page)->refcnt == 0) {
+ list_del(&(*dbr_page)->list);
+ ib_umem_release((*dbr_page)->umem);
+ kfree(*dbr_page);
+ }
+
+ *dbr_page = NULL;
+ mutex_unlock(&ctx->dbrecords_page_mutex);
+}
+
+static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
+ u64 va, u32 len, u64 db_info_va)
+{
+ dma_addr_t db_info_dma_addr;
+ u32 rq_offset;
+ int ret;
+
+ if (len < (PAGE_ALIGN(qp->attrs.sq_size * SQEBB_SIZE) +
+ qp->attrs.rq_size * RQE_SIZE))
+ return -EINVAL;
+
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va,
+ qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
+ (SZ_1M - SZ_4K), 1);
+ if (ret)
+ return ret;
+
+ rq_offset = PAGE_ALIGN(qp->attrs.sq_size << SQEBB_SHIFT);
+ qp->user_qp.rq_offset = rq_offset;
+
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset,
+ qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
+ (SZ_1M - SZ_4K), 1);
+ if (ret)
+ goto put_sq_mtt;
+
+ ret = erdma_map_user_dbrecords(uctx, db_info_va,
+ &qp->user_qp.user_dbr_page,
+ &db_info_dma_addr);
+ if (ret)
+ goto put_rq_mtt;
+
+ qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
+ qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;
+
+ return 0;
+
+put_rq_mtt:
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
+
+put_sq_mtt:
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
+
+ return ret;
+}
+
+static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
+{
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
+ erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
+}
+
+int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ struct erdma_dev *dev = to_edev(ibqp->device);
+ struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ struct erdma_ureq_create_qp ureq;
+ struct erdma_uresp_create_qp uresp;
+ int ret;
+
+ ret = erdma_qp_validate_cap(dev, attrs);
+ if (ret)
+ goto err_out;
+
+ ret = erdma_qp_validate_attr(dev, attrs);
+ if (ret)
+ goto err_out;
+
+ qp->scq = to_ecq(attrs->send_cq);
+ qp->rcq = to_ecq(attrs->recv_cq);
+ qp->dev = dev;
+ qp->attrs.cc = dev->attrs.cc;
+
+ init_rwsem(&qp->state_lock);
+ kref_init(&qp->ref);
+ init_completion(&qp->safe_free);
+
+ ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
+ XA_LIMIT(1, dev->attrs.max_qp - 1),
+ &dev->next_alloc_qpn, GFP_KERNEL);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
+ ERDMA_MAX_WQEBB_PER_SQE);
+ qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
+
+ if (uctx) {
+ ret = ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen));
+ if (ret)
+ goto err_out_xa;
+
+ ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
+ ureq.db_record_va);
+ if (ret)
+ goto err_out_xa;
+
+ memset(&uresp, 0, sizeof(uresp));
+
+ uresp.num_sqe = qp->attrs.sq_size;
+ uresp.num_rqe = qp->attrs.rq_size;
+ uresp.qp_id = QP_ID(qp);
+ uresp.rq_offset = qp->user_qp.rq_offset;
+
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret)
+ goto err_out_cmd;
+ } else {
+ init_kernel_qp(dev, qp, attrs);
+ }
+
+ qp->attrs.max_send_sge = attrs->cap.max_send_sge;
+ qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
+ qp->attrs.state = ERDMA_QP_STATE_IDLE;
+
+ ret = create_qp_cmd(dev, qp);
+ if (ret)
+ goto err_out_cmd;
+
+ spin_lock_init(&qp->lock);
+
+ return 0;
+
+err_out_cmd:
+ if (uctx)
+ free_user_qp(qp, uctx);
+ else
+ free_kernel_qp(qp);
+err_out_xa:
+ xa_erase(&dev->qp_xa, QP_ID(qp));
+err_out:
+ return ret;
+}
+
+static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
+{
+ int stag_idx;
+
+ stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
+ if (stag_idx < 0)
+ return stag_idx;
+
+ /* For now, we always let key field be zero. */
+ *stag = (stag_idx << 8);
+
+ return 0;
+}
+
+struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ struct erdma_mr *mr;
+ u32 stag;
+ int ret;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto out_free;
+
+ mr->type = ERDMA_MR_TYPE_DMA;
+
+ mr->ibmr.lkey = stag;
+ mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto out_remove_stag;
+
+ return &mr->ibmr;
+
+out_remove_stag:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct erdma_mr *mr;
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ int ret;
+ u32 stag;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto out_free;
+
+ mr->type = ERDMA_MR_TYPE_FRMR;
+
+ mr->ibmr.lkey = stag;
+ mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ /* update it in FRMR. */
+ mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
+ ERDMA_MR_ACC_RW;
+
+ mr->mem.page_size = PAGE_SIZE; /* update it later. */
+ mr->mem.page_cnt = max_num_sg;
+ mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
+ mr->mem.mtt_buf =
+ alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
+ if (!mr->mem.mtt_buf) {
+ ret = -ENOMEM;
+ goto out_remove_stag;
+ }
+
+ mr->mem.mtt_entry[0] =
+ dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
+ MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
+ ret = -ENOMEM;
+ goto out_free_mtt;
+ }
+
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto out_dma_unmap;
+
+ return &mr->ibmr;
+
+out_dma_unmap:
+ dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
+ MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
+out_free_mtt:
+ free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));
+
+out_remove_stag:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct erdma_mr *mr = to_emr(ibmr);
+
+ if (mr->mem.mtt_nents >= mr->mem.page_cnt)
+ return -1;
+
+ *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
+ mr->mem.mtt_nents++;
+
+ return 0;
+}
+
+int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct erdma_mr *mr = to_emr(ibmr);
+ int num;
+
+ mr->mem.mtt_nents = 0;
+
+ num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
+ erdma_set_page);
+
+ return num;
+}
+
+struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 virt, int access, struct ib_udata *udata)
+{
+ struct erdma_mr *mr = NULL;
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ u32 stag;
+ int ret;
+
+ if (!len || len > dev->attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
+ SZ_2G - SZ_4K, 0);
+ if (ret)
+ goto err_out_free;
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto err_out_put_mtt;
+
+ mr->ibmr.lkey = mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ mr->mem.va = virt;
+ mr->mem.len = len;
+ mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
+ mr->valid = 1;
+ mr->type = ERDMA_MR_TYPE_NORMAL;
+
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto err_out_mr;
+
+ return &mr->ibmr;
+
+err_out_mr:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+err_out_put_mtt:
+ put_mtt_entries(dev, &mr->mem);
+
+err_out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct erdma_mr *mr;
+ struct erdma_dev *dev = to_edev(ibmr->device);
+ struct erdma_cmdq_dereg_mr_req req;
+ int ret;
+
+ mr = to_emr(ibmr);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DEREG_MR);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
+ FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ if (ret)
+ return ret;
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
+
+ put_mtt_entries(dev, &mr->mem);
+
+ kfree(mr);
+ return 0;
+}
+
+int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_dev *dev = to_edev(ibcq->device);
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ int err;
+ struct erdma_cmdq_destroy_cq_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_CQ);
+ req.cqn = cq->cqn;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ if (err)
+ return err;
+
+ if (rdma_is_kernel_res(&cq->ibcq.res)) {
+ dma_free_coherent(&dev->pdev->dev,
+ WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ } else {
+ erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
+ }
+
+ xa_erase(&dev->cq_xa, cq->cqn);
+
+ return 0;
+}
+
+int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ struct erdma_dev *dev = to_edev(ibqp->device);
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ struct erdma_qp_attrs qp_attrs;
+ int err;
+ struct erdma_cmdq_destroy_qp_req req;
+
+ down_write(&qp->state_lock);
+ qp_attrs.state = ERDMA_QP_STATE_ERROR;
+ erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
+ up_write(&qp->state_lock);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_QP);
+ req.qpn = QP_ID(qp);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ if (err)
+ return err;
+
+ erdma_qp_put(qp);
+ wait_for_completion(&qp->safe_free);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res)) {
+ vfree(qp->kern_qp.swr_tbl);
+ vfree(qp->kern_qp.rwr_tbl);
+ dma_free_coherent(
+ &dev->pdev->dev,
+ WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
+ qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
+ dma_free_coherent(
+ &dev->pdev->dev,
+ WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
+ qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
+ } else {
+ put_mtt_entries(dev, &qp->user_qp.sq_mtt);
+ put_mtt_entries(dev, &qp->user_qp.rq_mtt);
+ erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
+ }
+
+ if (qp->cep)
+ erdma_cep_put(qp->cep);
+ xa_erase(&dev->qp_xa, QP_ID(qp));
+
+ return 0;
+}
+
+void erdma_qp_get_ref(struct ib_qp *ibqp)
+{
+ erdma_qp_get(to_eqp(ibqp));
+}
+
+void erdma_qp_put_ref(struct ib_qp *ibqp)
+{
+ erdma_qp_put(to_eqp(ibqp));
+}
+
+int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct erdma_user_mmap_entry *entry;
+ pgprot_t prot;
+ int err;
+
+ rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
+ if (!rdma_entry)
+ return -EINVAL;
+
+ entry = to_emmap(rdma_entry);
+
+ switch (entry->mmap_flag) {
+ case ERDMA_MMAP_IO_NC:
+ /* map doorbell. */
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
+ prot, rdma_entry);
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return err;
+}
+
+void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+ kfree(entry);
+}
+
+#define ERDMA_SDB_PAGE 0
+#define ERDMA_SDB_ENTRY 1
+#define ERDMA_SDB_SHARED 2
+
+static void alloc_db_resources(struct erdma_dev *dev,
+ struct erdma_ucontext *ctx)
+{
+ u32 bitmap_idx;
+ struct erdma_devattr *attrs = &dev->attrs;
+
+ if (attrs->disable_dwqe)
+ goto alloc_normal_db;
+
+ /* Try to alloc independent SDB page. */
+ spin_lock(&dev->db_bitmap_lock);
+ bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages);
+ if (bitmap_idx != attrs->dwqe_pages) {
+ set_bit(bitmap_idx, dev->sdb_page);
+ spin_unlock(&dev->db_bitmap_lock);
+
+ ctx->sdb_type = ERDMA_SDB_PAGE;
+ ctx->sdb_idx = bitmap_idx;
+ ctx->sdb_page_idx = bitmap_idx;
+ ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
+ (bitmap_idx << PAGE_SHIFT);
+ ctx->sdb_page_off = 0;
+
+ return;
+ }
+
+ bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries);
+ if (bitmap_idx != attrs->dwqe_entries) {
+ set_bit(bitmap_idx, dev->sdb_entry);
+ spin_unlock(&dev->db_bitmap_lock);
+
+ ctx->sdb_type = ERDMA_SDB_ENTRY;
+ ctx->sdb_idx = bitmap_idx;
+ ctx->sdb_page_idx = attrs->dwqe_pages +
+ bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
+ ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
+
+ ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
+ (ctx->sdb_page_idx << PAGE_SHIFT);
+
+ return;
+ }
+
+ spin_unlock(&dev->db_bitmap_lock);
+
+alloc_normal_db:
+ ctx->sdb_type = ERDMA_SDB_SHARED;
+ ctx->sdb_idx = 0;
+ ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX;
+ ctx->sdb_page_off = 0;
+
+ ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT);
+}
+
+static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
+{
+ rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
+}
+
+int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
+{
+ struct erdma_ucontext *ctx = to_ectx(ibctx);
+ struct erdma_dev *dev = to_edev(ibctx->device);
+ int ret;
+ struct erdma_uresp_alloc_ctx uresp = {};
+
+ if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ INIT_LIST_HEAD(&ctx->dbrecords_page_list);
+ mutex_init(&ctx->dbrecords_page_mutex);
+
+ alloc_db_resources(dev, ctx);
+
+ ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
+ ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
+
+ if (udata->outlen < sizeof(uresp)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
+ if (!ctx->sq_db_mmap_entry) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
+ if (!ctx->rq_db_mmap_entry) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
+ if (!ctx->cq_db_mmap_entry) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ uresp.dev_id = dev->pdev->device;
+ uresp.sdb_type = ctx->sdb_type;
+ uresp.sdb_offset = ctx->sdb_page_off;
+
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ erdma_uctx_user_mmap_entries_remove(ctx);
+ atomic_dec(&dev->num_ctx);
+ return ret;
+}
+
+void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ struct erdma_ucontext *ctx = to_ectx(ibctx);
+ struct erdma_dev *dev = to_edev(ibctx->device);
+
+ spin_lock(&dev->db_bitmap_lock);
+ if (ctx->sdb_type == ERDMA_SDB_PAGE)
+ clear_bit(ctx->sdb_idx, dev->sdb_page);
+ else if (ctx->sdb_type == ERDMA_SDB_ENTRY)
+ clear_bit(ctx->sdb_idx, dev->sdb_entry);
+
+ erdma_uctx_user_mmap_entries_remove(ctx);
+
+ spin_unlock(&dev->db_bitmap_lock);
+
+ atomic_dec(&dev->num_ctx);
+}
+
+static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
+ [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
+ [IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
+ [IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
+ [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
+ [IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
+ [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
+};
+
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct erdma_qp_attrs new_attrs;
+ enum erdma_qp_attr_mask erdma_attr_mask = 0;
+ struct erdma_qp *qp = to_eqp(ibqp);
+ int ret = 0;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ memset(&new_attrs, 0, sizeof(new_attrs));
+
+ if (attr_mask & IB_QP_STATE) {
+ new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
+
+ erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
+ }
+
+ down_write(&qp->state_lock);
+
+ ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
+
+ up_write(&qp->state_lock);
+
+ return ret;
+}
+
+int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct erdma_qp *qp;
+ struct erdma_dev *dev;
+
+ if (ibqp && qp_attr && qp_init_attr) {
+ qp = to_eqp(ibqp);
+ dev = to_edev(ibqp->device);
+ } else {
+ return -EINVAL;
+ }
+
+ qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
+ qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
+
+ qp_attr->cap.max_send_wr = qp->attrs.sq_size;
+ qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
+ qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
+ qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
+
+ qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
+ qp_attr->max_rd_atomic = qp->attrs.irq_size;
+ qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
+
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
+static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
+ struct erdma_ureq_create_cq *ureq)
+{
+ int ret;
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+
+ ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va,
+ ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
+ 1);
+ if (ret)
+ return ret;
+
+ ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
+ &cq->user_cq.user_dbr_page,
+ &cq->user_cq.db_info_dma_addr);
+ if (ret)
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
+
+ return ret;
+}
+
+static int erdma_init_kernel_cq(struct erdma_cq *cq)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+
+ cq->kern_cq.qbuf =
+ dma_alloc_coherent(&dev->pdev->dev,
+ WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
+ &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
+ if (!cq->kern_cq.qbuf)
+ return -ENOMEM;
+
+ cq->kern_cq.db_record =
+ (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
+ spin_lock_init(&cq->kern_cq.lock);
+ /* use default cqdb addr */
+ cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
+
+ return 0;
+}
+
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_dev *dev = to_edev(ibcq->device);
+ unsigned int depth = attr->cqe;
+ int ret;
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+
+ if (depth > dev->attrs.max_cqe)
+ return -EINVAL;
+
+ depth = roundup_pow_of_two(depth);
+ cq->ibcq.cqe = depth;
+ cq->depth = depth;
+ cq->assoc_eqn = attr->comp_vector + 1;
+
+ ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
+ XA_LIMIT(1, dev->attrs.max_cq - 1),
+ &dev->next_alloc_cqn, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ if (!rdma_is_kernel_res(&ibcq->res)) {
+ struct erdma_ureq_create_cq ureq;
+ struct erdma_uresp_create_cq uresp;
+
+ ret = ib_copy_from_udata(&ureq, udata,
+ min(udata->inlen, sizeof(ureq)));
+ if (ret)
+ goto err_out_xa;
+
+ ret = erdma_init_user_cq(ctx, cq, &ureq);
+ if (ret)
+ goto err_out_xa;
+
+ uresp.cq_id = cq->cqn;
+ uresp.num_cqe = depth;
+
+ ret = ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen));
+ if (ret)
+ goto err_free_res;
+ } else {
+ ret = erdma_init_kernel_cq(cq);
+ if (ret)
+ goto err_out_xa;
+ }
+
+ ret = create_cq_cmd(dev, cq);
+ if (ret)
+ goto err_free_res;
+
+ return 0;
+
+err_free_res:
+ if (!rdma_is_kernel_res(&ibcq->res)) {
+ erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
+ } else {
+ dma_free_coherent(&dev->pdev->dev,
+ WARPPED_BUFSIZE(depth << CQE_SHIFT),
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ }
+
+err_out_xa:
+ xa_erase(&dev->cq_xa, cq->cqn);
+
+ return ret;
+}
+
+void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
+{
+ struct erdma_cmdq_config_mtu_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CONF_MTU);
+ req.mtu = mtu;
+
+ erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
+void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
+{
+ struct ib_event event;
+
+ event.device = &dev->ibdev;
+ event.element.port_num = 1;
+ event.event = reason;
+
+ ib_dispatch_event(&event);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
new file mode 100644
index 000000000000..ab6380635e9e
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_VERBS_H__
+#define __ERDMA_VERBS_H__
+
+#include "erdma.h"
+
+/* RDMA Capability. */
+#define ERDMA_MAX_PD (128 * 1024)
+#define ERDMA_MAX_SEND_WR 4096
+#define ERDMA_MAX_ORD 128
+#define ERDMA_MAX_IRD 128
+#define ERDMA_MAX_SGE_RD 1
+#define ERDMA_MAX_CONTEXT (128 * 1024)
+#define ERDMA_MAX_SEND_SGE 6
+#define ERDMA_MAX_RECV_SGE 1
+#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE))
+#define ERDMA_MAX_FRMR_PA 512
+
+enum {
+ ERDMA_MMAP_IO_NC = 0, /* no cache */
+};
+
+struct erdma_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u64 address;
+ u8 mmap_flag;
+};
+
+struct erdma_ucontext {
+ struct ib_ucontext ibucontext;
+
+ u32 sdb_type;
+ u32 sdb_idx;
+ u32 sdb_page_idx;
+ u32 sdb_page_off;
+ u64 sdb;
+ u64 rdb;
+ u64 cdb;
+
+ struct rdma_user_mmap_entry *sq_db_mmap_entry;
+ struct rdma_user_mmap_entry *rq_db_mmap_entry;
+ struct rdma_user_mmap_entry *cq_db_mmap_entry;
+
+ /* doorbell records */
+ struct list_head dbrecords_page_list;
+ struct mutex dbrecords_page_mutex;
+};
+
+struct erdma_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+};
+
+/*
+ * MemoryRegion definition.
+ */
+#define ERDMA_MAX_INLINE_MTT_ENTRIES 4
+#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt takes 8 Bytes. */
+#define ERDMA_MR_MAX_MTT_CNT 524288
+#define ERDMA_MTT_ENTRY_SIZE 8
+
+#define ERDMA_MR_TYPE_NORMAL 0
+#define ERDMA_MR_TYPE_FRMR 1
+#define ERDMA_MR_TYPE_DMA 2
+
+#define ERDMA_MR_INLINE_MTT 0
+#define ERDMA_MR_INDIRECT_MTT 1
+
+#define ERDMA_MR_ACC_LR BIT(0)
+#define ERDMA_MR_ACC_LW BIT(1)
+#define ERDMA_MR_ACC_RR BIT(2)
+#define ERDMA_MR_ACC_RW BIT(3)
+
+static inline u8 to_erdma_access_flags(int access)
+{
+ return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) |
+ (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) |
+ (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0);
+}
+
+struct erdma_mem {
+ struct ib_umem *umem;
+ void *mtt_buf;
+ u32 mtt_type;
+ u32 page_size;
+ u32 page_offset;
+ u32 page_cnt;
+ u32 mtt_nents;
+
+ u64 va;
+ u64 len;
+
+ u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES];
+};
+
+struct erdma_mr {
+ struct ib_mr ibmr;
+ struct erdma_mem mem;
+ u8 type;
+ u8 access;
+ u8 valid;
+};
+
+struct erdma_user_dbrecords_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ u64 va;
+ int refcnt;
+};
+
+struct erdma_uqp {
+ struct erdma_mem sq_mtt;
+ struct erdma_mem rq_mtt;
+
+ dma_addr_t sq_db_info_dma_addr;
+ dma_addr_t rq_db_info_dma_addr;
+
+ struct erdma_user_dbrecords_page *user_dbr_page;
+
+ u32 rq_offset;
+};
+
+struct erdma_kqp {
+ u16 sq_pi;
+ u16 sq_ci;
+
+ u16 rq_pi;
+ u16 rq_ci;
+
+ u64 *swr_tbl;
+ u64 *rwr_tbl;
+
+ void __iomem *hw_sq_db;
+ void __iomem *hw_rq_db;
+
+ void *sq_buf;
+ dma_addr_t sq_buf_dma_addr;
+
+ void *rq_buf;
+ dma_addr_t rq_buf_dma_addr;
+
+ void *sq_db_info;
+ void *rq_db_info;
+
+ u8 sig_all;
+};
+
+enum erdma_qp_state {
+ ERDMA_QP_STATE_IDLE = 0,
+ ERDMA_QP_STATE_RTR = 1,
+ ERDMA_QP_STATE_RTS = 2,
+ ERDMA_QP_STATE_CLOSING = 3,
+ ERDMA_QP_STATE_TERMINATE = 4,
+ ERDMA_QP_STATE_ERROR = 5,
+ ERDMA_QP_STATE_UNDEF = 7,
+ ERDMA_QP_STATE_COUNT = 8
+};
+
+enum erdma_qp_attr_mask {
+ ERDMA_QP_ATTR_STATE = (1 << 0),
+ ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2),
+ ERDMA_QP_ATTR_ORD = (1 << 3),
+ ERDMA_QP_ATTR_IRD = (1 << 4),
+ ERDMA_QP_ATTR_SQ_SIZE = (1 << 5),
+ ERDMA_QP_ATTR_RQ_SIZE = (1 << 6),
+ ERDMA_QP_ATTR_MPA = (1 << 7)
+};
+
+struct erdma_qp_attrs {
+ enum erdma_qp_state state;
+ enum erdma_cc_alg cc; /* Congestion control algorithm */
+ u32 sq_size;
+ u32 rq_size;
+ u32 orq_size;
+ u32 irq_size;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 cookie;
+#define ERDMA_QP_ACTIVE 0
+#define ERDMA_QP_PASSIVE 1
+ u8 qp_type;
+ u8 pd_len;
+};
+
+struct erdma_qp {
+ struct ib_qp ibqp;
+ struct kref ref;
+ struct completion safe_free;
+ struct erdma_dev *dev;
+ struct erdma_cep *cep;
+ struct rw_semaphore state_lock;
+
+ union {
+ struct erdma_kqp kern_qp;
+ struct erdma_uqp user_qp;
+ };
+
+ struct erdma_cq *scq;
+ struct erdma_cq *rcq;
+
+ struct erdma_qp_attrs attrs;
+ spinlock_t lock;
+};
+
+struct erdma_kcq_info {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+ u32 ci;
+ u32 cmdsn;
+ u32 notify_cnt;
+
+ spinlock_t lock;
+ u8 __iomem *db;
+ u64 *db_record;
+};
+
+struct erdma_ucq_info {
+ struct erdma_mem qbuf_mtt;
+ struct erdma_user_dbrecords_page *user_dbr_page;
+ dma_addr_t db_info_dma_addr;
+};
+
+struct erdma_cq {
+ struct ib_cq ibcq;
+ u32 cqn;
+
+ u32 depth;
+ u32 assoc_eqn;
+
+ union {
+ struct erdma_kcq_info kern_cq;
+ struct erdma_ucq_info user_cq;
+ };
+};
+
+#define QP_ID(qp) ((qp)->ibqp.qp_num)
+
+static inline struct erdma_qp *find_qp_by_qpn(struct erdma_dev *dev, int id)
+{
+ return (struct erdma_qp *)xa_load(&dev->qp_xa, id);
+}
+
+static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id)
+{
+ return (struct erdma_cq *)xa_load(&dev->cq_xa, id);
+}
+
+void erdma_qp_get(struct erdma_qp *qp);
+void erdma_qp_put(struct erdma_qp *qp);
+int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
+ enum erdma_qp_attr_mask mask);
+void erdma_qp_llp_close(struct erdma_qp *qp);
+void erdma_qp_cm_drop(struct erdma_qp *qp);
+
+static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx)
+{
+ return container_of(ibctx, struct erdma_ucontext, ibucontext);
+}
+
+static inline struct erdma_pd *to_epd(struct ib_pd *pd)
+{
+ return container_of(pd, struct erdma_pd, ibpd);
+}
+
+static inline struct erdma_mr *to_emr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct erdma_mr, ibmr);
+}
+
+static inline struct erdma_qp *to_eqp(struct ib_qp *qp)
+{
+ return container_of(qp, struct erdma_qp, ibqp);
+}
+
+static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct erdma_cq, ibcq);
+}
+
+static inline struct erdma_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *ibmmap)
+{
+ return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry);
+}
+
+int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data);
+void erdma_dealloc_ucontext(struct ib_ucontext *ibctx);
+int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr,
+ struct ib_udata *data);
+int erdma_get_port_immutable(struct ib_device *dev, u32 port,
+ struct ib_port_immutable *ib_port_immutable);
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *data);
+int erdma_query_port(struct ib_device *dev, u32 port,
+ struct ib_port_attr *attr);
+int erdma_query_gid(struct ib_device *dev, u32 port, int idx,
+ union ib_gid *gid);
+int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data);
+int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *data);
+int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_qp_init_attr *init_attr);
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *data);
+int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 virt, int access, struct ib_udata *udata);
+struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int rights);
+int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *data);
+int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
+void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+void erdma_qp_get_ref(struct ib_qp *ibqp);
+void erdma_qp_put_ref(struct ib_qp *ibqp);
+struct ib_qp *erdma_get_ibqp(struct ib_device *dev, int id);
+int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason);
+void erdma_set_mtu(struct erdma_dev *dev, u32 mtu);
+
+#endif
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index 6eb739052121..14b92e12bf29 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HFI1
tristate "Cornelis OPX Gen1 support"
- depends on X86_64 && INFINIBAND_RDMAVT && I2C
+ depends on X86_64 && INFINIBAND_RDMAVT && I2C && !UML
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 98c813ba4304..877f8e84a672 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -5,7 +5,6 @@
#include <linux/topology.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/numa.h>
@@ -667,7 +666,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* engines, use the same CPU cores as general/control
* context.
*/
- if (cpumask_weight(&entry->def_intr.mask) == 0)
+ if (cpumask_empty(&entry->def_intr.mask))
cpumask_copy(&entry->def_intr.mask,
&entry->general_intr_mask);
}
@@ -687,7 +686,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* vectors, use the same CPU core as the general/control
* context.
*/
- if (cpumask_weight(&entry->comp_vect_mask) == 0)
+ if (cpumask_empty(&entry->comp_vect_mask))
cpumask_copy(&entry->comp_vect_mask,
&entry->general_intr_mask);
}
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index f1245c94ae26..ebe970f76232 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8753,7 +8753,7 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
/*
* When writing a LCB CSR, out_data contains the full value to
- * to be written, while in_data contains the relative LCB
+ * be written, while in_data contains the relative LCB
* address in 7:0. Do the work here, rather than the caller,
* of distrubting the write data to where it needs to go:
*
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 995991d9709d..166ad6b828dc 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -137,61 +137,6 @@
#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
HFI1_USER_SWMINOR)
-#ifndef HFI1_KERN_TYPE
-#define HFI1_KERN_TYPE 0
-#endif
-
-/*
- * Similarly, this is the kernel version going back to the user. It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a Intel release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-Intel and 1 for Intel-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of hfi1_base_info, so the user code can in turn
- * check for compatibility with the kernel.
-*/
-#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION)
-
-/*
- * Define the driver version number. This is something that refers only
- * to the driver itself, not the software interfaces it supports.
- */
-#ifndef HFI1_DRIVER_VERSION_BASE
-#define HFI1_DRIVER_VERSION_BASE "0.9-294"
-#endif
-
-/* create the final driver version string */
-#ifdef HFI1_IDSTR
-#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR
-#else
-#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE
-#endif
-
-/*
- * Diagnostics can send a packet by writing the following
- * struct to the diag packet special file.
- *
- * This allows a custom PBC qword, so that special modes and deliberate
- * changes to CRCs can be used.
- */
-#define _DIAG_PKT_VERS 1
-struct diag_pkt {
- __u16 version; /* structure version */
- __u16 unit; /* which device */
- __u16 sw_index; /* send sw index to use */
- __u16 len; /* data length, in bytes */
- __u16 port; /* port number */
- __u16 unused;
- __u32 flags; /* call flags */
- __u64 data; /* user data pointer */
- __u64 pbc; /* PBC for the packet */
-};
-
-/* diag_pkt flags */
-#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */
-
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software.
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 22a3cdb940be..80ba1e53c068 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -7,7 +7,6 @@
#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/export.h>
-#include <linux/module.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ratelimit.h>
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index 68a184c39941..8ceff7141baf 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -4,7 +4,6 @@
*/
#include <linux/cdev.h>
-#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index e2c634af40e9..8e71bef9d982 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -29,12 +29,6 @@
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the initialization code.
- */
-const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
-
DEFINE_MUTEX(hfi1_mutex); /* general driver use */
unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index e8ed05516bf2..7741a1d69097 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -72,7 +72,7 @@ static int read_efi_var(const char *name, unsigned long *size,
* is in the EFIVAR_FS code and may not be compiled in.
* However, even that is insufficient since it does not cover
* EFI_BUFFER_TOO_SMALL which could be an important return.
- * For now, just split out succces or not found.
+ * For now, just split out success or not found.
*/
ret = status == EFI_SUCCESS ? 0 :
status == EFI_NOT_FOUND ? -ENOENT :
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index e2e4f9f6fae2..3af77a0840ab 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -6,7 +6,6 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/types.h>
#include <linux/bitmap.h>
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1783a6ea5427..f5f9269fdc16 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
unsigned long dim = from->nr_segs;
int idx;
+ if (!HFI1_CAP_IS_KSET(SDMA))
+ return -EINVAL;
idx = srcu_read_lock(&fd->pq_srcu);
pq = srcu_dereference(fd->pq, &fd->pq_srcu);
if (!cq || !pq) {
@@ -963,7 +965,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
uctxt->userversion = uinfo->userversion;
uctxt->flags = hfi1_cap_mask; /* save current flag state */
init_waitqueue_head(&uctxt->wait);
- strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
+ strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
uctxt->jkey = generate_jkey(current_uid());
hfi1_stats.sps_ctxts++;
@@ -1177,8 +1179,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
goto done;
ret = init_user_ctxt(fd, uctxt);
- if (ret)
+ if (ret) {
+ hfi1_free_ctxt_rcv_groups(uctxt);
goto done;
+ }
user_init(uctxt);
@@ -1220,7 +1224,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
- binfo.sw_version = HFI1_KERN_SWVERSION;
+ binfo.sw_version = HFI1_USER_SWVERSION;
binfo.bthqp = RVT_KDETH_QP_PREFIX;
binfo.jkey = uctxt->jkey;
/*
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 31e63e245ea9..1d77514ebbee 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -5,7 +5,6 @@
#include <linux/firmware.h>
#include <linux/mutex.h>
-#include <linux/module.h>
#include <linux/delay.h>
#include <linux/crc32.h>
@@ -1115,7 +1114,7 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
* Reset all of the fabric serdes for this HFI in preparation to take the
* link to Polling.
*
- * To do a reset, we need to write to to the serdes registers. Unfortunately,
+ * To do a reset, we need to write to the serdes registers. Unfortunately,
* the fabric serdes download to the other HFI on the ASIC will have turned
* off the firmware validation on this HFI. This means we can't write to the
* registers to reset the serdes. Work around this by performing a complete
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 4436ed41547c..436372b31431 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -489,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
u16 shift, mult;
u64 src;
u32 current_egress_rate; /* Mbits /sec */
- u32 max_pkt_time;
+ u64 max_pkt_time;
/*
* max_pkt_time is the maximum packet egress time in units
* of the fabric clock period 1/(805 MHz).
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
index 909122934246..aec60d4888eb 100644
--- a/drivers/infiniband/hw/hfi1/ipoib.h
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -55,7 +55,7 @@ union hfi1_ipoib_flow {
*/
struct ipoib_txreq {
struct sdma_txreq txreq;
- struct hfi1_sdma_header sdma_hdr;
+ struct hfi1_sdma_header *sdma_hdr;
int sdma_status;
int complete;
struct hfi1_ipoib_dev_priv *priv;
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
index e1a2b02bbd91..5d814afdf7f3 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_main.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -22,26 +22,35 @@ static int hfi1_ipoib_dev_init(struct net_device *dev)
int ret;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
ret = priv->netdev_ops->ndo_init(dev);
if (ret)
- return ret;
+ goto out_ret;
ret = hfi1_netdev_add_data(priv->dd,
qpn_from_mac(priv->netdev->dev_addr),
dev);
if (ret < 0) {
priv->netdev_ops->ndo_uninit(dev);
- return ret;
+ goto out_ret;
}
return 0;
+out_ret:
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
}
static void hfi1_ipoib_dev_uninit(struct net_device *dev)
{
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+
hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
priv->netdev_ops->ndo_uninit(dev);
@@ -166,12 +175,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
hfi1_ipoib_rxq_deinit(priv->netdev);
free_percpu(dev->tstats);
-}
-
-static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev)
-{
- hfi1_ipoib_netdev_dtor(dev);
- free_netdev(dev);
+ dev->tstats = NULL;
}
static void hfi1_ipoib_set_id(struct net_device *dev, int id)
@@ -211,24 +215,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device,
priv->port_num = port_num;
priv->netdev_ops = netdev->netdev_ops;
- netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
-
ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
rc = hfi1_ipoib_txreq_init(priv);
if (rc) {
dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
- hfi1_ipoib_free_rdma_netdev(netdev);
return rc;
}
rc = hfi1_ipoib_rxq_init(netdev);
if (rc) {
dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
- hfi1_ipoib_free_rdma_netdev(netdev);
+ hfi1_ipoib_txreq_deinit(priv);
return rc;
}
+ netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
netdev->needs_free_netdev = true;
diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c
index 3afa7545242c..629691a572ef 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_rx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c
@@ -11,13 +11,10 @@
static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
{
- void *dst_data;
-
skb_checksum_none_assert(skb);
skb->protocol = *((__be16 *)data);
- dst_data = skb_put(skb, size);
- memcpy(dst_data, data, size);
+ skb_put_data(skb, data, size);
skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index f4010890309f..5d9a7b09ca37 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -122,7 +122,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
dd_dev_warn(priv->dd,
"%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
__func__, tx->sdma_status,
- le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
+ le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
tx->txq->sde->this_idx);
}
@@ -231,7 +231,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
{
struct hfi1_devdata *dd = txp->dd;
struct sdma_txreq *txreq = &tx->txreq;
- struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
u16 pkt_bytes =
sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
int ret;
@@ -256,7 +256,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
struct ipoib_txparms *txp)
{
struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
- struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
struct sk_buff *skb = tx->skb;
struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
struct rdma_ah_attr *ah_attr = txp->ah_attr;
@@ -483,7 +483,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
if (likely(!ret)) {
tx_ok:
trace_sdma_output_ibhdr(txq->priv->dd,
- &tx->sdma_hdr.hdr,
+ &tx->sdma_hdr->hdr,
ib_is_sc5(txp->flow.sc5));
hfi1_ipoib_check_queue_depth(txq);
return NETDEV_TX_OK;
@@ -547,7 +547,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev,
hfi1_ipoib_check_queue_depth(txq);
trace_sdma_output_ibhdr(txq->priv->dd,
- &tx->sdma_hdr.hdr,
+ &tx->sdma_hdr->hdr,
ib_is_sc5(txp->flow.sc5));
if (!netdev_xmit_more())
@@ -683,7 +683,8 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
struct net_device *dev = priv->netdev;
u32 tx_ring_size, tx_item_size;
- int i;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ int i, j;
/*
* Ring holds 1 less than tx_ring_size
@@ -701,7 +702,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
for (i = 0; i < dev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct ipoib_txreq *tx;
+ tx_ring = &txq->tx_ring;
iowait_init(&txq->wait,
0,
hfi1_ipoib_flush_txq,
@@ -725,18 +728,21 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
priv->dd->node);
txq->tx_ring.items =
- kcalloc_node(tx_ring_size, tx_item_size,
- GFP_KERNEL, priv->dd->node);
+ kvzalloc_node(array_size(tx_ring_size, tx_item_size),
+ GFP_KERNEL, priv->dd->node);
if (!txq->tx_ring.items)
goto free_txqs;
txq->tx_ring.max_items = tx_ring_size;
- txq->tx_ring.shift = ilog2(tx_ring_size);
+ txq->tx_ring.shift = ilog2(tx_item_size);
txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++)
+ hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
+ kzalloc_node(sizeof(*tx->sdma_hdr),
+ GFP_KERNEL, priv->dd->node);
- netif_tx_napi_add(dev, &txq->napi,
- hfi1_ipoib_poll_tx_ring,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
}
return 0;
@@ -746,7 +752,10 @@ free_txqs:
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
netif_napi_del(&txq->napi);
- kfree(txq->tx_ring.items);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
}
kfree(priv->txqs);
@@ -780,17 +789,20 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
{
- int i;
+ int i, j;
for (i = 0; i < priv->netdev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
iowait_cancel_work(&txq->wait);
iowait_sdma_drain(&txq->wait);
hfi1_ipoib_drain_tx_list(txq);
netif_napi_del(&txq->napi);
hfi1_ipoib_drain_tx_ring(txq);
- kfree(txq->tx_ring.items);
+ for (j = 0; j < tx_ring->max_items; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
}
kfree(priv->txqs);
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 876cc78a22cc..7333646021bb 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -80,6 +80,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
unsigned long flags;
struct list_head del_list;
+ /* Prevent freeing of mm until we are completely finished. */
+ mmgrab(handler->mn.mm);
+
/* Unregister first so we don't get any more notifications. */
mmu_notifier_unregister(&handler->mn, handler->mn.mm);
@@ -102,6 +105,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
do_remove(handler, &del_list);
+ /* Now the mm may be freed. */
+ mmdrop(handler->mn.mm);
+
kfree(handler);
}
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 03b098a494b5..3dfa5aff2512 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
* right now.
*/
set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
- netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+ netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
rc = msix_netdev_request_rcd_irq(rxq->rcd);
if (rc)
goto bail_context_irq_failure;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 3d42bd2b36bd..51ae58c02b15 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -913,8 +913,7 @@ void sc_disable(struct send_context *sc)
spin_unlock(&sc->release_lock);
write_seqlock(&sc->waitlock);
- if (!list_empty(&sc->piowait))
- list_move(&sc->piowait, &wake_list);
+ list_splice_init(&sc->piowait, &wake_list);
write_sequnlock(&sc->waitlock);
while (!list_empty(&wake_list)) {
struct iowait *wait;
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 136f9a99e1e0..7690f996d5e3 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -172,7 +172,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
}
/*
- * Read nbytes from "from" and and place them in the low bytes
+ * Read nbytes from "from" and place them in the low bytes
* of pbuf->carry. Other bytes are left as-is. Any previous
* value in pbuf->carry is lost.
*
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index f07d328689d3..a95b654f5254 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1288,11 +1288,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
kvfree(sde->tx_ring);
sde->tx_ring = NULL;
}
- spin_lock_irq(&dd->sde_map_lock);
- sdma_map_free(rcu_access_pointer(dd->sdma_map));
- RCU_INIT_POINTER(dd->sdma_map, NULL);
- spin_unlock_irq(&dd->sde_map_lock);
- synchronize_rcu();
+ if (rcu_access_pointer(dd->sdma_map)) {
+ spin_lock_irq(&dd->sde_map_lock);
+ sdma_map_free(rcu_access_pointer(dd->sdma_map));
+ RCU_INIT_POINTER(dd->sdma_map, NULL);
+ spin_unlock_irq(&dd->sde_map_lock);
+ synchronize_rcu();
+ }
kfree(dd->per_sdma);
dd->per_sdma = NULL;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 2a7abf7a1f7f..18b05ffb415a 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -850,7 +850,7 @@ void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
int i;
for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
- rcd->flows[i].generation = mask_generation(prandom_u32());
+ rcd->flows[i].generation = mask_generation(get_random_u32());
kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
}
}
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index 707f1053f0b7..582b6f68df3d 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -26,14 +26,10 @@ DECLARE_EVENT_CLASS(hfi1_trace_template,
TP_PROTO(const char *function, struct va_format *vaf),
TP_ARGS(function, vaf),
TP_STRUCT__entry(__string(function, function)
- __dynamic_array(char, msg, MAX_MSG_LEN)
+ __vstring(msg, vaf->fmt, vaf->va)
),
TP_fast_assign(__assign_str(function, function);
- WARN_ON_ONCE(vsnprintf
- (__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >=
- MAX_MSG_LEN);
+ __assign_vstr(msg, vaf->fmt, vaf->va);
),
TP_printk("(%s) %s",
__get_str(function),
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 5b11c8282744..a71c5a36ceba 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -161,9 +161,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
if (!pq->reqs)
goto pq_reqs_nomem;
- pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
- sizeof(*pq->req_in_use),
- GFP_KERNEL);
+ pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
if (!pq->req_in_use)
goto pq_reqs_no_in_use;
@@ -210,7 +208,7 @@ cq_comps_nomem:
cq_nomem:
kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
- kfree(pq->req_in_use);
+ bitmap_free(pq->req_in_use);
pq_reqs_no_in_use:
kfree(pq->reqs);
pq_reqs_nomem:
@@ -257,7 +255,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
pq->wait,
!atomic_read(&pq->n_reqs));
kfree(pq->reqs);
- kfree(pq->req_in_use);
+ bitmap_free(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
flush_pq_iowait(pq);
kfree(pq);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index dc9211f3a009..e6e17984553c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1300,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS |
- IB_DEVICE_RDMA_NETDEV_OPA;
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1397,8 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u32 port_num,
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
- props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
- ib_mtu_enum_to_int(props->max_mtu);
+ props->phys_mtu = hfi1_max_mtu;
return 0;
}
@@ -1448,12 +1447,10 @@ static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num)
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
- int ret;
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
OPA_LINKDOWN_REASON_UNKNOWN);
- ret = set_link_state(ppd, HLS_DN_DOWNDEF);
- return ret;
+ return set_link_state(ppd, HLS_DN_DOWNDEF);
}
static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
@@ -1802,7 +1799,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ib_set_device_ops(ibdev, &hfi1_dev_ops);
- strlcpy(ibdev->node_desc, init_utsname()->nodename,
+ strscpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
/*
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 38565532d654..7f30f32b34dc 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -391,9 +391,6 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
bool *call_send);
-extern const u32 rc_only_opcode;
-extern const u32 uc_only_opcode;
-
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index 18d10ebf900b..ab3fbba70789 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -5,22 +5,9 @@ config INFINIBAND_HNS
depends on ARM64 || (COMPILE_TEST && 64BIT)
depends on (HNS_DSAF && HNS_ENET) || HNS3
help
- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
- is used in Hisilicon Hip06 and more further ICT SoC based on
- platform device.
+ This is a RoCE/RDMA driver for the Hisilicon RoCE engine.
- To compile HIP06 or HIP08 driver as module, choose M here.
-
-config INFINIBAND_HNS_HIP06
- bool "Hisilicon Hip06 Family RoCE support"
- depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
- depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y)
- help
- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
- Hip07 SoC. These RoCE engines are platform devices.
-
- To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
- module will be called hns-roce-hw-v1
+ To compile HIP08 driver as module, choose M here.
config INFINIBAND_HNS_HIP08
bool "Hisilicon Hip08 Family RoCE support"
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index e105945b94a1..a7d259238305 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -9,12 +9,7 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
-ifdef CONFIG_INFINIBAND_HNS_HIP06
-hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs)
-obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o
-endif
-
ifdef CONFIG_INFINIBAND_HNS_HIP08
-hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs)
+hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs)
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o
endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index cc258edec331..480c062dd04f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
@@ -42,9 +41,8 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
u16 sport;
if (!fl)
- sport = get_random_u32() %
- (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 -
- IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) +
+ sport = prandom_u32_max(IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 -
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) +
IB_ROCE_UDP_ENCAP_VALID_PORT_MIN;
else
sport = rdma_flow_label_to_udp_sport(fl);
@@ -61,7 +59,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct hns_roce_ah *ah = to_hr_ah(ibah);
int ret = 0;
- if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata)
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata)
return -EOPNOTSUPP;
ah->av.port = rdma_ah_get_port_num(ah_attr);
@@ -80,7 +78,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
/* HIP08 needs to record vlan info in Address Vector */
- if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) {
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
&ah->av.vlan_id, NULL);
if (ret)
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index d4fa0fd52294..11a78ceae568 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -31,10 +31,9 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <linux/vmalloc.h>
-#include "hns_roce_device.h"
#include <rdma/ib_umem.h>
+#include "hns_roce_device.h"
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 84f3f2b5f097..864413607571 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -31,7 +31,6 @@
*/
#include <linux/dmapool.h>
-#include <linux/platform_device.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
@@ -39,45 +38,36 @@
#define CMD_POLL_TOKEN 0xffff
#define CMD_MAX_NUM 32
-static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier,
- u8 op_modifier, u16 op, u16 token,
- int event)
+static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- return hr_dev->hw->post_mbox(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, token, event);
+ return hr_dev->hw->post_mbox(hr_dev, mbox_msg);
}
/* this should be called with "poll_sem" */
-static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op,
- unsigned int timeout)
+static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
int ret;
- ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- CMD_POLL_TOKEN, 0);
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
if (ret) {
dev_err_ratelimited(hr_dev->dev,
- "failed to post mailbox %x in poll mode, ret = %d.\n",
- op, ret);
+ "failed to post mailbox 0x%x in poll mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
return ret;
}
- return hr_dev->hw->poll_mbox_done(hr_dev, timeout);
+ return hr_dev->hw->poll_mbox_done(hr_dev);
}
-static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned int timeout)
+static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
int ret;
down(&hr_dev->cmd.poll_sem);
- ret = __hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, timeout);
+ ret = __hns_roce_cmd_mbox_poll(hr_dev, mbox_msg);
up(&hr_dev->cmd.poll_sem);
return ret;
@@ -91,7 +81,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
if (unlikely(token != context->token)) {
dev_err_ratelimited(hr_dev->dev,
- "[cmd] invalid ae token %x,context token is %x!\n",
+ "[cmd] invalid ae token 0x%x, context token is 0x%x.\n",
token, context->token);
return;
}
@@ -101,10 +91,8 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
complete(&context->done);
}
-static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op,
- unsigned int timeout)
+static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
struct hns_roce_cmdq *cmd = &hr_dev->cmd;
struct hns_roce_cmd_context *context;
@@ -125,66 +113,70 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
reinit_completion(&context->done);
- ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- context->token, 1);
+ mbox_msg->token = context->token;
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
if (ret) {
dev_err_ratelimited(dev,
- "failed to post mailbox %x in event mode, ret = %d.\n",
- op, ret);
+ "failed to post mailbox 0x%x in event mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
goto out;
}
if (!wait_for_completion_timeout(&context->done,
- msecs_to_jiffies(timeout))) {
- dev_err_ratelimited(dev, "[cmd] token %x mailbox %x timeout.\n",
- context->token, op);
+ msecs_to_jiffies(HNS_ROCE_CMD_TIMEOUT_MSECS))) {
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x timeout.\n",
+ context->token, mbox_msg->cmd);
ret = -EBUSY;
goto out;
}
ret = context->result;
if (ret)
- dev_err_ratelimited(dev, "[cmd] token %x mailbox %x error %d\n",
- context->token, op, ret);
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x error %d.\n",
+ context->token, mbox_msg->cmd, ret);
out:
context->busy = 0;
return ret;
}
-static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned int timeout)
+static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
int ret;
down(&hr_dev->cmd.event_sem);
- ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, timeout);
+ ret = __hns_roce_cmd_mbox_wait(hr_dev, mbox_msg);
up(&hr_dev->cmd.event_sem);
return ret;
}
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
- unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned int timeout)
+ u8 cmd, unsigned long tag)
{
+ struct hns_roce_mbox_msg mbox_msg = {};
bool is_busy;
if (hr_dev->hw->chk_mbox_avail)
if (!hr_dev->hw->chk_mbox_avail(hr_dev, &is_busy))
return is_busy ? -EBUSY : 0;
- if (hr_dev->cmd.use_events)
- return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- timeout);
- else
- return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- timeout);
+ mbox_msg.in_param = in_param;
+ mbox_msg.out_param = out_param;
+ mbox_msg.cmd = cmd;
+ mbox_msg.tag = tag;
+
+ if (hr_dev->cmd.use_events) {
+ mbox_msg.event_en = 1;
+
+ return hns_roce_cmd_mbox_wait(hr_dev, &mbox_msg);
+ } else {
+ mbox_msg.event_en = 0;
+ mbox_msg.token = CMD_POLL_TOKEN;
+
+ return hns_roce_cmd_mbox_poll(hr_dev, &mbox_msg);
+ }
}
int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
@@ -270,3 +262,15 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma);
kfree(mailbox);
}
+
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cmd, idx);
+}
+
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, 0, 0, cmd, idx);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 8025e7f657fa..052a3d60905a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -140,12 +140,16 @@ enum {
};
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
- unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned int timeout);
+ u8 cmd, unsigned long tag);
struct hns_roce_cmd_mailbox *
hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox);
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx);
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd,
+ unsigned long idx);
#endif /* _HNS_ROCE_CMD_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index b73e55de83ac..465d1f914b6c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -104,208 +104,6 @@
#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
-#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
-#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
-
-#define ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S 5
-
-#define ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S 6
-
-#define ROCEE_GLB_CFG_ROCEE_PORT_ST_S 10
-#define ROCEE_GLB_CFG_ROCEE_PORT_ST_M \
- (((1UL << 6) - 1) << ROCEE_GLB_CFG_ROCEE_PORT_ST_S)
-
-#define ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S 16
-
-#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S 0
-#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M \
- (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S 24
-#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M \
- (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S 0
-#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M \
- (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S 24
-#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M \
- (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S)
-
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S 0
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M \
- (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S)
-
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S 16
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M \
- (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S)
-
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S 0
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M \
- (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S)
-
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S 16
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M \
- (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S)
-
-#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_S 0
-#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_M \
- (((1UL << 8) - 1) << ROCEE_RAQ_WL_ROCEE_RAQ_WL_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S 0
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M \
- (((1UL << 15) - 1) << \
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S 16
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M \
- (((1UL << 4) - 1) << \
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S 20
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE 21
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S 0
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S)
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S 5
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S)
-
-#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S 0
-#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S)
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S 5
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S)
-
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S 0
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S)
-
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S 8
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S 0
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M \
- (((1UL << 19) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_S 19
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S 20
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M \
- (((1UL << 2) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S 22
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S 31
-
-#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S 0
-#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M \
- (((1UL << 3) - 1) << ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S)
-
-#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S 0
-#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M \
- (((1UL << 15) - 1) << ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S)
-
-#define ROCEE_MB6_ROCEE_MB_CMD_S 0
-#define ROCEE_MB6_ROCEE_MB_CMD_M \
- (((1UL << 8) - 1) << ROCEE_MB6_ROCEE_MB_CMD_S)
-
-#define ROCEE_MB6_ROCEE_MB_CMD_MDF_S 8
-#define ROCEE_MB6_ROCEE_MB_CMD_MDF_M \
- (((1UL << 4) - 1) << ROCEE_MB6_ROCEE_MB_CMD_MDF_S)
-
-#define ROCEE_MB6_ROCEE_MB_EVENT_S 14
-
-#define ROCEE_MB6_ROCEE_MB_HW_RUN_S 15
-
-#define ROCEE_MB6_ROCEE_MB_TOKEN_S 16
-#define ROCEE_MB6_ROCEE_MB_TOKEN_M \
- (((1UL << 16) - 1) << ROCEE_MB6_ROCEE_MB_TOKEN_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S 0
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M \
- (((1UL << 24) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S 24
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M \
- (((1UL << 4) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S 28
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M \
- (((1UL << 3) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S 31
-
-#define ROCEE_SMAC_H_ROCEE_SMAC_H_S 0
-#define ROCEE_SMAC_H_ROCEE_SMAC_H_M \
- (((1UL << 16) - 1) << ROCEE_SMAC_H_ROCEE_SMAC_H_S)
-
-#define ROCEE_SMAC_H_ROCEE_PORT_MTU_S 16
-#define ROCEE_SMAC_H_ROCEE_PORT_MTU_M \
- (((1UL << 4) - 1) << ROCEE_SMAC_H_ROCEE_PORT_MTU_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S 0
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M \
- (((1UL << 2) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S 8
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M \
- (((1UL << 4) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S 17
-
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S 0
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M \
- (((1UL << 5) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S)
-
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S 16
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M \
- (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S)
-
-#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S 0
-#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M \
- (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S)
-
-#define ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S 16
-#define ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S 1
-#define ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S 0
-
-#define ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S 0
-#define ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S 1
-
-#define ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S 0
-
-#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S 0
-#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M \
- (((1UL << 28) - 1) << ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
-
-#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S 0
-#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \
- (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
-
-#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0
-#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \
- (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S)
-
-#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0
-#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \
- (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S)
-
-#define ROCEE_SDB_CNT_CMP_BITS 16
-
-#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20
-
-#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0
-
/*************ROCEE_REG DEFINITION****************/
#define ROCEE_VENDOR_ID_REG 0x0
#define ROCEE_VENDOR_PART_ID_REG 0x4
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index d763f097599f..736dc2f993b4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "hns_roce_device.h"
@@ -101,12 +100,39 @@ static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn)
mutex_unlock(&cq_table->bank_mutex);
}
+static int hns_roce_create_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ u64 *mtts, dma_addr_t dma_handle)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ibdev_err(ibdev, "failed to alloc mailbox for CQC.\n");
+ return PTR_ERR(mailbox);
+ }
+
+ hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle);
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_CQC,
+ hr_cq->cqn);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to send create cmd for CQ(0x%lx), ret = %d.\n",
+ hr_cq->cqn, ret);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_cmd_mailbox *mailbox;
- u64 mtts[MTT_MIN_COUNT] = { 0 };
+ u64 mtts[MTT_MIN_COUNT] = {};
dma_addr_t dma_handle;
int ret;
@@ -122,7 +148,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
if (ret) {
ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n",
hr_cq->cqn, ret);
- goto err_out;
+ return ret;
}
ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
@@ -131,41 +157,17 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
goto err_put;
}
- /* Allocate mailbox memory */
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- goto err_xa;
- }
-
- hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle);
-
- /* Send mailbox to hw */
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0,
- HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS);
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- if (ret) {
- ibdev_err(ibdev,
- "failed to send create cmd for CQ(0x%lx), ret = %d.\n",
- hr_cq->cqn, ret);
+ ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle);
+ if (ret)
goto err_xa;
- }
-
- hr_cq->cons_index = 0;
- hr_cq->arm_sn = 1;
-
- refcount_set(&hr_cq->refcount, 1);
- init_completion(&hr_cq->free);
return 0;
err_xa:
xa_erase(&cq_table->array, hr_cq->cqn);
-
err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
-err_out:
return ret;
}
@@ -175,9 +177,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
struct device *dev = hr_dev->dev;
int ret;
- ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1,
- HNS_ROCE_CMD_DESTROY_CQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
+ hr_cq->cqn);
if (ret)
dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
hr_cq->cqn);
@@ -406,15 +407,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
goto err_cqn;
}
- /*
- * For the QP created by kernel space, tptr value should be initialized
- * to zero; For the QP created by user space, it will cause synchronous
- * problems if tptr is set to zero here, so we initialize it in user
- * space.
- */
- if (!udata && hr_cq->tptr_addr)
- *hr_cq->tptr_addr = 0;
-
if (udata) {
resp.cqn = hr_cq->cqn;
ret = ib_copy_to_udata(udata, &resp,
@@ -423,6 +415,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
goto err_cqc;
}
+ hr_cq->cons_index = 0;
+ hr_cq->arm_sn = 1;
+ refcount_set(&hr_cq->refcount, 1);
+ init_completion(&hr_cq->free);
+
return 0;
err_cqc:
@@ -441,9 +438,6 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- if (hr_dev->hw->destroy_cq)
- hr_dev->hw->destroy_cq(ib_cq, udata);
-
free_cqc(hr_dev, hr_cq);
free_cqn(hr_dev, hr_cq->cqn);
free_cq_db(hr_dev, hr_cq, udata);
@@ -460,7 +454,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
hr_cq = xa_load(&hr_dev->cq_table.array,
cqn & (hr_dev->caps.num_cqs - 1));
if (!hr_cq) {
- dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n",
+ dev_warn(hr_dev->dev, "completion event for bogus CQ 0x%06x\n",
cqn);
return;
}
@@ -481,14 +475,14 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
hr_cq = xa_load(&hr_dev->cq_table.array,
cqn & (hr_dev->caps.num_cqs - 1));
if (!hr_cq) {
- dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn);
+ dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
return;
}
if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
- dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n",
+ dev_err(dev, "unexpected event type 0x%x on CQ 0x%06x\n",
event_type, cqn);
return;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index 751470c7a2ce..5c4c0480832b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -4,7 +4,6 @@
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*/
-#include <linux/platform_device.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 43e17d61cb63..723e55a7de8d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -36,36 +36,18 @@
#include <rdma/ib_verbs.h>
#include <rdma/hns-abi.h>
-#define DRV_NAME "hns_roce"
-
#define PCI_REVISION_ID_HIP08 0x21
#define PCI_REVISION_ID_HIP09 0x30
-#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
-
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
#define HNS_ROCE_IB_MIN_SQ_STRIDE 6
#define BA_BYTE_LEN 8
-/* Hardware specification only for v1 engine */
#define HNS_ROCE_MIN_CQE_NUM 0x40
-#define HNS_ROCE_MIN_WQE_NUM 0x20
#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
-/* Hardware specification only for v1 engine */
-#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
-#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
-
-#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
-#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
- (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS)
-#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
-#define HNS_ROCE_MIN_CQE_CNT 16
-
-#define HNS_ROCE_RESERVED_SGE 1
-
#define HNS_ROCE_MAX_IRQ_NUM 128
#define HNS_ROCE_SGE_IN_WQE 2
@@ -102,18 +84,12 @@
#define HNS_ROCE_FRMR_MAX_PA 512
#define PKEY_ID 0xffff
-#define GUID_LEN 8
#define NODE_DESC_SIZE 64
#define DB_REG_OFFSET 0x1000
/* Configure to HW for PAGE_SIZE larger than 4KB */
#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
-#define PAGES_SHIFT_8 8
-#define PAGES_SHIFT_16 16
-#define PAGES_SHIFT_24 24
-#define PAGES_SHIFT_32 32
-
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
@@ -122,11 +98,6 @@
#define CQ_BANKID_SHIFT 2
-/* The chip implementation of the consumer index is calculated
- * according to twice the actual EQ depth
- */
-#define EQ_DEPTH_COEFF 2
-
enum {
SERV_TYPE_RC,
SERV_TYPE_UC,
@@ -135,16 +106,6 @@ enum {
SERV_TYPE_XRC = 5,
};
-enum hns_roce_qp_state {
- HNS_ROCE_QP_STATE_RST,
- HNS_ROCE_QP_STATE_INIT,
- HNS_ROCE_QP_STATE_RTR,
- HNS_ROCE_QP_STATE_RTS,
- HNS_ROCE_QP_STATE_SQD,
- HNS_ROCE_QP_STATE_ERR,
- HNS_ROCE_QP_NUM_STATE,
-};
-
enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01,
HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02,
@@ -168,8 +129,6 @@ enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17,
};
-#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
-
enum {
HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
@@ -182,6 +141,7 @@ enum {
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
+ HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12),
HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
HNS_ROCE_CAP_FLAG_STASH = BIT(17),
};
@@ -227,7 +187,7 @@ struct hns_roce_uar {
enum hns_roce_mmap_type {
HNS_ROCE_MMAP_TYPE_DB = 1,
- HNS_ROCE_MMAP_TYPE_TPTR,
+ HNS_ROCE_MMAP_TYPE_DWQE,
};
struct hns_user_mmap_entry {
@@ -242,7 +202,6 @@ struct hns_roce_ucontext {
struct list_head page_list;
struct mutex page_mutex;
struct hns_user_mmap_entry *db_mmap_entry;
- struct hns_user_mmap_entry *tptr_mmap_entry;
};
struct hns_roce_pd {
@@ -281,7 +240,6 @@ struct hns_roce_hem_table {
/* Single obj size */
unsigned long obj_size;
unsigned long table_chunk_size;
- int lowmem;
struct mutex mutex;
struct hns_roce_hem **hem;
u64 **bt_l1;
@@ -345,19 +303,16 @@ struct hns_roce_mw {
u32 pbl_buf_pg_sz;
};
-/* Only support 4K page size for mr register */
-#define MR_SIZE_4K 0
-
struct hns_roce_mr {
struct ib_mr ibmr;
u64 iova; /* MR's virtual original addr */
u64 size; /* Address range of MR */
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
- u32 access; /* Access permission of MR */
+ u32 access; /* Access permission of MR */
int enabled; /* MR's active status */
- int type; /* MR's register type */
- u32 pbl_hop_num; /* multi-hop number */
+ int type; /* MR's register type */
+ u32 pbl_hop_num; /* multi-hop number */
struct hns_roce_mtr pbl_mtr;
u32 npages;
dma_addr_t *page_list;
@@ -374,17 +329,17 @@ struct hns_roce_wq {
u32 wqe_cnt; /* WQE num */
u32 max_gs;
u32 rsv_sge;
- int offset;
- int wqe_shift; /* WQE size */
+ u32 offset;
+ u32 wqe_shift; /* WQE size */
u32 head;
u32 tail;
void __iomem *db_reg;
};
struct hns_roce_sge {
- unsigned int sge_cnt; /* SGE num */
- int offset;
- int sge_shift; /* SGE size */
+ unsigned int sge_cnt; /* SGE num */
+ u32 offset;
+ u32 sge_shift; /* SGE size */
};
struct hns_roce_buf_list {
@@ -453,7 +408,6 @@ struct hns_roce_cq {
u32 cons_index;
u32 *set_ci_db;
void __iomem *db_reg;
- u16 *tptr_addr;
int arm_sn;
int cqe_size;
unsigned long cqn;
@@ -468,7 +422,7 @@ struct hns_roce_cq {
struct hns_roce_idx_que {
struct hns_roce_mtr mtr;
- int entry_shift;
+ u32 entry_shift;
unsigned long *bitmap;
u32 head;
u32 tail;
@@ -480,7 +434,7 @@ struct hns_roce_srq {
u32 wqe_cnt;
int max_gs;
u32 rsv_sge;
- int wqe_shift;
+ u32 wqe_shift;
u32 cqn;
u32 xrcdn;
void __iomem *db_reg;
@@ -539,10 +493,6 @@ struct hns_roce_srq_table {
struct hns_roce_hem_table table;
};
-struct hns_roce_raq_table {
- struct hns_roce_buf_list *e_raq_buf;
-};
-
struct hns_roce_av {
u8 port;
u8 gid_index;
@@ -572,6 +522,11 @@ struct hns_roce_cmd_context {
u16 busy;
};
+enum hns_roce_cmdq_state {
+ HNS_ROCE_CMDQ_STATE_NORMAL,
+ HNS_ROCE_CMDQ_STATE_FATAL_ERR,
+};
+
struct hns_roce_cmdq {
struct dma_pool *pool;
struct semaphore poll_sem;
@@ -591,6 +546,7 @@ struct hns_roce_cmdq {
* close device, switch into poll mode(non event mode)
*/
u8 use_events;
+ enum hns_roce_cmdq_state state;
};
struct hns_roce_cmd_mailbox {
@@ -598,6 +554,15 @@ struct hns_roce_cmd_mailbox {
dma_addr_t dma;
};
+struct hns_roce_mbox_msg {
+ u64 in_param;
+ u64 out_param;
+ u8 cmd;
+ u32 tag;
+ u16 token;
+ u8 event_en;
+};
+
struct hns_roce_dev;
struct hns_roce_rinl_sge {
@@ -627,17 +592,12 @@ struct hns_roce_work {
u32 queue_num;
};
-enum {
- HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5),
-};
-
struct hns_roce_qp {
struct ib_qp ibqp;
struct hns_roce_wq rq;
struct hns_roce_db rdb;
struct hns_roce_db sdb;
unsigned long en_flags;
- u32 doorbell_qpn;
enum ib_sig_type sq_signal_bits;
struct hns_roce_wq sq;
@@ -650,9 +610,7 @@ struct hns_roce_qp {
u8 sl;
u8 resp_depth;
u8 state;
- u32 access_flags;
u32 atomic_rd_en;
- u32 pkey_index;
u32 qkey;
void (*event)(struct hns_roce_qp *qp,
enum hns_roce_event event_type);
@@ -667,14 +625,16 @@ struct hns_roce_qp {
u32 next_sge;
enum ib_mtu path_mtu;
u32 max_inline_data;
+ u8 free_mr_en;
/* 0: flush needed, 1: unneeded */
unsigned long flush_flag;
struct hns_roce_work flush_work;
struct hns_roce_rinl_buf rq_inl_buf;
- struct list_head node; /* all qps are on a list */
- struct list_head rq_node; /* all recv qps are on a list */
- struct list_head sq_node; /* all send qps are on a list */
+ struct list_head node; /* all qps are on a list */
+ struct list_head rq_node; /* all recv qps are on a list */
+ struct list_head sq_node; /* all send qps are on a list */
+ struct hns_user_mmap_entry *dwqe_mmap_entry;
};
struct hns_roce_ib_iboe {
@@ -684,16 +644,16 @@ struct hns_roce_ib_iboe {
u8 phy_port[HNS_ROCE_MAX_PORTS];
};
-enum {
- HNS_ROCE_EQ_STAT_INVALID = 0,
- HNS_ROCE_EQ_STAT_VALID = 2,
-};
-
struct hns_roce_ceqe {
__le32 comp;
__le32 rsv[15];
};
+#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l)
+
+#define CEQE_CQN CEQE_FIELD_LOC(23, 0)
+#define CEQE_OWNER CEQE_FIELD_LOC(31, 31)
+
struct hns_roce_aeqe {
__le32 asyn;
union {
@@ -713,6 +673,13 @@ struct hns_roce_aeqe {
__le32 rsv[12];
};
+#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l)
+
+#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0)
+#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8)
+#define AEQE_OWNER AEQE_FIELD_LOC(31, 31)
+#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32)
+
struct hns_roce_eq {
struct hns_roce_dev *hr_dev;
void __iomem *db_reg;
@@ -720,12 +687,9 @@ struct hns_roce_eq {
int type_flag; /* Aeq:1 ceq:0 */
int eqn;
u32 entries;
- u32 log_entries;
int eqe_size;
int irq;
- int log_page_size;
u32 cons_index;
- struct hns_roce_buf_list *buf_list;
int over_ignore;
int coalesce;
int arm_st;
@@ -740,7 +704,6 @@ struct hns_roce_eq {
struct hns_roce_eq_table {
struct hns_roce_eq *eq;
- void __iomem **eqc_base; /* only for hw v1 */
};
enum cong_type {
@@ -761,19 +724,17 @@ struct hns_roce_caps {
u32 max_sq_sg;
u32 max_sq_inline;
u32 max_rq_sg;
- u32 max_extend_sg;
+ u32 rsv0;
u32 num_qps;
u32 num_pi_qps;
u32 reserved_qps;
- int num_qpc_timer;
- int num_cqc_timer;
- int num_srqs;
+ u32 num_srqs;
u32 max_wqes;
u32 max_srq_wrs;
u32 max_srq_sges;
u32 max_sq_desc_sz;
u32 max_rq_desc_sz;
- u32 max_srq_desc_sz;
+ u32 rsv2;
int max_qp_init_rdma;
int max_qp_dest_rdma;
u32 num_cqs;
@@ -781,12 +742,12 @@ struct hns_roce_caps {
u32 min_cqes;
u32 min_wqes;
u32 reserved_cqs;
- int reserved_srqs;
+ u32 reserved_srqs;
int num_aeq_vectors;
int num_comp_vectors;
int num_other_vectors;
u32 num_mtpts;
- u32 num_mtt_segs;
+ u32 rsv1;
u32 num_srqwqe_segs;
u32 num_idx_segs;
int reserved_mrws;
@@ -855,7 +816,7 @@ struct hns_roce_caps {
u32 cqc_timer_ba_pg_sz;
u32 cqc_timer_buf_pg_sz;
u32 cqc_timer_hop_num;
- u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */
+ u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */
u32 cqe_buf_pg_sz;
u32 cqe_hop_num;
u32 srqwqe_ba_pg_sz;
@@ -874,7 +835,7 @@ struct hns_roce_caps {
u32 gmv_hop_num;
u32 sl_num;
u32 llm_buf_pg_sz;
- u32 chunk_sz; /* chunk size in non multihop mode */
+ u32 chunk_sz; /* chunk size in non multihop mode */
u64 flags;
u16 default_ceq_max_cnt;
u16 default_ceq_period;
@@ -885,11 +846,6 @@ struct hns_roce_caps {
enum cong_type cong_type;
};
-struct hns_roce_dfx_hw {
- int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn,
- int *buffer);
-};
-
enum hns_roce_device_state {
HNS_ROCE_DEVICE_STATE_INITED,
HNS_ROCE_DEVICE_STATE_RST_DOWN,
@@ -897,26 +853,21 @@ enum hns_roce_device_state {
};
struct hns_roce_hw {
- int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
int (*cmq_init)(struct hns_roce_dev *hr_dev);
void (*cmq_exit)(struct hns_roce_dev *hr_dev);
int (*hw_profile)(struct hns_roce_dev *hr_dev);
int (*hw_init)(struct hns_roce_dev *hr_dev);
void (*hw_exit)(struct hns_roce_dev *hr_dev);
- int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier, u16 op,
- u16 token, int event);
- int (*poll_mbox_done)(struct hns_roce_dev *hr_dev,
- unsigned int timeout);
+ int (*post_mbox)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg);
+ int (*poll_mbox_done)(struct hns_roce_dev *hr_dev);
bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy);
- int (*set_gid)(struct hns_roce_dev *hr_dev, u32 port, int gid_index,
+ int (*set_gid)(struct hns_roce_dev *hr_dev, int gid_index,
const union ib_gid *gid, const struct ib_gid_attr *attr);
int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port,
const u8 *addr);
- void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
- enum ib_mtu mtu);
int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
- struct hns_roce_mr *mr, unsigned long mtpt_idx);
+ struct hns_roce_mr *mr);
int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int flags,
void *mb_buf);
@@ -927,34 +878,33 @@ struct hns_roce_hw {
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
dma_addr_t dma_handle);
int (*set_hem)(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj, int step_idx);
+ struct hns_roce_hem_table *table, int obj, u32 step_idx);
int (*clear_hem)(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
- int step_idx);
+ u32 step_idx);
int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
enum ib_qp_state new_state);
int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp);
- int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
- struct ib_udata *udata);
- int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
+ void (*dereg_mr)(struct hns_roce_dev *hr_dev);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
+ int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer);
+ int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
+ int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
const struct ib_device_ops *hns_roce_dev_ops;
const struct ib_device_ops *hns_roce_dev_srq_ops;
};
struct hns_roce_dev {
struct ib_device ib_dev;
- struct platform_device *pdev;
struct pci_dev *pci_dev;
struct device *dev;
struct hns_roce_uar priv_uar;
const char *irq_names[HNS_ROCE_MAX_IRQ_NUM];
spinlock_t sm_lock;
- spinlock_t bt_cmd_lock;
bool active;
bool is_reset;
bool dis_db;
@@ -1001,15 +951,14 @@ struct hns_roce_dev {
int loop_idc;
u32 sdb_offset;
u32 odb_offset;
- dma_addr_t tptr_dma_addr; /* only for hw v1 */
- u32 tptr_size; /* only for hw v1 */
const struct hns_roce_hw *hw;
void *priv;
struct workqueue_struct *irq_workq;
- const struct hns_roce_dfx_hw *dfx;
+ struct work_struct ecc_work;
u32 func_num;
u32 is_vf;
u32 cong_algo_tmpl_id;
+ u64 dwqe_page;
};
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -1158,7 +1107,7 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
/* hns roce hw need current block and next block addr from mtt */
#define MTT_MIN_COUNT 2
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
+ u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct hns_roce_buf_attr *buf_attr,
unsigned int page_shift, struct ib_udata *udata,
@@ -1205,9 +1154,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
-int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
@@ -1245,7 +1191,6 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n);
bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq);
-enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq);
void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
@@ -1277,8 +1222,12 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
-int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
- struct ib_cq *ib_cq);
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
struct hns_user_mmap_entry *
hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
size_t length,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index fa15d79eabb3..aa8a08d1c014 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
#include "hns_roce_common.h"
@@ -456,7 +455,7 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
* alloc bt space chunk for MTT/CQE.
*/
size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
- flag = (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN;
+ flag = GFP_KERNEL | __GFP_NOWARN;
table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT,
size, flag);
if (!table->hem[index->buf]) {
@@ -489,7 +488,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_index *index)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- int step_idx;
+ u32 step_idx;
int ret = 0;
if (index->inited & HEM_INDEX_L0) {
@@ -589,8 +588,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
table->hem[i] = hns_roce_alloc_hem(hr_dev,
table->table_chunk_size >> PAGE_SHIFT,
table->table_chunk_size,
- (table->lowmem ? GFP_KERNEL :
- GFP_HIGHUSER) | __GFP_NOWARN);
+ GFP_KERNEL | __GFP_NOWARN);
if (!table->hem[i]) {
ret = -ENOMEM;
goto out;
@@ -619,7 +617,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
struct ib_device *ibdev = &hr_dev->ib_dev;
u32 hop_num = mhop->hop_num;
u32 chunk_ba_num;
- int step_idx;
+ u32 step_idx;
index->inited = HEM_INDEX_BUF;
chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
@@ -726,9 +724,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
int length;
int i, j;
- if (!table->lowmem)
- return NULL;
-
mutex_lock(&table->mutex);
if (!hns_roce_check_whether_mhop(hr_dev, table->type)) {
@@ -784,8 +779,7 @@ out:
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
- unsigned long obj_size, unsigned long nobj,
- int use_lowmem)
+ unsigned long obj_size, unsigned long nobj)
{
unsigned long obj_per_chunk;
unsigned long num_hem;
@@ -862,7 +856,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
table->type = type;
table->num_hem = num_hem;
table->obj_size = obj_size;
- table->lowmem = use_lowmem;
mutex_init(&table->mutex);
return 0;
@@ -933,7 +926,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
if (table->hem[i]) {
if (hr_dev->hw->clear_hem(hr_dev, table,
i * table->table_chunk_size / table->obj_size, 0))
- dev_err(dev, "Clear HEM base address failed.\n");
+ dev_err(dev, "clear HEM base address failed.\n");
hns_roce_free_hem(hr_dev, table->hem[i]);
}
@@ -987,7 +980,7 @@ struct hns_roce_hem_head {
static struct hns_roce_hem_item *
hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
- bool exist_bt, int bt_level)
+ bool exist_bt)
{
struct hns_roce_hem_item *hem;
@@ -1196,7 +1189,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
start_aligned = (distance / step) * step + r->offset;
end = min_t(int, start_aligned + step - 1, max_ofs);
cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
- true, level);
+ true);
if (!cur) {
ret = -ENOMEM;
goto err_exit;
@@ -1248,7 +1241,7 @@ alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num,
/* indicate to last region */
r = &regions[region_cnt - 1];
hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
- ba_num, true, 0);
+ ba_num, true);
if (!hem)
return ERR_PTR(-ENOMEM);
@@ -1265,7 +1258,7 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
struct hns_roce_hem_item *hem;
hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1,
- r->count, false, 0);
+ r->count, false);
if (!hem)
return -ENOMEM;
@@ -1422,7 +1415,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
&hem_list->btm_bt);
if (ret) {
dev_err(hr_dev->dev,
- "alloc hem trunk fail ret=%d!\n", ret);
+ "alloc hem trunk fail ret = %d!\n", ret);
goto err_alloc;
}
}
@@ -1431,7 +1424,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions,
region_cnt);
if (ret)
- dev_err(hr_dev->dev, "alloc hem root fail ret=%d!\n", ret);
+ dev_err(hr_dev->dev, "alloc hem root fail ret = %d!\n", ret);
else
return 0;
@@ -1469,19 +1462,17 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list)
void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
- int offset, int *mtt_cnt, u64 *phy_addr)
+ int offset, int *mtt_cnt)
{
struct list_head *head = &hem_list->btm_bt;
struct hns_roce_hem_item *hem, *temp_hem;
void *cpu_base = NULL;
- u64 phy_base = 0;
int nr = 0;
list_for_each_entry_safe(hem, temp_hem, head, sibling) {
if (hem_list_page_is_in_range(hem, offset)) {
nr = offset - hem->start;
cpu_base = hem->addr + nr * BA_BYTE_LEN;
- phy_base = hem->dma_addr + nr * BA_BYTE_LEN;
nr = hem->end + 1 - offset;
break;
}
@@ -1490,8 +1481,5 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
if (mtt_cnt)
*mtt_cnt = nr;
- if (phy_addr)
- *phy_addr = phy_base;
-
return cpu_base;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 2d84a6b3f05d..7d23d3c51da4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -111,8 +111,7 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
dma_addr_t *dma_handle);
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
- unsigned long obj_size, unsigned long nobj,
- int use_lowmem);
+ unsigned long obj_size, unsigned long nobj);
void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table);
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev);
@@ -132,7 +131,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list);
void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
- int offset, int *mtt_cnt, u64 *phy_addr);
+ int offset, int *mtt_cnt);
static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
struct hns_roce_hem_iter *iter)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
deleted file mode 100644
index f4af3992ba95..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ /dev/null
@@ -1,4675 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/platform_device.h>
-#include <linux/acpi.h>
-#include <linux/etherdevice.h>
-#include <linux/interrupt.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <rdma/ib_umem.h>
-#include "hns_roce_common.h"
-#include "hns_roce_device.h"
-#include "hns_roce_cmd.h"
-#include "hns_roce_hem.h"
-#include "hns_roce_hw_v1.h"
-
-/**
- * hns_get_gid_index - Get gid index.
- * @hr_dev: pointer to structure hns_roce_dev.
- * @port: port, value range: 0 ~ MAX
- * @gid_index: gid_index, value range: 0 ~ MAX
- * Description:
- * N ports shared gids, allocation method as follow:
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * And so on
- */
-u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index)
-{
- return gid_index * hr_dev->caps.num_ports + port;
-}
-
-static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg)
-{
- dseg->lkey = cpu_to_le32(sg->lkey);
- dseg->addr = cpu_to_le64(sg->addr);
- dseg->len = cpu_to_le32(sg->length);
-}
-
-static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr,
- u32 rkey)
-{
- rseg->raddr = cpu_to_le64(remote_addr);
- rseg->rkey = cpu_to_le32(rkey);
- rseg->len = 0;
-}
-
-static int hns_roce_v1_post_send(struct ib_qp *ibqp,
- const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
- struct hns_roce_ud_send_wqe *ud_sq_wqe = NULL;
- struct hns_roce_wqe_ctrl_seg *ctrl = NULL;
- struct hns_roce_wqe_data_seg *dseg = NULL;
- struct hns_roce_qp *qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_sq_db sq_db = {};
- int ps_opcode, i;
- unsigned long flags = 0;
- void *wqe = NULL;
- __le32 doorbell[2];
- const u8 *smac;
- int ret = 0;
- int loopback;
- u32 wqe_idx;
- int nreq;
-
- if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
- ibqp->qp_type != IB_QPT_RC)) {
- dev_err(dev, "un-supported QP type\n");
- *bad_wr = NULL;
- return -EOPNOTSUPP;
- }
-
- spin_lock_irqsave(&qp->sq.lock, flags);
-
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
-
- if (unlikely(wr->num_sge > qp->sq.max_gs)) {
- dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, qp->sq.max_gs);
- ret = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- wqe = hns_roce_get_send_wqe(qp, wqe_idx);
- qp->sq.wrid[wqe_idx] = wr->wr_id;
-
- /* Corresponding to the RC and RD type wqe process separately */
- if (ibqp->qp_type == IB_QPT_GSI) {
- ud_sq_wqe = wqe;
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_0_M,
- UD_SEND_WQE_U32_4_DMAC_0_S,
- ah->av.mac[0]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_1_M,
- UD_SEND_WQE_U32_4_DMAC_1_S,
- ah->av.mac[1]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_2_M,
- UD_SEND_WQE_U32_4_DMAC_2_S,
- ah->av.mac[2]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_3_M,
- UD_SEND_WQE_U32_4_DMAC_3_S,
- ah->av.mac[3]);
-
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_DMAC_4_M,
- UD_SEND_WQE_U32_8_DMAC_4_S,
- ah->av.mac[4]);
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_DMAC_5_M,
- UD_SEND_WQE_U32_8_DMAC_5_S,
- ah->av.mac[5]);
-
- smac = (const u8 *)hr_dev->dev_addr[qp->port];
- loopback = ether_addr_equal_unaligned(ah->av.mac,
- smac) ? 1 : 0;
- roce_set_bit(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
- loopback);
-
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
- UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
- HNS_ROCE_WQE_OPCODE_SEND);
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M,
- UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S,
- 2);
- roce_set_bit(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S,
- 1);
-
- ud_sq_wqe->u32_8 |= (wr->send_flags & IB_SEND_SIGNALED ?
- cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
- ((wr->opcode == IB_WR_SEND_WITH_IMM) ?
- cpu_to_le32(HNS_ROCE_WQE_IMM) : 0);
-
- roce_set_field(ud_sq_wqe->u32_16,
- UD_SEND_WQE_U32_16_DEST_QP_M,
- UD_SEND_WQE_U32_16_DEST_QP_S,
- ud_wr(wr)->remote_qpn);
- roce_set_field(ud_sq_wqe->u32_16,
- UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M,
- UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S,
- ah->av.stat_rate);
-
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_FLOW_LABEL_M,
- UD_SEND_WQE_U32_36_FLOW_LABEL_S,
- ah->av.flowlabel);
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_PRIORITY_M,
- UD_SEND_WQE_U32_36_PRIORITY_S,
- ah->av.sl);
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_SGID_INDEX_M,
- UD_SEND_WQE_U32_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev, qp->phy_port,
- ah->av.gid_index));
-
- roce_set_field(ud_sq_wqe->u32_40,
- UD_SEND_WQE_U32_40_HOP_LIMIT_M,
- UD_SEND_WQE_U32_40_HOP_LIMIT_S,
- ah->av.hop_limit);
- roce_set_field(ud_sq_wqe->u32_40,
- UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
- UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S,
- ah->av.tclass);
-
- memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
-
- ud_sq_wqe->va0_l =
- cpu_to_le32((u32)wr->sg_list[0].addr);
- ud_sq_wqe->va0_h =
- cpu_to_le32((wr->sg_list[0].addr) >> 32);
- ud_sq_wqe->l_key0 =
- cpu_to_le32(wr->sg_list[0].lkey);
-
- ud_sq_wqe->va1_l =
- cpu_to_le32((u32)wr->sg_list[1].addr);
- ud_sq_wqe->va1_h =
- cpu_to_le32((wr->sg_list[1].addr) >> 32);
- ud_sq_wqe->l_key1 =
- cpu_to_le32(wr->sg_list[1].lkey);
- } else if (ibqp->qp_type == IB_QPT_RC) {
- u32 tmp_len = 0;
-
- ctrl = wqe;
- memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg));
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
-
- ctrl->msg_length =
- cpu_to_le32(le32_to_cpu(ctrl->msg_length) + tmp_len);
-
- ctrl->sgl_pa_h = 0;
- ctrl->flag = 0;
-
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ctrl->imm_data = wr->ex.imm_data;
- break;
- case IB_WR_SEND_WITH_INV:
- ctrl->inv_key =
- cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- default:
- ctrl->imm_data = 0;
- break;
- }
-
- /* Ctrl field, ctrl set type: sig, solic, imm, fence */
- /* SO wait for conforming application scenarios */
- ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ?
- cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
- ((wr->opcode == IB_WR_SEND_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ?
- cpu_to_le32(HNS_ROCE_WQE_IMM) : 0) |
- (wr->send_flags & IB_SEND_FENCE ?
- (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0);
-
- wqe += sizeof(struct hns_roce_wqe_ctrl_seg);
-
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ;
- set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- break;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE;
- set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- break;
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- case IB_WR_SEND_WITH_IMM:
- ps_opcode = HNS_ROCE_WQE_OPCODE_SEND;
- break;
- case IB_WR_LOCAL_INV:
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- case IB_WR_LSO:
- default:
- ps_opcode = HNS_ROCE_WQE_OPCODE_MASK;
- break;
- }
- ctrl->flag |= cpu_to_le32(ps_opcode);
- wqe += sizeof(struct hns_roce_wqe_raddr_seg);
-
- dseg = wqe;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
- if (le32_to_cpu(ctrl->msg_length) >
- hr_dev->caps.max_sq_inline) {
- ret = -EINVAL;
- *bad_wr = wr;
- dev_err(dev, "inline len(1-%d)=%d, illegal",
- le32_to_cpu(ctrl->msg_length),
- hr_dev->caps.max_sq_inline);
- goto out;
- }
- for (i = 0; i < wr->num_sge; i++) {
- memcpy(wqe, ((void *) (uintptr_t)
- wr->sg_list[i].addr),
- wr->sg_list[i].length);
- wqe += wr->sg_list[i].length;
- }
- ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE);
- } else {
- /* sqe num is two */
- for (i = 0; i < wr->num_sge; i++)
- set_data_seg(dseg + i, wr->sg_list + i);
-
- ctrl->flag |= cpu_to_le32(wr->num_sge <<
- HNS_ROCE_WQE_SGE_NUM_BIT);
- }
- }
- }
-
-out:
- /* Set DB return */
- if (likely(nreq)) {
- qp->sq.head += nreq;
-
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
- SQ_DOORBELL_U32_4_SQ_HEAD_S,
- (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M,
- SQ_DOORBELL_U32_4_SL_S, qp->sl);
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
- SQ_DOORBELL_U32_4_PORT_S, qp->phy_port);
- roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
- SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn);
- roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1);
-
- doorbell[0] = sq_db.u32_4;
- doorbell[1] = sq_db.u32_8;
-
- hns_roce_write64_k(doorbell, qp->sq.db_reg);
- }
-
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return ret;
-}
-
-static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
- const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
-{
- struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
- struct hns_roce_wqe_data_seg *scat = NULL;
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_rq_db rq_db = {};
- __le32 doorbell[2] = {0};
- unsigned long flags = 0;
- unsigned int wqe_idx;
- int ret = 0;
- int nreq;
- int i;
- u32 reg_val;
-
- spin_lock_irqsave(&hr_qp->rq.lock, flags);
-
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
- hr_qp->ibqp.recv_cq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
-
- if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
- dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, hr_qp->rq.max_gs);
- ret = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- ctrl = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
-
- roce_set_field(ctrl->rwqe_byte_12,
- RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
- RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S,
- wr->num_sge);
-
- scat = (struct hns_roce_wqe_data_seg *)(ctrl + 1);
-
- for (i = 0; i < wr->num_sge; i++)
- set_data_seg(scat + i, wr->sg_list + i);
-
- hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
- }
-
-out:
- if (likely(nreq)) {
- hr_qp->rq.head += nreq;
-
- if (ibqp->qp_type == IB_QPT_GSI) {
- __le32 tmp;
-
- /* SW update GSI rq header */
- reg_val = roce_read(to_hr_dev(ibqp->device),
- ROCEE_QP1C_CFG3_0_REG +
- QP1C_CFGN_OFFSET * hr_qp->phy_port);
- tmp = cpu_to_le32(reg_val);
- roce_set_field(tmp,
- ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
- ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
- hr_qp->rq.head);
- reg_val = le32_to_cpu(tmp);
- roce_write(to_hr_dev(ibqp->device),
- ROCEE_QP1C_CFG3_0_REG +
- QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
- } else {
- roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M,
- RQ_DOORBELL_U32_4_RQ_HEAD_S,
- hr_qp->rq.head);
- roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M,
- RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
- roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M,
- RQ_DOORBELL_U32_8_CMD_S, 1);
- roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
- 1);
-
- doorbell[0] = rq_db.u32_4;
- doorbell[1] = rq_db.u32_8;
-
- hns_roce_write64_k(doorbell, hr_qp->rq.db_reg);
- }
- }
- spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
-
- return ret;
-}
-
-static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev,
- int sdb_mode, int odb_mode)
-{
- __le32 tmp;
- u32 val;
-
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- tmp = cpu_to_le32(val);
- roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
- roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
-}
-
-static int hns_roce_v1_set_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj,
- int step_idx)
-{
- spinlock_t *lock = &hr_dev->bt_cmd_lock;
- struct device *dev = hr_dev->dev;
- struct hns_roce_hem_iter iter;
- void __iomem *bt_cmd;
- __le32 bt_cmd_val[2];
- __le32 bt_cmd_h = 0;
- unsigned long flags;
- __le32 bt_cmd_l;
- int ret = 0;
- u64 bt_ba;
- long end;
-
- /* Find the HEM(Hardware Entry Memory) entry */
- unsigned long i = obj / (table->table_chunk_size / table->obj_size);
-
- switch (table->type) {
- case HEM_TYPE_QPC:
- case HEM_TYPE_MTPT:
- case HEM_TYPE_CQC:
- case HEM_TYPE_SRQC:
- roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
- break;
- default:
- return ret;
- }
-
- roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
- roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
- roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-
- /* Currently iter only a chunk */
- for (hns_roce_hem_first(table->hem[i], &iter);
- !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
- bt_ba = hns_roce_hem_addr(&iter) >> HNS_HW_PAGE_SHIFT;
-
- spin_lock_irqsave(lock, flags);
-
- bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
- end = HW_SYNC_TIMEOUT_MSECS;
- while (end > 0) {
- if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT))
- break;
-
- mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
- end -= HW_SYNC_SLEEP_TIME_INTERVAL;
- }
-
- if (end <= 0) {
- dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
- spin_unlock_irqrestore(lock, flags);
- return -EBUSY;
- }
-
- bt_cmd_l = cpu_to_le32(bt_ba);
- roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
- upper_32_bits(bt_ba));
-
- bt_cmd_val[0] = bt_cmd_l;
- bt_cmd_val[1] = bt_cmd_h;
- hns_roce_write64_k(bt_cmd_val,
- hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
- spin_unlock_irqrestore(lock, flags);
- }
-
- return ret;
-}
-
-static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode,
- u32 odb_mode)
-{
- __le32 tmp;
- u32 val;
-
- /* Configure SDB/ODB extend mode */
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- tmp = cpu_to_le32(val);
- roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
- roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
-}
-
-static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept,
- u32 sdb_alful)
-{
- __le32 tmp;
- u32 val;
-
- /* Configure SDB */
- val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
- ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful);
- roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
- ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val);
-}
-
-static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept,
- u32 odb_alful)
-{
- __le32 tmp;
- u32 val;
-
- /* Configure ODB */
- val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
- ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful);
- roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
- ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val);
-}
-
-static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
- u32 ext_sdb_alful)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_db_table *db = &priv->db_table;
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t sdb_dma_addr;
- __le32 tmp;
- u32 val;
-
- /* Configure extend SDB threshold */
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_EMPTY_REG, ext_sdb_alept);
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_REG, ext_sdb_alful);
-
- /* Configure extend SDB base addr */
- sdb_dma_addr = db->ext_db->sdb_buf_list->map;
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_REG, (u32)(sdb_dma_addr >> 12));
-
- /* Configure extend SDB depth */
- val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S,
- db->ext_db->esdb_dep);
- /*
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * calculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val);
-
- dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep);
- dev_dbg(dev, "ext SDB threshold: empty: 0x%x, ful: 0x%x\n",
- ext_sdb_alept, ext_sdb_alful);
-}
-
-static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
- u32 ext_odb_alful)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_db_table *db = &priv->db_table;
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t odb_dma_addr;
- __le32 tmp;
- u32 val;
-
- /* Configure extend ODB threshold */
- roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG, ext_odb_alept);
- roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_REG, ext_odb_alful);
-
- /* Configure extend ODB base addr */
- odb_dma_addr = db->ext_db->odb_buf_list->map;
- roce_write(hr_dev, ROCEE_EXT_DB_OTH_REG, (u32)(odb_dma_addr >> 12));
-
- /* Configure extend ODB depth */
- val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
- ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S,
- db->ext_db->eodb_dep);
- roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S,
- db->ext_db->eodb_dep);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val);
-
- dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep);
- dev_dbg(dev, "ext ODB threshold: empty: 0x%x, ful: 0x%x\n",
- ext_odb_alept, ext_odb_alful);
-}
-
-static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod,
- u32 odb_ext_mod)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_db_table *db = &priv->db_table;
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t sdb_dma_addr;
- dma_addr_t odb_dma_addr;
- int ret = 0;
-
- db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL);
- if (!db->ext_db)
- return -ENOMEM;
-
- if (sdb_ext_mod) {
- db->ext_db->sdb_buf_list = kmalloc(
- sizeof(*db->ext_db->sdb_buf_list), GFP_KERNEL);
- if (!db->ext_db->sdb_buf_list) {
- ret = -ENOMEM;
- goto ext_sdb_buf_fail_out;
- }
-
- db->ext_db->sdb_buf_list->buf = dma_alloc_coherent(dev,
- HNS_ROCE_V1_EXT_SDB_SIZE,
- &sdb_dma_addr, GFP_KERNEL);
- if (!db->ext_db->sdb_buf_list->buf) {
- ret = -ENOMEM;
- goto alloc_sq_db_buf_fail;
- }
- db->ext_db->sdb_buf_list->map = sdb_dma_addr;
-
- db->ext_db->esdb_dep = ilog2(HNS_ROCE_V1_EXT_SDB_DEPTH);
- hns_roce_set_sdb_ext(hr_dev, HNS_ROCE_V1_EXT_SDB_ALEPT,
- HNS_ROCE_V1_EXT_SDB_ALFUL);
- } else
- hns_roce_set_sdb(hr_dev, HNS_ROCE_V1_SDB_ALEPT,
- HNS_ROCE_V1_SDB_ALFUL);
-
- if (odb_ext_mod) {
- db->ext_db->odb_buf_list = kmalloc(
- sizeof(*db->ext_db->odb_buf_list), GFP_KERNEL);
- if (!db->ext_db->odb_buf_list) {
- ret = -ENOMEM;
- goto ext_odb_buf_fail_out;
- }
-
- db->ext_db->odb_buf_list->buf = dma_alloc_coherent(dev,
- HNS_ROCE_V1_EXT_ODB_SIZE,
- &odb_dma_addr, GFP_KERNEL);
- if (!db->ext_db->odb_buf_list->buf) {
- ret = -ENOMEM;
- goto alloc_otr_db_buf_fail;
- }
- db->ext_db->odb_buf_list->map = odb_dma_addr;
-
- db->ext_db->eodb_dep = ilog2(HNS_ROCE_V1_EXT_ODB_DEPTH);
- hns_roce_set_odb_ext(hr_dev, HNS_ROCE_V1_EXT_ODB_ALEPT,
- HNS_ROCE_V1_EXT_ODB_ALFUL);
- } else
- hns_roce_set_odb(hr_dev, HNS_ROCE_V1_ODB_ALEPT,
- HNS_ROCE_V1_ODB_ALFUL);
-
- hns_roce_set_db_ext_mode(hr_dev, sdb_ext_mod, odb_ext_mod);
-
- return 0;
-
-alloc_otr_db_buf_fail:
- kfree(db->ext_db->odb_buf_list);
-
-ext_odb_buf_fail_out:
- if (sdb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE,
- db->ext_db->sdb_buf_list->buf,
- db->ext_db->sdb_buf_list->map);
- }
-
-alloc_sq_db_buf_fail:
- if (sdb_ext_mod)
- kfree(db->ext_db->sdb_buf_list);
-
-ext_sdb_buf_fail_out:
- kfree(db->ext_db);
- return ret;
-}
-
-static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev,
- struct ib_pd *pd)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_qp_init_attr init_attr;
- struct ib_qp *qp;
-
- memset(&init_attr, 0, sizeof(struct ib_qp_init_attr));
- init_attr.qp_type = IB_QPT_RC;
- init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM;
- init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM;
-
- qp = ib_create_qp(pd, &init_attr);
- if (IS_ERR(qp)) {
- dev_err(dev, "Create loop qp for mr free failed!");
- return NULL;
- }
-
- return to_hr_qp(qp);
-}
-
-static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_free_mr *free_mr = &priv->free_mr;
- struct hns_roce_caps *caps = &hr_dev->caps;
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_cq_init_attr cq_init_attr;
- struct ib_qp_attr attr = { 0 };
- struct hns_roce_qp *hr_qp;
- struct ib_cq *cq;
- struct ib_pd *pd;
- union ib_gid dgid;
- __be64 subnet_prefix;
- int attr_mask = 0;
- int ret;
- int i, j;
- u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 };
- u8 phy_port;
- u32 port = 0;
- u8 sl;
-
- /* Reserved cq for loop qp */
- cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2;
- cq_init_attr.comp_vector = 0;
-
- cq = rdma_zalloc_drv_obj(ibdev, ib_cq);
- if (!cq)
- return -ENOMEM;
-
- ret = hns_roce_create_cq(cq, &cq_init_attr, NULL);
- if (ret) {
- dev_err(dev, "Create cq for reserved loop qp failed!");
- goto alloc_cq_failed;
- }
- free_mr->mr_free_cq = to_hr_cq(cq);
- free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev;
- free_mr->mr_free_cq->ib_cq.uobject = NULL;
- free_mr->mr_free_cq->ib_cq.comp_handler = NULL;
- free_mr->mr_free_cq->ib_cq.event_handler = NULL;
- free_mr->mr_free_cq->ib_cq.cq_context = NULL;
- atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
-
- pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
- if (!pd) {
- ret = -ENOMEM;
- goto alloc_mem_failed;
- }
-
- pd->device = ibdev;
- ret = hns_roce_alloc_pd(pd, NULL);
- if (ret)
- goto alloc_pd_failed;
-
- free_mr->mr_free_pd = to_hr_pd(pd);
- free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
- free_mr->mr_free_pd->ibpd.uobject = NULL;
- free_mr->mr_free_pd->ibpd.__internal_mr = NULL;
- atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
-
- attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
- attr.pkey_index = 0;
- attr.min_rnr_timer = 0;
- /* Disable read ability */
- attr.max_dest_rd_atomic = 0;
- attr.max_rd_atomic = 0;
- /* Use arbitrary values as rq_psn and sq_psn */
- attr.rq_psn = 0x0808;
- attr.sq_psn = 0x0808;
- attr.retry_cnt = 7;
- attr.rnr_retry = 7;
- attr.timeout = 0x12;
- attr.path_mtu = IB_MTU_256;
- attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
- rdma_ah_set_static_rate(&attr.ah_attr, 3);
-
- subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
- (i % HNS_ROCE_MAX_PORTS);
- sl = i / HNS_ROCE_MAX_PORTS;
-
- for (j = 0; j < caps->num_ports; j++) {
- if (hr_dev->iboe.phy_port[j] == phy_port) {
- queue_en[i] = 1;
- port = j;
- break;
- }
- }
-
- if (!queue_en[i])
- continue;
-
- free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
- if (!free_mr->mr_free_qp[i]) {
- dev_err(dev, "Create loop qp failed!\n");
- ret = -ENOMEM;
- goto create_lp_qp_failed;
- }
- hr_qp = free_mr->mr_free_qp[i];
-
- hr_qp->port = port;
- hr_qp->phy_port = phy_port;
- hr_qp->ibqp.qp_type = IB_QPT_RC;
- hr_qp->ibqp.device = &hr_dev->ib_dev;
- hr_qp->ibqp.uobject = NULL;
- atomic_set(&hr_qp->ibqp.usecnt, 0);
- hr_qp->ibqp.pd = pd;
- hr_qp->ibqp.recv_cq = cq;
- hr_qp->ibqp.send_cq = cq;
-
- rdma_ah_set_port_num(&attr.ah_attr, port + 1);
- rdma_ah_set_sl(&attr.ah_attr, sl);
- attr.port_num = port + 1;
-
- attr.dest_qp_num = hr_qp->qpn;
- memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr),
- hr_dev->dev_addr[port],
- ETH_ALEN);
-
- memcpy(&dgid.raw, &subnet_prefix, sizeof(u64));
- memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3);
- memcpy(&dgid.raw[13], hr_dev->dev_addr[port] + 3, 3);
- dgid.raw[11] = 0xff;
- dgid.raw[12] = 0xfe;
- dgid.raw[8] ^= 2;
- rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw);
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- IB_QPS_RESET, IB_QPS_INIT);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, IB_QP_DEST_QPN,
- IB_QPS_INIT, IB_QPS_RTR);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- IB_QPS_RTR, IB_QPS_RTS);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
- }
-
- return 0;
-
-create_lp_qp_failed:
- for (i -= 1; i >= 0; i--) {
- hr_qp = free_mr->mr_free_qp[i];
- if (ib_destroy_qp(&hr_qp->ibqp))
- dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
- }
-
- hns_roce_dealloc_pd(pd, NULL);
-
-alloc_pd_failed:
- kfree(pd);
-
-alloc_mem_failed:
- hns_roce_destroy_cq(cq, NULL);
-alloc_cq_failed:
- kfree(cq);
- return ret;
-}
-
-static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_free_mr *free_mr = &priv->free_mr;
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp *hr_qp;
- int ret;
- int i;
-
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- hr_qp = free_mr->mr_free_qp[i];
- if (!hr_qp)
- continue;
-
- ret = ib_destroy_qp(&hr_qp->ibqp);
- if (ret)
- dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
- i, ret);
- }
-
- hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
- kfree(&free_mr->mr_free_cq->ib_cq);
- hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL);
- kfree(&free_mr->mr_free_pd->ibpd);
-}
-
-static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_db_table *db = &priv->db_table;
- struct device *dev = &hr_dev->pdev->dev;
- u32 sdb_ext_mod;
- u32 odb_ext_mod;
- u32 sdb_evt_mod;
- u32 odb_evt_mod;
- int ret;
-
- memset(db, 0, sizeof(*db));
-
- /* Default DB mode */
- sdb_ext_mod = HNS_ROCE_SDB_EXTEND_MODE;
- odb_ext_mod = HNS_ROCE_ODB_EXTEND_MODE;
- sdb_evt_mod = HNS_ROCE_SDB_NORMAL_MODE;
- odb_evt_mod = HNS_ROCE_ODB_POLL_MODE;
-
- db->sdb_ext_mod = sdb_ext_mod;
- db->odb_ext_mod = odb_ext_mod;
-
- /* Init extend DB */
- ret = hns_roce_db_ext_init(hr_dev, sdb_ext_mod, odb_ext_mod);
- if (ret) {
- dev_err(dev, "Failed in extend DB configuration.\n");
- return ret;
- }
-
- hns_roce_set_db_event_mode(hr_dev, sdb_evt_mod, odb_evt_mod);
-
- return 0;
-}
-
-static void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work)
-{
- struct hns_roce_recreate_lp_qp_work *lp_qp_work;
- struct hns_roce_dev *hr_dev;
-
- lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work,
- work);
- hr_dev = to_hr_dev(lp_qp_work->ib_dev);
-
- hns_roce_v1_release_lp_qp(hr_dev);
-
- if (hns_roce_v1_rsv_lp_qp(hr_dev))
- dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n");
-
- if (lp_qp_work->comp_flag)
- complete(lp_qp_work->comp);
-
- kfree(lp_qp_work);
-}
-
-static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
-{
- long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_free_mr *free_mr = &priv->free_mr;
- struct hns_roce_recreate_lp_qp_work *lp_qp_work;
- struct device *dev = &hr_dev->pdev->dev;
- struct completion comp;
-
- lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work),
- GFP_KERNEL);
- if (!lp_qp_work)
- return -ENOMEM;
-
- INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn);
-
- lp_qp_work->ib_dev = &(hr_dev->ib_dev);
- lp_qp_work->comp = &comp;
- lp_qp_work->comp_flag = 1;
-
- init_completion(lp_qp_work->comp);
-
- queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
-
- while (end > 0) {
- if (try_wait_for_completion(&comp))
- return 0;
- msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
- end -= HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE;
- }
-
- lp_qp_work->comp_flag = 0;
- if (try_wait_for_completion(&comp))
- return 0;
-
- dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n");
- return -ETIMEDOUT;
-}
-
-static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_send_wr send_wr;
- const struct ib_send_wr *bad_wr;
- int ret;
-
- memset(&send_wr, 0, sizeof(send_wr));
- send_wr.next = NULL;
- send_wr.num_sge = 0;
- send_wr.send_flags = 0;
- send_wr.sg_list = NULL;
- send_wr.wr_id = (unsigned long long)&send_wr;
- send_wr.opcode = IB_WR_RDMA_WRITE;
-
- ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr);
- if (ret) {
- dev_err(dev, "Post write wqe for mr free failed(%d)!", ret);
- return ret;
- }
-
- return 0;
-}
-
-static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
-{
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
- struct hns_roce_mr_free_work *mr_work =
- container_of(work, struct hns_roce_mr_free_work, work);
- struct hns_roce_dev *hr_dev = to_hr_dev(mr_work->ib_dev);
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_free_mr *free_mr = &priv->free_mr;
- struct hns_roce_cq *mr_free_cq = free_mr->mr_free_cq;
- struct hns_roce_mr *hr_mr = mr_work->mr;
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_wc wc[HNS_ROCE_V1_RESV_QP];
- struct hns_roce_qp *hr_qp;
- int ne = 0;
- int ret;
- int i;
-
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- hr_qp = free_mr->mr_free_qp[i];
- if (!hr_qp)
- continue;
- ne++;
-
- ret = hns_roce_v1_send_lp_wqe(hr_qp);
- if (ret) {
- dev_err(dev,
- "Send wqe (qp:0x%lx) for mr free failed(%d)!\n",
- hr_qp->qpn, ret);
- goto free_work;
- }
- }
-
- if (!ne) {
- dev_err(dev, "Reserved loop qp is absent!\n");
- goto free_work;
- }
-
- do {
- ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
- if (ret < 0 && hr_qp) {
- dev_err(dev,
- "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
- hr_qp->qpn, ret, hr_mr->key, ne);
- goto free_work;
- }
- ne -= ret;
- usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000,
- (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000);
- } while (ne && time_before_eq(jiffies, end));
-
- if (ne != 0)
- dev_err(dev,
- "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n",
- hr_mr->key, ne);
-
-free_work:
- if (mr_work->comp_flag)
- complete(mr_work->comp);
- kfree(mr_work);
-}
-
-static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr, struct ib_udata *udata)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_free_mr *free_mr = &priv->free_mr;
- long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_mr_free_work *mr_work;
- unsigned long start = jiffies;
- struct completion comp;
- int ret = 0;
-
- if (mr->enabled) {
- if (hns_roce_hw_destroy_mpt(hr_dev, NULL,
- key_to_hw_index(mr->key) &
- (hr_dev->caps.num_mtpts - 1)))
- dev_warn(dev, "DESTROY_MPT failed!\n");
- }
-
- mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
- if (!mr_work) {
- ret = -ENOMEM;
- goto free_mr;
- }
-
- INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn);
-
- mr_work->ib_dev = &(hr_dev->ib_dev);
- mr_work->comp = &comp;
- mr_work->comp_flag = 1;
- mr_work->mr = (void *)mr;
- init_completion(mr_work->comp);
-
- queue_work(free_mr->free_mr_wq, &(mr_work->work));
-
- while (end > 0) {
- if (try_wait_for_completion(&comp))
- goto free_mr;
- msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
- end -= HNS_ROCE_V1_FREE_MR_WAIT_VALUE;
- }
-
- mr_work->comp_flag = 0;
- if (try_wait_for_completion(&comp))
- goto free_mr;
-
- dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key);
- ret = -ETIMEDOUT;
-
-free_mr:
- dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
- mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
-
- ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)key_to_hw_index(mr->key));
- hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
- kfree(mr);
-
- return ret;
-}
-
-static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_db_table *db = &priv->db_table;
- struct device *dev = &hr_dev->pdev->dev;
-
- if (db->sdb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE,
- db->ext_db->sdb_buf_list->buf,
- db->ext_db->sdb_buf_list->map);
- kfree(db->ext_db->sdb_buf_list);
- }
-
- if (db->odb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_ODB_SIZE,
- db->ext_db->odb_buf_list->buf,
- db->ext_db->odb_buf_list->map);
- kfree(db->ext_db->odb_buf_list);
- }
-
- kfree(db->ext_db);
-}
-
-static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_raq_table *raq = &priv->raq_table;
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t addr;
- int raq_shift;
- __le32 tmp;
- u32 val;
- int ret;
-
- raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL);
- if (!raq->e_raq_buf)
- return -ENOMEM;
-
- raq->e_raq_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_RAQ_SIZE,
- &addr, GFP_KERNEL);
- if (!raq->e_raq_buf->buf) {
- ret = -ENOMEM;
- goto err_dma_alloc_raq;
- }
- raq->e_raq_buf->map = addr;
-
- /* Configure raq extended address. 48bit 4K align */
- roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12);
-
- /* Configure raq_shift */
- raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY);
- val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
- ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift);
- /*
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * calculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
- ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S,
- raq->e_raq_buf->map >> 44);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val);
- dev_dbg(dev, "Configure raq_shift 0x%x.\n", val);
-
- /* Configure raq threshold */
- val = roce_read(hr_dev, ROCEE_RAQ_WL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
- ROCEE_RAQ_WL_ROCEE_RAQ_WL_S,
- HNS_ROCE_V1_EXT_RAQ_WF);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_RAQ_WL_REG, val);
- dev_dbg(dev, "Configure raq_wl 0x%x.\n", val);
-
- /* Enable extend raq */
- val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S,
- POL_TIME_INTERVAL_VAL);
- roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
- roce_set_field(tmp,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S,
- 2);
- roce_set_bit(tmp,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val);
- dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val);
-
- /* Enable raq drop */
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- tmp = cpu_to_le32(val);
- roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val);
-
- return 0;
-
-err_dma_alloc_raq:
- kfree(raq->e_raq_buf);
- return ret;
-}
-
-static void hns_roce_raq_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_raq_table *raq = &priv->raq_table;
- struct device *dev = &hr_dev->pdev->dev;
-
- dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf,
- raq->e_raq_buf->map);
- kfree(raq->e_raq_buf);
-}
-
-static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag)
-{
- __le32 tmp;
- u32 val;
-
- if (enable_flag) {
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- /* Open all ports */
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
- ROCEE_GLB_CFG_ROCEE_PORT_ST_S,
- ALL_PORT_VAL_OPEN);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- } else {
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- /* Close all ports */
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
- ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- }
-}
-
-static int hns_roce_bt_init(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct device *dev = &hr_dev->pdev->dev;
- int ret;
-
- priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.qpc_buf.buf)
- return -ENOMEM;
-
- priv->bt_table.mtpt_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.mtpt_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.mtpt_buf.buf) {
- ret = -ENOMEM;
- goto err_failed_alloc_mtpt_buf;
- }
-
- priv->bt_table.cqc_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.cqc_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.cqc_buf.buf) {
- ret = -ENOMEM;
- goto err_failed_alloc_cqc_buf;
- }
-
- return 0;
-
-err_failed_alloc_cqc_buf:
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
-
-err_failed_alloc_mtpt_buf:
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
-
- return ret;
-}
-
-static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct device *dev = &hr_dev->pdev->dev;
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map);
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
-}
-
-static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf;
- struct device *dev = &hr_dev->pdev->dev;
-
- /*
- * This buffer will be used for CQ's tptr(tail pointer), also
- * named ci(customer index). Every CQ will use 2 bytes to save
- * cqe ci in hip06. Hardware will read this area to get new ci
- * when the queue is almost full.
- */
- tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
- &tptr_buf->map, GFP_KERNEL);
- if (!tptr_buf->buf)
- return -ENOMEM;
-
- hr_dev->tptr_dma_addr = tptr_buf->map;
- hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
-
- return 0;
-}
-
-static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf;
- struct device *dev = &hr_dev->pdev->dev;
-
- dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
- tptr_buf->buf, tptr_buf->map);
-}
-
-static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_free_mr *free_mr = &priv->free_mr;
- struct device *dev = &hr_dev->pdev->dev;
- int ret;
-
- free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
- if (!free_mr->free_mr_wq) {
- dev_err(dev, "Create free mr workqueue failed!\n");
- return -ENOMEM;
- }
-
- ret = hns_roce_v1_rsv_lp_qp(hr_dev);
- if (ret) {
- dev_err(dev, "Reserved loop qp failed(%d)!\n", ret);
- destroy_workqueue(free_mr->free_mr_wq);
- }
-
- return ret;
-}
-
-static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_free_mr *free_mr = &priv->free_mr;
-
- destroy_workqueue(free_mr->free_mr_wq);
-
- hns_roce_v1_release_lp_qp(hr_dev);
-}
-
-/**
- * hns_roce_v1_reset - reset RoCE
- * @hr_dev: RoCE device struct pointer
- * @dereset: true -- drop reset, false -- reset
- * return 0 - success , negative --fail
- */
-static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
-{
- struct device_node *dsaf_node;
- struct device *dev = &hr_dev->pdev->dev;
- struct device_node *np = dev->of_node;
- struct fwnode_handle *fwnode;
- int ret;
-
- /* check if this is DT/ACPI case */
- if (dev_of_node(dev)) {
- dsaf_node = of_parse_phandle(np, "dsaf-handle", 0);
- if (!dsaf_node) {
- dev_err(dev, "could not find dsaf-handle\n");
- return -EINVAL;
- }
- fwnode = &dsaf_node->fwnode;
- } else if (is_acpi_device_node(dev->fwnode)) {
- struct fwnode_reference_args args;
-
- ret = acpi_node_get_property_reference(dev->fwnode,
- "dsaf-handle", 0, &args);
- if (ret) {
- dev_err(dev, "could not find dsaf-handle\n");
- return ret;
- }
- fwnode = args.fwnode;
- } else {
- dev_err(dev, "cannot read data from DT or ACPI\n");
- return -ENXIO;
- }
-
- ret = hns_dsaf_roce_reset(fwnode, false);
- if (ret)
- return ret;
-
- if (dereset) {
- msleep(SLEEP_TIME_INTERVAL);
- ret = hns_dsaf_roce_reset(fwnode, true);
- }
-
- return ret;
-}
-
-static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_caps *caps = &hr_dev->caps;
- int i;
-
- hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
- hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
- hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) |
- ((u64)roce_read(hr_dev,
- ROCEE_SYS_IMAGE_GUID_H_REG) << 32);
- hr_dev->hw_rev = HNS_ROCE_HW_VER1;
-
- caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM;
- caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM;
- caps->min_wqes = HNS_ROCE_MIN_WQE_NUM;
- caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM;
- caps->min_cqes = HNS_ROCE_MIN_CQE_NUM;
- caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM;
- caps->max_sq_sg = HNS_ROCE_V1_SG_NUM;
- caps->max_rq_sg = HNS_ROCE_V1_SG_NUM;
- caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE;
- caps->num_uars = HNS_ROCE_V1_UAR_NUM;
- caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM;
- caps->num_aeq_vectors = HNS_ROCE_V1_AEQE_VEC_NUM;
- caps->num_comp_vectors = HNS_ROCE_V1_COMP_VEC_NUM;
- caps->num_other_vectors = HNS_ROCE_V1_ABNORMAL_VEC_NUM;
- caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM;
- caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS;
- caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM;
- caps->max_qp_init_rdma = HNS_ROCE_V1_MAX_QP_INIT_RDMA;
- caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA;
- caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ;
- caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ;
- caps->qpc_sz = HNS_ROCE_V1_QPC_SIZE;
- caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE;
- caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE;
- caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE;
- caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE;
- caps->cqe_sz = HNS_ROCE_V1_CQE_SIZE;
- caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
- caps->reserved_lkey = 0;
- caps->reserved_pds = 0;
- caps->reserved_mrws = 1;
- caps->reserved_uars = 0;
- caps->reserved_cqs = 0;
- caps->reserved_qps = 12; /* 2 SQP per port, six ports total 12 */
- caps->chunk_sz = HNS_ROCE_V1_TABLE_CHUNK_SIZE;
-
- for (i = 0; i < caps->num_ports; i++)
- caps->pkey_table_len[i] = 1;
-
- for (i = 0; i < caps->num_ports; i++) {
- /* Six ports shared 16 GID in v1 engine */
- if (i >= (HNS_ROCE_V1_GID_NUM % caps->num_ports))
- caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM /
- caps->num_ports;
- else
- caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM /
- caps->num_ports + 1;
- }
-
- caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM;
- caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM;
- caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG);
- caps->max_mtu = IB_MTU_2048;
-
- return 0;
-}
-
-static int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
-{
- int ret;
- u32 val;
- __le32 tmp;
- struct device *dev = &hr_dev->pdev->dev;
-
- /* DMAE user config */
- val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
- ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
- ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S,
- 1 << PAGES_SHIFT_16);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val);
-
- val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
- ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
- ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S,
- 1 << PAGES_SHIFT_16);
-
- ret = hns_roce_db_init(hr_dev);
- if (ret) {
- dev_err(dev, "doorbell init failed!\n");
- return ret;
- }
-
- ret = hns_roce_raq_init(hr_dev);
- if (ret) {
- dev_err(dev, "raq init failed!\n");
- goto error_failed_raq_init;
- }
-
- ret = hns_roce_bt_init(hr_dev);
- if (ret) {
- dev_err(dev, "bt init failed!\n");
- goto error_failed_bt_init;
- }
-
- ret = hns_roce_tptr_init(hr_dev);
- if (ret) {
- dev_err(dev, "tptr init failed!\n");
- goto error_failed_tptr_init;
- }
-
- ret = hns_roce_free_mr_init(hr_dev);
- if (ret) {
- dev_err(dev, "free mr init failed!\n");
- goto error_failed_free_mr_init;
- }
-
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
-
- return 0;
-
-error_failed_free_mr_init:
- hns_roce_tptr_free(hr_dev);
-
-error_failed_tptr_init:
- hns_roce_bt_free(hr_dev);
-
-error_failed_bt_init:
- hns_roce_raq_free(hr_dev);
-
-error_failed_raq_init:
- hns_roce_db_free(hr_dev);
- return ret;
-}
-
-static void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
-{
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
- hns_roce_free_mr_free(hr_dev);
- hns_roce_tptr_free(hr_dev);
- hns_roce_bt_free(hr_dev);
- hns_roce_raq_free(hr_dev);
- hns_roce_db_free(hr_dev);
-}
-
-static int hns_roce_v1_cmd_pending(struct hns_roce_dev *hr_dev)
-{
- u32 status = readl(hr_dev->reg_base + ROCEE_MB6_REG);
-
- return (!!(status & (1 << HCR_GO_BIT)));
-}
-
-static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier,
- u16 op, u16 token, int event)
-{
- u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG);
- unsigned long end;
- u32 val = 0;
- __le32 tmp;
-
- end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies;
- while (hns_roce_v1_cmd_pending(hr_dev)) {
- if (time_after(jiffies, end)) {
- dev_err(hr_dev->dev, "jiffies=%d end=%d\n",
- (int)jiffies, (int)end);
- return -EAGAIN;
- }
- cond_resched();
- }
-
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
- op);
- roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
- ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier);
- roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
- roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
- roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M,
- ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
-
- val = le32_to_cpu(tmp);
- writeq(in_param, hcr + 0);
- writeq(out_param, hcr + 2);
- writel(in_modifier, hcr + 4);
- /* Memory barrier */
- wmb();
-
- writel(val, hcr + 5);
-
- return 0;
-}
-
-static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
- unsigned int timeout)
-{
- u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG;
- unsigned long end;
- u32 status = 0;
-
- end = msecs_to_jiffies(timeout) + jiffies;
- while (hns_roce_v1_cmd_pending(hr_dev) && time_before(jiffies, end))
- cond_resched();
-
- if (hns_roce_v1_cmd_pending(hr_dev)) {
- dev_err(hr_dev->dev, "[cmd_poll]hw run cmd TIMEDOUT!\n");
- return -ETIMEDOUT;
- }
-
- status = le32_to_cpu((__force __le32)
- __raw_readl(hcr + HCR_STATUS_OFFSET));
- if ((status & STATUS_MASK) != 0x1) {
- dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status);
- return -EBUSY;
- }
-
- return 0;
-}
-
-static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u32 port,
- int gid_index, const union ib_gid *gid,
- const struct ib_gid_attr *attr)
-{
- unsigned long flags;
- u32 *p = NULL;
- u8 gid_idx;
-
- gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
- p = (u32 *)&gid->raw[0];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[4];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_ML_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[8];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_MH_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[0xc];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-
- return 0;
-}
-
-static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
- const u8 *addr)
-{
- u32 reg_smac_l;
- u16 reg_smac_h;
- __le32 tmp;
- u16 *p_h;
- u32 *p;
- u32 val;
-
- /*
- * When mac changed, loopback may fail
- * because of smac not equal to dmac.
- * We Need to release and create reserved qp again.
- */
- if (hr_dev->hw->dereg_mr) {
- int ret;
-
- ret = hns_roce_v1_recreate_lp_qp(hr_dev);
- if (ret && ret != -ETIMEDOUT)
- return ret;
- }
-
- p = (u32 *)(&addr[0]);
- reg_smac_l = *p;
- roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG +
- PHY_PORT_OFFSET * phy_port);
-
- val = roce_read(hr_dev,
- ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
- tmp = cpu_to_le32(val);
- p_h = (u16 *)(&addr[4]);
- reg_smac_h = *p_h;
- roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
- ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
- val);
-
- return 0;
-}
-
-static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
- enum ib_mtu mtu)
-{
- __le32 tmp;
- u32 val;
-
- val = roce_read(hr_dev,
- ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
- ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
- val);
-}
-
-static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf,
- struct hns_roce_mr *mr,
- unsigned long mtpt_idx)
-{
- u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 };
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_v1_mpt_entry *mpt_entry;
- dma_addr_t pbl_ba;
- int count;
- int i;
-
- /* MPT filled into mailbox buf */
- mpt_entry = (struct hns_roce_v1_mpt_entry *)mb_buf;
- memset(mpt_entry, 0, sizeof(*mpt_entry));
-
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_STATE_M,
- MPT_BYTE_4_KEY_STATE_S, KEY_VALID);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_M,
- MPT_BYTE_4_KEY_S, mr->key);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_PAGE_SIZE_M,
- MPT_BYTE_4_PAGE_SIZE_S, MR_SIZE_4K);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_TYPE_S, 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_BIND_ENABLE_S,
- (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_OWN_S, 0);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_MEMORY_LOCATION_TYPE_M,
- MPT_BYTE_4_MEMORY_LOCATION_TYPE_S, mr->type);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_ATOMIC_S, 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_LOCAL_WRITE_S,
- (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_WRITE_S,
- (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_READ_S,
- (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_INVAL_ENABLE_S,
- 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_ADDRESS_TYPE_S, 0);
-
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
- MPT_BYTE_12_PBL_ADDR_H_S, 0);
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M,
- MPT_BYTE_12_MW_BIND_COUNTER_S, 0);
-
- mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova);
- mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32));
- mpt_entry->length = cpu_to_le32((u32)mr->size);
-
- roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M,
- MPT_BYTE_28_PD_S, mr->pd);
- roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_L_KEY_IDX_L_M,
- MPT_BYTE_28_L_KEY_IDX_L_S, mtpt_idx);
- roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M,
- MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT);
-
- /* DMA memory register */
- if (mr->type == MR_TYPE_DMA)
- return 0;
-
- count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
- ARRAY_SIZE(pages), &pbl_ba);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find PBL mtr, count = %d.", count);
- return -ENOBUFS;
- }
-
- /* Register user mr */
- for (i = 0; i < count; i++) {
- switch (i) {
- case 0:
- mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_36,
- MPT_BYTE_36_PA0_H_M,
- MPT_BYTE_36_PA0_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_32));
- break;
- case 1:
- roce_set_field(mpt_entry->mpt_byte_36,
- MPT_BYTE_36_PA1_L_M,
- MPT_BYTE_36_PA1_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_40,
- MPT_BYTE_40_PA1_H_M,
- MPT_BYTE_40_PA1_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_24));
- break;
- case 2:
- roce_set_field(mpt_entry->mpt_byte_40,
- MPT_BYTE_40_PA2_L_M,
- MPT_BYTE_40_PA2_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_44,
- MPT_BYTE_44_PA2_H_M,
- MPT_BYTE_44_PA2_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_16));
- break;
- case 3:
- roce_set_field(mpt_entry->mpt_byte_44,
- MPT_BYTE_44_PA3_L_M,
- MPT_BYTE_44_PA3_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_48,
- MPT_BYTE_48_PA3_H_M,
- MPT_BYTE_48_PA3_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_8));
- break;
- case 4:
- mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_56,
- MPT_BYTE_56_PA4_H_M,
- MPT_BYTE_56_PA4_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_32));
- break;
- case 5:
- roce_set_field(mpt_entry->mpt_byte_56,
- MPT_BYTE_56_PA5_L_M,
- MPT_BYTE_56_PA5_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_60,
- MPT_BYTE_60_PA5_H_M,
- MPT_BYTE_60_PA5_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_24));
- break;
- case 6:
- roce_set_field(mpt_entry->mpt_byte_60,
- MPT_BYTE_60_PA6_L_M,
- MPT_BYTE_60_PA6_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_64,
- MPT_BYTE_64_PA6_H_M,
- MPT_BYTE_64_PA6_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_16));
- break;
- default:
- break;
- }
- }
-
- mpt_entry->pbl_addr_l = cpu_to_le32(pbl_ba);
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
- MPT_BYTE_12_PBL_ADDR_H_S, upper_32_bits(pbl_ba));
-
- return 0;
-}
-
-static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
-{
- return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE);
-}
-
-static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
-{
- struct hns_roce_cqe *hr_cqe = get_cqe(hr_cq, n & hr_cq->ib_cq.cqe);
-
- /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
- return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^
- !!(n & hr_cq->cq_depth)) ? hr_cqe : NULL;
-}
-
-static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
-{
- return get_sw_cqe(hr_cq, hr_cq->cons_index);
-}
-
-static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
-{
- __le32 doorbell[2];
-
- doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1));
- doorbell[1] = 0;
- roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 0);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, hr_cq->cqn);
-
- hns_roce_write64_k(doorbell, hr_cq->db_reg);
-}
-
-static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
- struct hns_roce_srq *srq)
-{
- struct hns_roce_cqe *cqe, *dest;
- u32 prod_index;
- int nfreed = 0;
- u8 owner_bit;
-
- for (prod_index = hr_cq->cons_index; get_sw_cqe(hr_cq, prod_index);
- ++prod_index) {
- if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe)
- break;
- }
-
- /*
- * Now backwards through the CQ, removing CQ entries
- * that match our QP by overwriting them with next entries.
- */
- while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
- cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe);
- if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) &
- HNS_ROCE_CQE_QPN_MASK) == qpn) {
- /* In v1 engine, not support SRQ */
- ++nfreed;
- } else if (nfreed) {
- dest = get_cqe(hr_cq, (prod_index + nfreed) &
- hr_cq->ib_cq.cqe);
- owner_bit = roce_get_bit(dest->cqe_byte_4,
- CQE_BYTE_4_OWNER_S);
- memcpy(dest, cqe, sizeof(*cqe));
- roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S,
- owner_bit);
- }
- }
-
- if (nfreed) {
- hr_cq->cons_index += nfreed;
- hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
-}
-
-static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
- struct hns_roce_srq *srq)
-{
- spin_lock_irq(&hr_cq->lock);
- __hns_roce_v1_cq_clean(hr_cq, qpn, srq);
- spin_unlock_irq(&hr_cq->lock);
-}
-
-static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq, void *mb_buf,
- u64 *mtts, dma_addr_t dma_handle)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf;
- struct hns_roce_cq_context *cq_context = mb_buf;
- dma_addr_t tptr_dma_addr;
- int offset;
-
- memset(cq_context, 0, sizeof(*cq_context));
-
- /* Get the tptr for this CQ. */
- offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
- tptr_dma_addr = tptr_buf->map + offset;
- hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
-
- /* Register cq_context members */
- roce_set_field(cq_context->cqc_byte_4,
- CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M,
- CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID);
- roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M,
- CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn);
-
- cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle);
-
- roce_set_field(cq_context->cqc_byte_12,
- CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M,
- CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S,
- ((u64)dma_handle >> 32));
- roce_set_field(cq_context->cqc_byte_12,
- CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M,
- CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S,
- ilog2(hr_cq->cq_depth));
- roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
- CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector);
-
- cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0]));
-
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M,
- CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32);
- /* Dedicated hardware, directly set 0 */
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M,
- CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S, 0);
- /**
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * calculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
- CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
- tptr_dma_addr >> 44);
-
- cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12));
-
- roce_set_field(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
- CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S,
- 0);
- /* The initial value of cq's ci is 0 */
- roce_set_field(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
- CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
-}
-
-static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
- enum ib_cq_notify_flags flags)
-{
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- u32 notification_flag;
- __le32 doorbell[2] = {};
-
- notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
- IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
- /*
- * flags = 0; Notification Flag = 1, next
- * flags = 1; Notification Flag = 0, solocited
- */
- doorbell[0] =
- cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1));
- roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S,
- hr_cq->cqn | notification_flag);
-
- hns_roce_write64_k(doorbell, hr_cq->db_reg);
-
- return 0;
-}
-
-static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
- struct hns_roce_qp **cur_qp, struct ib_wc *wc)
-{
- int qpn;
- int is_send;
- u16 wqe_ctr;
- u32 status;
- u32 opcode;
- struct hns_roce_cqe *cqe;
- struct hns_roce_qp *hr_qp;
- struct hns_roce_wq *wq;
- struct hns_roce_wqe_ctrl_seg *sq_wqe;
- struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
- struct device *dev = &hr_dev->pdev->dev;
-
- /* Find cqe according consumer index */
- cqe = next_cqe_sw(hr_cq);
- if (!cqe)
- return -EAGAIN;
-
- ++hr_cq->cons_index;
- /* Memory barrier */
- rmb();
- /* 0->SQ, 1->RQ */
- is_send = !(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S));
-
- /* Local_qpn in UD cqe is always 1, so it needs to compute new qpn */
- if (roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) <= 1) {
- qpn = roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_PORT_NUM_M,
- CQE_BYTE_20_PORT_NUM_S) +
- roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) *
- HNS_ROCE_MAX_PORTS;
- } else {
- qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S);
- }
-
- if (!*cur_qp || (qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->qpn) {
- hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
- if (unlikely(!hr_qp)) {
- dev_err(dev, "CQ %06lx with entry for unknown QPN %06x\n",
- hr_cq->cqn, (qpn & HNS_ROCE_CQE_QPN_MASK));
- return -EINVAL;
- }
-
- *cur_qp = hr_qp;
- }
-
- wc->qp = &(*cur_qp)->ibqp;
- wc->vendor_err = 0;
-
- status = roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
- CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) &
- HNS_ROCE_CQE_STATUS_MASK;
- switch (status) {
- case HNS_ROCE_CQE_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR:
- wc->status = IB_WC_LOC_QP_OP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR:
- wc->status = IB_WC_BAD_RESP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
- wc->status = IB_WC_REM_INV_REQ_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR:
- wc->status = IB_WC_REM_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR:
- wc->status = IB_WC_REM_OP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
- wc->status = IB_WC_RETRY_EXC_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
- wc->status = IB_WC_RNR_RETRY_EXC_ERR;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
-
- /* CQE status error, directly return */
- if (wc->status != IB_WC_SUCCESS)
- return 0;
-
- if (is_send) {
- /* SQ conrespond to CQE */
- sq_wqe = hns_roce_get_send_wqe(*cur_qp,
- roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_WQE_INDEX_M,
- CQE_BYTE_4_WQE_INDEX_S) &
- ((*cur_qp)->sq.wqe_cnt-1));
- switch (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_OPCODE_MASK) {
- case HNS_ROCE_WQE_OPCODE_SEND:
- wc->opcode = IB_WC_SEND;
- break;
- case HNS_ROCE_WQE_OPCODE_RDMA_READ:
- wc->opcode = IB_WC_RDMA_READ;
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
- break;
- case HNS_ROCE_WQE_OPCODE_RDMA_WRITE:
- wc->opcode = IB_WC_RDMA_WRITE;
- break;
- case HNS_ROCE_WQE_OPCODE_LOCAL_INV:
- wc->opcode = IB_WC_LOCAL_INV;
- break;
- case HNS_ROCE_WQE_OPCODE_UD_SEND:
- wc->opcode = IB_WC_SEND;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
- wc->wc_flags = (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_IMM ?
- IB_WC_WITH_IMM : 0);
-
- wq = &(*cur_qp)->sq;
- if ((*cur_qp)->sq_signal_bits) {
- /*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
- wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_WQE_INDEX_M,
- CQE_BYTE_4_WQE_INDEX_S);
- wq->tail += (wqe_ctr - (u16)wq->tail) &
- (wq->wqe_cnt - 1);
- }
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- } else {
- /* RQ conrespond to CQE */
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
- opcode = roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_OPERATION_TYPE_M,
- CQE_BYTE_4_OPERATION_TYPE_S) &
- HNS_ROCE_CQE_OPCODE_MASK;
- switch (opcode) {
- case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
- wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data =
- cpu_to_be32(le32_to_cpu(cqe->immediate_data));
- break;
- case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
- if (roce_get_bit(cqe->cqe_byte_4,
- CQE_BYTE_4_IMM_INDICATOR_S)) {
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = cpu_to_be32(
- le32_to_cpu(cqe->immediate_data));
- } else {
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
- }
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
-
- /* Update tail pointer, record wr_id */
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- wc->sl = (u8)roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_SL_M,
- CQE_BYTE_20_SL_S);
- wc->src_qp = (u8)roce_get_field(cqe->cqe_byte_20,
- CQE_BYTE_20_REMOTE_QPN_M,
- CQE_BYTE_20_REMOTE_QPN_S);
- wc->wc_flags |= (roce_get_bit(cqe->cqe_byte_20,
- CQE_BYTE_20_GRH_PRESENT_S) ?
- IB_WC_GRH : 0);
- wc->pkey_index = (u16)roce_get_field(cqe->cqe_byte_28,
- CQE_BYTE_28_P_KEY_IDX_M,
- CQE_BYTE_28_P_KEY_IDX_S);
- }
-
- return 0;
-}
-
-int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
-{
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- struct hns_roce_qp *cur_qp = NULL;
- unsigned long flags;
- int npolled;
- int ret;
-
- spin_lock_irqsave(&hr_cq->lock, flags);
-
- for (npolled = 0; npolled < num_entries; ++npolled) {
- ret = hns_roce_v1_poll_one(hr_cq, &cur_qp, wc + npolled);
- if (ret)
- break;
- }
-
- if (npolled) {
- *hr_cq->tptr_addr = hr_cq->cons_index &
- ((hr_cq->cq_depth << 1) - 1);
-
- hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
-
- spin_unlock_irqrestore(&hr_cq->lock, flags);
-
- if (ret == 0 || ret == -EAGAIN)
- return npolled;
- else
- return ret;
-}
-
-static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj,
- int step_idx)
-{
- struct hns_roce_v1_priv *priv = hr_dev->priv;
- struct device *dev = &hr_dev->pdev->dev;
- long end = HW_SYNC_TIMEOUT_MSECS;
- __le32 bt_cmd_val[2] = {0};
- unsigned long flags = 0;
- void __iomem *bt_cmd;
- u64 bt_ba = 0;
-
- switch (table->type) {
- case HEM_TYPE_QPC:
- bt_ba = priv->bt_table.qpc_buf.map >> 12;
- break;
- case HEM_TYPE_MTPT:
- bt_ba = priv->bt_table.mtpt_buf.map >> 12;
- break;
- case HEM_TYPE_CQC:
- bt_ba = priv->bt_table.cqc_buf.map >> 12;
- break;
- case HEM_TYPE_SRQC:
- dev_dbg(dev, "HEM_TYPE_SRQC not support.\n");
- return -EINVAL;
- default:
- return 0;
- }
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
- roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
- roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-
- spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags);
-
- bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
- while (1) {
- if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
- if (!end) {
- dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
- spin_unlock_irqrestore(&hr_dev->bt_cmd_lock,
- flags);
- return -EBUSY;
- }
- } else {
- break;
- }
- mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
- end -= HW_SYNC_SLEEP_TIME_INTERVAL;
- }
-
- bt_cmd_val[0] = cpu_to_le32(bt_ba);
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
- hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
-
- spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags);
-
- return 0;
-}
-
-static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
- enum hns_roce_qp_state cur_state,
- enum hns_roce_qp_state new_state,
- struct hns_roce_qp_context *context,
- struct hns_roce_qp *hr_qp)
-{
- static const u16
- op[HNS_ROCE_QP_NUM_STATE][HNS_ROCE_QP_NUM_STATE] = {
- [HNS_ROCE_QP_STATE_RST] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
- },
- [HNS_ROCE_QP_STATE_INIT] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- /* Note: In v1 engine, HW doesn't support RST2INIT.
- * We use RST2INIT cmd instead of INIT2INIT.
- */
- [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
- [HNS_ROCE_QP_STATE_RTR] = HNS_ROCE_CMD_INIT2RTR_QP,
- },
- [HNS_ROCE_QP_STATE_RTR] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTR2RTS_QP,
- },
- [HNS_ROCE_QP_STATE_RTS] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTS2RTS_QP,
- [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_RTS2SQD_QP,
- },
- [HNS_ROCE_QP_STATE_SQD] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_SQD2RTS_QP,
- [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_SQD2SQD_QP,
- },
- [HNS_ROCE_QP_STATE_ERR] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- }
- };
-
- struct hns_roce_cmd_mailbox *mailbox;
- struct device *dev = &hr_dev->pdev->dev;
- int ret;
-
- if (cur_state >= HNS_ROCE_QP_NUM_STATE ||
- new_state >= HNS_ROCE_QP_NUM_STATE ||
- !op[cur_state][new_state]) {
- dev_err(dev, "[modify_qp]not support state %d to %d\n",
- cur_state, new_state);
- return -EINVAL;
- }
-
- if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
- return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
- HNS_ROCE_CMD_2RST_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
- return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
- HNS_ROCE_CMD_2ERR_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- memcpy(mailbox->buf, context, sizeof(*context));
-
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
- op[cur_state][new_state],
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return ret;
-}
-
-static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
- u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba)
-{
- struct ib_device *ibdev = &hr_dev->ib_dev;
- int count;
-
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba);
- if (count < 1) {
- ibdev_err(ibdev, "Failed to find SQ ba\n");
- return -ENOBUFS;
- }
-
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba,
- 1, NULL);
- if (!count) {
- ibdev_err(ibdev, "Failed to find RQ ba\n");
- return -ENOBUFS;
- }
-
- return 0;
-}
-
-static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_sqp_context *context;
- dma_addr_t dma_handle = 0;
- u32 __iomem *addr;
- u64 sq_ba = 0;
- u64 rq_ba = 0;
- __le32 tmp;
- u32 reg_val;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- /* Search QP buf's MTTs */
- if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle))
- goto out;
-
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- roce_set_field(context->qp1c_bytes_4,
- QP1C_BYTES_4_SQ_WQE_SHIFT_M,
- QP1C_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qp1c_bytes_4,
- QP1C_BYTES_4_RQ_WQE_SHIFT_M,
- QP1C_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
- QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
-
- context->sq_rq_bt_l = cpu_to_le32(dma_handle);
- roce_set_field(context->qp1c_bytes_12,
- QP1C_BYTES_12_SQ_RQ_BT_H_M,
- QP1C_BYTES_12_SQ_RQ_BT_H_S,
- upper_32_bits(dma_handle));
-
- roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M,
- QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head);
- roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M,
- QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
- roce_set_bit(context->qp1c_bytes_16,
- QP1C_BYTES_16_SIGNALING_TYPE_S,
- hr_qp->sq_signal_bits);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
- 1);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
- 1);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_QP1_ERR_S,
- 0);
-
- roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_SQ_HEAD_M,
- QP1C_BYTES_20_SQ_HEAD_S, hr_qp->sq.head);
- roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M,
- QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
-
- context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
-
- roce_set_field(context->qp1c_bytes_28,
- QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
- QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S,
- upper_32_bits(rq_ba));
- roce_set_field(context->qp1c_bytes_28,
- QP1C_BYTES_28_RQ_CUR_IDX_M,
- QP1C_BYTES_28_RQ_CUR_IDX_S, 0);
-
- roce_set_field(context->qp1c_bytes_32,
- QP1C_BYTES_32_RX_CQ_NUM_M,
- QP1C_BYTES_32_RX_CQ_NUM_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
- roce_set_field(context->qp1c_bytes_32,
- QP1C_BYTES_32_TX_CQ_NUM_M,
- QP1C_BYTES_32_TX_CQ_NUM_S,
- to_hr_cq(ibqp->send_cq)->cqn);
-
- context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
-
- roce_set_field(context->qp1c_bytes_40,
- QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
- QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S,
- upper_32_bits(sq_ba));
- roce_set_field(context->qp1c_bytes_40,
- QP1C_BYTES_40_SQ_CUR_IDX_M,
- QP1C_BYTES_40_SQ_CUR_IDX_S, 0);
-
- /* Copy context to QP1C register */
- addr = (u32 __iomem *)(hr_dev->reg_base +
- ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context));
-
- writel(le32_to_cpu(context->qp1c_bytes_4), addr);
- writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1);
- writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2);
- writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3);
- writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4);
- writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5);
- writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6);
- writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7);
- writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8);
- writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9);
- }
-
- /* Modify QP1C status */
- reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context));
- tmp = cpu_to_le32(reg_val);
- roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
- ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
- reg_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context), reg_val);
-
- hr_qp->state = new_state;
- if (new_state == IB_QPS_RESET) {
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
- hr_qp->qpn, NULL);
-
- hr_qp->rq.head = 0;
- hr_qp->rq.tail = 0;
- hr_qp->sq.head = 0;
- hr_qp->sq.tail = 0;
- }
-
- kfree(context);
- return 0;
-
-out:
- kfree(context);
- return -EINVAL;
-}
-
-static bool check_qp_state(enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- static const bool sm[][IB_QPS_ERR + 1] = {
- [IB_QPS_RESET] = { [IB_QPS_RESET] = true,
- [IB_QPS_INIT] = true },
- [IB_QPS_INIT] = { [IB_QPS_RESET] = true,
- [IB_QPS_INIT] = true,
- [IB_QPS_RTR] = true,
- [IB_QPS_ERR] = true },
- [IB_QPS_RTR] = { [IB_QPS_RESET] = true,
- [IB_QPS_RTS] = true,
- [IB_QPS_ERR] = true },
- [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true },
- [IB_QPS_SQD] = {},
- [IB_QPS_SQE] = {},
- [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }
- };
-
- return sm[cur_state][new_state];
-}
-
-static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp_context *context;
- const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
- dma_addr_t dma_handle_2 = 0;
- dma_addr_t dma_handle = 0;
- __le32 doorbell[2] = {0};
- u64 *mtts_2 = NULL;
- int ret = -EINVAL;
- const u8 *smac;
- u64 sq_ba = 0;
- u64 rq_ba = 0;
- u32 port;
- u32 port_num;
- u8 *dmac;
-
- if (!check_qp_state(cur_state, new_state)) {
- ibdev_err(ibqp->device,
- "not support QP(%u) status from %d to %d\n",
- ibqp->qp_num, cur_state, new_state);
- return -EINVAL;
- }
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- /* Search qp buf's mtts */
- if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle))
- goto out;
-
- /* Search IRRL's mtts */
- mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
- hr_qp->qpn, &dma_handle_2);
- if (mtts_2 == NULL) {
- dev_err(dev, "qp irrl_table find failed\n");
- goto out;
- }
-
- /*
- * Reset to init
- * Mandatory param:
- * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
- * Optional param: NA
- */
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
- to_hr_qp_type(hr_qp->ibqp.qp_type));
-
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
- );
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)
- );
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_PD_M,
- QP_CONTEXT_QPC_BYTES_4_PD_S,
- to_hr_pd(ibqp->pd)->pdn);
- hr_qp->access_flags = attr->qp_access_flags;
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
- to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
-
- if (ibqp->srq)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
- to_hr_srq(ibqp->srq)->srqn);
-
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- attr->pkey_index);
- hr_qp->pkey_index = attr->pkey_index;
- roce_set_field(context->qpc_bytes_16,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
- } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
- to_hr_qp_type(hr_qp->ibqp.qp_type));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE));
- } else {
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(hr_qp->access_flags &
- IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(hr_qp->access_flags &
- IB_ACCESS_REMOTE_WRITE));
- }
-
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_PD_M,
- QP_CONTEXT_QPC_BYTES_4_PD_S,
- to_hr_pd(ibqp->pd)->pdn);
-
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
- to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
-
- if (ibqp->srq)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
- to_hr_srq(ibqp->srq)->srqn);
- if (attr_mask & IB_QP_PKEY_INDEX)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- attr->pkey_index);
- else
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- hr_qp->pkey_index);
-
- roce_set_field(context->qpc_bytes_16,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
- } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
- if ((attr_mask & IB_QP_ALT_PATH) ||
- (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_PKEY_INDEX) ||
- (attr_mask & IB_QP_QKEY)) {
- dev_err(dev, "INIT2RTR attr_mask error\n");
- goto out;
- }
-
- dmac = (u8 *)attr->ah_attr.roce.dmac;
-
- context->sq_rq_bt_l = cpu_to_le32(dma_handle);
- roce_set_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
- QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
- upper_32_bits(dma_handle));
- roce_set_bit(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S,
- 1);
- roce_set_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S,
- attr->min_rnr_timer);
- context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2));
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M,
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S,
- ((u32)(dma_handle_2 >> 32)) &
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M);
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M,
- QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S, 0);
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S,
- 1);
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
- hr_qp->sq_signal_bits);
-
- port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
- hr_qp->port;
- smac = (const u8 *)hr_dev->dev_addr[port];
- /* when dmac equals smac or loop_idc is 1, it should loopback */
- if (ether_addr_equal_unaligned(dmac, smac) ||
- hr_dev->loop_idc == 0x1)
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
-
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
- rdma_ah_get_ah_flags(&attr->ah_attr));
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S,
- ilog2((unsigned int)attr->max_dest_rd_atomic));
-
- if (attr_mask & IB_QP_DEST_QPN)
- roce_set_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_S,
- attr->dest_qp_num);
-
- /* Configure GID index */
- port_num = rdma_ah_get_port_num(&attr->ah_attr);
- roce_set_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev,
- port_num - 1,
- grh->sgid_index));
-
- memcpy(&(context->dmac_l), dmac, 4);
-
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_S,
- *((u16 *)(&dmac[4])));
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M,
- QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S,
- rdma_ah_get_static_rate(&attr->ah_attr));
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S,
- grh->hop_limit);
-
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S,
- grh->flow_label);
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S,
- grh->traffic_class);
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_MTU_M,
- QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu);
-
- memcpy(context->dgid, grh->dgid.raw,
- sizeof(grh->dgid.raw));
-
- dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l,
- roce_get_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_S));
-
- roce_set_field(context->qpc_bytes_68,
- QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M,
- QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S,
- hr_qp->rq.head);
- roce_set_field(context->qpc_bytes_68,
- QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
-
- context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
-
- roce_set_field(context->qpc_bytes_76,
- QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S,
- upper_32_bits(rq_ba));
- roce_set_field(context->qpc_bytes_76,
- QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M,
- QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0);
-
- context->rx_rnr_time = 0;
-
- roce_set_field(context->qpc_bytes_84,
- QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M,
- QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S,
- attr->rq_psn - 1);
- roce_set_field(context->qpc_bytes_84,
- QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M,
- QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S, 0);
-
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S,
- attr->rq_psn);
- roce_set_bit(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S, 0);
- roce_set_bit(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S, 0);
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S,
- 0);
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S,
- 0);
-
- context->dma_length = 0;
- context->r_key = 0;
- context->va_l = 0;
- context->va_h = 0;
-
- roce_set_field(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S, 0);
- roce_set_bit(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S, 0);
- roce_set_bit(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_112,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S, 0);
- roce_set_field(context->qpc_bytes_112,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S, 0);
-
- /* For chip resp ack */
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
- hr_qp->phy_port);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S,
- rdma_ah_get_sl(&attr->ah_attr));
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
- } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
- /* If exist optional param, return error */
- if ((attr_mask & IB_QP_ALT_PATH) ||
- (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_QKEY) ||
- (attr_mask & IB_QP_PATH_MIG_STATE) ||
- (attr_mask & IB_QP_CUR_STATE) ||
- (attr_mask & IB_QP_MIN_RNR_TIMER)) {
- dev_err(dev, "RTR2RTS attr_mask error\n");
- goto out;
- }
-
- context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
-
- roce_set_field(context->qpc_bytes_120,
- QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S,
- upper_32_bits(sq_ba));
-
- roce_set_field(context->qpc_bytes_124,
- QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M,
- QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S, 0);
- roce_set_field(context->qpc_bytes_124,
- QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M,
- QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S, 0);
-
- roce_set_field(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S,
- attr->sq_psn);
- roce_set_bit(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S, 0);
- roce_set_field(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M,
- QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S,
- 0);
- roce_set_bit(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_132,
- QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M,
- QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S, 0);
- roce_set_field(context->qpc_bytes_132,
- QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M,
- QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S, 0);
-
- roce_set_field(context->qpc_bytes_136,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S,
- attr->sq_psn);
- roce_set_field(context->qpc_bytes_136,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S,
- attr->sq_psn);
-
- roce_set_field(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S,
- (attr->sq_psn >> SQ_PSN_SHIFT));
- roce_set_field(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S, 0);
- roce_set_bit(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M,
- QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S,
- attr->retry_cnt);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S,
- attr->rnr_retry);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_LSN_M,
- QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100);
-
- context->rnr_retry = 0;
-
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M,
- QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S,
- attr->retry_cnt);
- if (attr->timeout < 0x12) {
- dev_info(dev, "ack timeout value(0x%x) must bigger than 0x12.\n",
- attr->timeout);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
- 0x12);
- } else {
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
- attr->timeout);
- }
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M,
- QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S,
- attr->rnr_retry);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
- hr_qp->phy_port);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S,
- rdma_ah_get_sl(&attr->ah_attr));
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S,
- ilog2((unsigned int)attr->max_rd_atomic));
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S, 0);
- context->pkt_use_len = 0;
-
- roce_set_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S, attr->sq_psn);
- roce_set_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M,
- QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S, 0);
-
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S,
- attr->sq_psn);
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M,
- QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S, 0);
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M,
- QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S, 0);
- context->sge_use_len = 0;
-
- roce_set_field(context->qpc_bytes_176,
- QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S, 0);
- roce_set_field(context->qpc_bytes_176,
- QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S,
- 0);
- roce_set_field(context->qpc_bytes_180,
- QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S, 0);
- roce_set_field(context->qpc_bytes_180,
- QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
- QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
-
- context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
-
- roce_set_field(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S,
- upper_32_bits(sq_ba));
- roce_set_bit(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0);
- roce_set_field(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S,
- 0);
- }
-
- /* Every status migrate must change state */
- roce_set_field(context->qpc_bytes_144,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
-
- /* SW pass context to HW */
- ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state),
- to_hns_roce_state(new_state), context,
- hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_qp_modify failed\n");
- goto out;
- }
-
- /*
- * Use rst2init to instead of init2init with drv,
- * need to hw to flash RQ HEAD by DB again
- */
- if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M,
- RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head);
- roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M,
- RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
- roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_CMD_M,
- RQ_DOORBELL_U32_8_CMD_S, 1);
- roce_set_bit(doorbell[1], RQ_DOORBELL_U32_8_HW_SYNC_S, 1);
-
- if (ibqp->uobject) {
- hr_qp->rq.db_reg = hr_dev->reg_base +
- hr_dev->odb_offset +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
- }
-
- hns_roce_write64_k(doorbell, hr_qp->rq.db_reg);
- }
-
- hr_qp->state = new_state;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- hr_qp->resp_depth = attr->max_dest_rd_atomic;
- if (attr_mask & IB_QP_PORT) {
- hr_qp->port = attr->port_num - 1;
- hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
- }
-
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
- hr_qp->qpn, NULL);
-
- hr_qp->rq.head = 0;
- hr_qp->rq.tail = 0;
- hr_qp->sq.head = 0;
- hr_qp->sq.tail = 0;
- }
-out:
- kfree(context);
- return ret;
-}
-
-static int hns_roce_v1_modify_qp(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
- return -EOPNOTSUPP;
-
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
- return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state,
- new_state);
- else
- return hns_roce_v1_m_qp(ibqp, attr, attr_mask, cur_state,
- new_state);
-}
-
-static enum ib_qp_state to_ib_qp_state(enum hns_roce_qp_state state)
-{
- switch (state) {
- case HNS_ROCE_QP_STATE_RST:
- return IB_QPS_RESET;
- case HNS_ROCE_QP_STATE_INIT:
- return IB_QPS_INIT;
- case HNS_ROCE_QP_STATE_RTR:
- return IB_QPS_RTR;
- case HNS_ROCE_QP_STATE_RTS:
- return IB_QPS_RTS;
- case HNS_ROCE_QP_STATE_SQD:
- return IB_QPS_SQD;
- case HNS_ROCE_QP_STATE_ERR:
- return IB_QPS_ERR;
- default:
- return IB_QPS_ERR;
- }
-}
-
-static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_qp_context *hr_context)
-{
- struct hns_roce_cmd_mailbox *mailbox;
- int ret;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
- HNS_ROCE_CMD_QUERY_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- if (!ret)
- memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
- else
- dev_err(&hr_dev->pdev->dev, "QUERY QP cmd process error\n");
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return ret;
-}
-
-static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_sqp_context context;
- u32 addr;
-
- mutex_lock(&hr_qp->mutex);
-
- if (hr_qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- addr = ROCEE_QP1C_CFG0_0_REG +
- hr_qp->port * sizeof(struct hns_roce_sqp_context);
- context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr));
- context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1));
- context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2));
- context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3));
- context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4));
- context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5));
- context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6));
- context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7));
- context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8));
- context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9));
-
- hr_qp->state = roce_get_field(context.qp1c_bytes_4,
- QP1C_BYTES_4_QP_STATE_M,
- QP1C_BYTES_4_QP_STATE_S);
- qp_attr->qp_state = hr_qp->state;
- qp_attr->path_mtu = IB_MTU_256;
- qp_attr->path_mig_state = IB_MIG_ARMED;
- qp_attr->qkey = QKEY_VAL;
- qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- qp_attr->rq_psn = 0;
- qp_attr->sq_psn = 0;
- qp_attr->dest_qp_num = 1;
- qp_attr->qp_access_flags = 6;
-
- qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
- QP1C_BYTES_20_PKEY_IDX_M,
- QP1C_BYTES_20_PKEY_IDX_S);
- qp_attr->port_num = hr_qp->port + 1;
- qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = 0;
- qp_attr->max_dest_rd_atomic = 0;
- qp_attr->min_rnr_timer = 0;
- qp_attr->timeout = 0;
- qp_attr->retry_cnt = 0;
- qp_attr->rnr_retry = 0;
- qp_attr->alt_timeout = 0;
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
- qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
- qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
- qp_attr->cap.max_inline_data = 0;
- qp_init_attr->cap = qp_attr->cap;
- qp_init_attr->create_flags = 0;
-
- mutex_unlock(&hr_qp->mutex);
-
- return 0;
-}
-
-static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp_context *context;
- int tmp_qp_state;
- int ret = 0;
- int state;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- memset(qp_attr, 0, sizeof(*qp_attr));
- memset(qp_init_attr, 0, sizeof(*qp_init_attr));
-
- mutex_lock(&hr_qp->mutex);
-
- if (hr_qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- ret = hns_roce_v1_query_qpc(hr_dev, hr_qp, context);
- if (ret) {
- dev_err(dev, "query qpc error\n");
- ret = -EINVAL;
- goto out;
- }
-
- state = roce_get_field(context->qpc_bytes_144,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S);
- tmp_qp_state = (int)to_ib_qp_state((enum hns_roce_qp_state)state);
- if (tmp_qp_state == -1) {
- dev_err(dev, "to_ib_qp_state error\n");
- ret = -EINVAL;
- goto out;
- }
- hr_qp->state = (u8)tmp_qp_state;
- qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
- qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_MTU_M,
- QP_CONTEXT_QPC_BYTES_48_MTU_S);
- qp_attr->path_mig_state = IB_MIG_ARMED;
- qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- if (hr_qp->ibqp.qp_type == IB_QPT_UD)
- qp_attr->qkey = QKEY_VAL;
-
- qp_attr->rq_psn = roce_get_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S);
- qp_attr->sq_psn = (u32)roce_get_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S);
- qp_attr->dest_qp_num = (u8)roce_get_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_S);
- qp_attr->qp_access_flags = ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S)) << 2) |
- ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S)) << 1) |
- ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S)) << 3);
-
- if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
- struct ib_global_route *grh =
- rdma_ah_retrieve_grh(&qp_attr->ah_attr);
-
- rdma_ah_set_sl(&qp_attr->ah_attr,
- roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S));
- rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
- grh->flow_label =
- roce_get_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
- grh->sgid_index =
- roce_get_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
- grh->hop_limit =
- roce_get_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
- grh->traffic_class =
- roce_get_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
-
- memcpy(grh->dgid.raw, context->dgid,
- sizeof(grh->dgid.raw));
- }
-
- qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
- qp_attr->port_num = hr_qp->port + 1;
- qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = 1 << roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S);
- qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S);
- qp_attr->min_rnr_timer = (u8)(roce_get_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S));
- qp_attr->timeout = (u8)(roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S));
- qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
- qp_attr->rnr_retry = (u8)le32_to_cpu(context->rnr_retry);
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
-
- if (!ibqp->uobject) {
- qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
- qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
- } else {
- qp_attr->cap.max_send_wr = 0;
- qp_attr->cap.max_send_sge = 0;
- }
-
- qp_init_attr->cap = qp_attr->cap;
-
-out:
- mutex_unlock(&hr_qp->mutex);
- kfree(context);
- return ret;
-}
-
-static int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-
- return hr_qp->doorbell_qpn <= 1 ?
- hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
- hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
-}
-
-int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_cq *send_cq, *recv_cq;
- int ret;
-
- ret = hns_roce_v1_modify_qp(ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET);
- if (ret)
- return ret;
-
- send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
- recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
-
- hns_roce_lock_cqs(send_cq, recv_cq);
- if (!udata) {
- if (recv_cq)
- __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn,
- (hr_qp->ibqp.srq ?
- to_hr_srq(hr_qp->ibqp.srq) :
- NULL));
-
- if (send_cq && send_cq != recv_cq)
- __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
- }
- hns_roce_qp_remove(hr_dev, hr_qp);
- hns_roce_unlock_cqs(send_cq, recv_cq);
-
- hns_roce_qp_destroy(hr_dev, hr_qp, udata);
-
- return 0;
-}
-
-static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- struct device *dev = &hr_dev->pdev->dev;
- u32 cqe_cnt_ori;
- u32 cqe_cnt_cur;
- int wait_time = 0;
-
- /*
- * Before freeing cq buffer, we need to ensure that the outstanding CQE
- * have been written by checking the CQE counter.
- */
- cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
- while (1) {
- if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) &
- HNS_ROCE_CQE_WCMD_EMPTY_BIT)
- break;
-
- cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
- if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT)
- break;
-
- msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS);
- if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) {
- dev_warn(dev, "Destroy cq 0x%lx timeout!\n",
- hr_cq->cqn);
- break;
- }
- wait_time++;
- }
- return 0;
-}
-
-static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not)
-{
- roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) |
- (req_not << eq->log_entries), eq->db_reg);
-}
-
-static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe, int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LWQCE_QPC_ERROR:
- dev_warn(dev, "QP %d, QPC error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_MTU_ERROR:
- dev_warn(dev, "QP %d, MTU error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
- dev_warn(dev, "QP %d, WQE shift error\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SL_ERROR:
- dev_warn(dev, "QP %d, SL error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_PORT_ERROR:
- dev_warn(dev, "QP %d, port error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_v1_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Access Violation Work Queue Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
- dev_warn(dev, "QP %d, R_key violation.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_LENGTH_ERROR:
- dev_warn(dev, "QP %d, length error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_VA_ERROR:
- dev_warn(dev, "QP %d, VA error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_PD_ERROR:
- dev_err(dev, "QP %d, PD error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
- dev_warn(dev, "QP %d, rw acc error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
- dev_warn(dev, "QP %d, key state error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
- dev_warn(dev, "QP %d, MR operation error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- int phy_port;
- int qpn;
-
- qpn = roce_get_field(aeqe->event.queue_event.num,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
- phy_port = roce_get_field(aeqe->event.queue_event.num,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
- if (qpn <= 1)
- qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
- "QP %d, phy_port %d.\n", qpn, phy_port);
- break;
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- hns_roce_v1_wq_catas_err_handle(hr_dev, aeqe, qpn);
- break;
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v1_local_wq_access_err_handle(hr_dev, aeqe, qpn);
- break;
- default:
- break;
- }
-
- hns_roce_qp_event(hr_dev, qpn, event_type);
-}
-
-static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u32 cqn;
-
- cqn = roce_get_field(aeqe->event.queue_event.num,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S);
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- dev_warn(dev, "CQ 0x%x access err.\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- dev_warn(dev, "CQ 0x%x overflow\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
- break;
- default:
- break;
- }
-
- hns_roce_cq_event(hr_dev, cqn, event_type);
-}
-
-static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_DB_SUBTYPE_SDB_OVF:
- dev_warn(dev, "SDB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF:
- dev_warn(dev, "SDB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_OVF:
- dev_warn(dev, "ODB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF:
- dev_warn(dev, "ODB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- default:
- break;
- }
-}
-
-static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE;
-
- return (struct hns_roce_aeqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
-static struct hns_roce_aeqe *next_aeqe_sw_v1(struct hns_roce_eq *eq)
-{
- struct hns_roce_aeqe *aeqe = get_aeqe_v1(eq, eq->cons_index);
-
- return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^
- !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
-}
-
-static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_aeqe *aeqe;
- int aeqes_found = 0;
- int event_type;
-
- while ((aeqe = next_aeqe_sw_v1(eq))) {
- /* Make sure we read the AEQ entry after we have checked the
- * ownership bit
- */
- dma_rmb();
-
- dev_dbg(dev, "aeqe = %pK, aeqe->asyn.event_type = 0x%lx\n",
- aeqe,
- roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
- event_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- dev_warn(dev, "PATH MIG not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_warn(dev, "COMMUNICATION established\n");
- break;
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- dev_warn(dev, "SQ DRAINED not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- dev_warn(dev, "PATH MIG failed\n");
- break;
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v1_qp_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
- dev_warn(dev, "SRQ not support!\n");
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- hns_roce_v1_cq_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
- dev_warn(dev, "port change.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_MB:
- hns_roce_cmd_event(hr_dev,
- le16_to_cpu(aeqe->event.cmd.token),
- aeqe->event.cmd.status,
- le64_to_cpu(aeqe->event.cmd.out_param
- ));
- break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- hns_roce_v1_db_overflow_handle(hr_dev, aeqe);
- break;
- default:
- dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n",
- event_type, eq->eqn, eq->cons_index);
- break;
- }
-
- eq->cons_index++;
- aeqes_found = 1;
-
- if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1)
- eq->cons_index = 0;
- }
-
- set_eq_cons_index_v1(eq, 0);
-
- return aeqes_found;
-}
-
-static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE;
-
- return (struct hns_roce_ceqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
-static struct hns_roce_ceqe *next_ceqe_sw_v1(struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe = get_ceqe_v1(eq, eq->cons_index);
-
- return (!!(roce_get_bit(ceqe->comp,
- HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^
- (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
-}
-
-static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe;
- int ceqes_found = 0;
- u32 cqn;
-
- while ((ceqe = next_ceqe_sw_v1(eq))) {
- /* Make sure we read CEQ entry after we have checked the
- * ownership bit
- */
- dma_rmb();
-
- cqn = roce_get_field(ceqe->comp,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_M,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_S);
- hns_roce_cq_completion(hr_dev, cqn);
-
- ++eq->cons_index;
- ceqes_found = 1;
-
- if (eq->cons_index >
- EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1)
- eq->cons_index = 0;
- }
-
- set_eq_cons_index_v1(eq, 0);
-
- return ceqes_found;
-}
-
-static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr)
-{
- struct hns_roce_eq *eq = eq_ptr;
- struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work;
-
- if (eq->type_flag == HNS_ROCE_CEQ)
- /* CEQ irq routine, CEQ is pulse irq, not clear */
- int_work = hns_roce_v1_ceq_int(hr_dev, eq);
- else
- /* AEQ irq routine, AEQ is pulse irq, not clear */
- int_work = hns_roce_v1_aeq_int(hr_dev, eq);
-
- return IRQ_RETVAL(int_work);
-}
-
-static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
-{
- struct hns_roce_dev *hr_dev = dev_id;
- struct device *dev = &hr_dev->pdev->dev;
- int int_work = 0;
- u32 caepaemask_val;
- u32 cealmovf_val;
- u32 caepaest_val;
- u32 aeshift_val;
- u32 ceshift_val;
- u32 cemask_val;
- __le32 tmp;
- int i;
-
- /*
- * Abnormal interrupt:
- * AEQ overflow, ECC multi-bit err, CEQ overflow must clear
- * interrupt, mask irq, clear irq, cancel mask operation
- */
- aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
- tmp = cpu_to_le32(aeshift_val);
-
- /* AEQE overflow */
- if (roce_get_bit(tmp,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "AEQ overflow!\n");
-
- /* Set mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- tmp = cpu_to_le32(caepaemask_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- caepaemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
-
- /* Clear int state(INT_WC : write 1 clear) */
- caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
- tmp = cpu_to_le32(caepaest_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
- caepaest_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
-
- /* Clear mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- tmp = cpu_to_le32(caepaemask_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- caepaemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
- }
-
- /* CEQ almost overflow */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(ceshift_val);
-
- if (roce_get_bit(tmp,
- ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
- int_work++;
-
- /* Set mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(cemask_val);
- roce_set_bit(tmp,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- cemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
-
- /* Clear int state(INT_WC : write 1 clear) */
- cealmovf_val = roce_read(hr_dev,
- ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(cealmovf_val);
- roce_set_bit(tmp,
- ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
- 1);
- cealmovf_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET, cealmovf_val);
-
- /* Clear mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(cemask_val);
- roce_set_bit(tmp,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- cemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
- }
- }
-
- /* ECC multi-bit error alarm */
- dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG));
-
- dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG));
-
- return IRQ_RETVAL(int_work);
-}
-
-static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev)
-{
- u32 aemask_val;
- int masken = 0;
- __le32 tmp;
- int i;
-
- /* AEQ INT */
- aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- tmp = cpu_to_le32(aemask_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- masken);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
- aemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
-
- /* CEQ INT */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- /* IRQ mask */
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, masken);
- }
-}
-
-static void hns_roce_v1_free_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
- int i;
-
- if (!eq->buf_list)
- return;
-
- for (i = 0; i < npages; ++i)
- dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE,
- eq->buf_list[i].buf, eq->buf_list[i].map);
-
- kfree(eq->buf_list);
-}
-
-static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
- int enable_flag)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
- __le32 tmp;
- u32 val;
-
- val = readl(eqc);
- tmp = cpu_to_le32(val);
-
- if (enable_flag)
- roce_set_field(tmp,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_VALID);
- else
- roce_set_field(tmp,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
-
- val = le32_to_cpu(tmp);
- writel(val, eqc);
-}
-
-static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t tmp_dma_addr;
- u32 eqcuridx_val;
- u32 eqconsindx_val;
- u32 eqshift_val;
- __le32 tmp2 = 0;
- __le32 tmp1 = 0;
- __le32 tmp = 0;
- int num_bas;
- int ret;
- int i;
-
- num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
-
- if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) {
- dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n",
- (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE,
- num_bas);
- return -EINVAL;
- }
-
- eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL);
- if (!eq->buf_list)
- return -ENOMEM;
-
- for (i = 0; i < num_bas; ++i) {
- eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE,
- &tmp_dma_addr,
- GFP_KERNEL);
- if (!eq->buf_list[i].buf) {
- ret = -ENOMEM;
- goto err_out_free_pages;
- }
-
- eq->buf_list[i].map = tmp_dma_addr;
- }
- eq->cons_index = 0;
- roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
- roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
- eq->log_entries);
- eqshift_val = le32_to_cpu(tmp);
- writel(eqshift_val, eqc);
-
- /* Configure eq extended address 12~44bit */
- writel((u32)(eq->buf_list[0].map >> 12), eqc + 4);
-
- /*
- * Configure eq extended address 45~49 bit.
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * calculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
- eq->buf_list[0].map >> 44);
- roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
- eqcuridx_val = le32_to_cpu(tmp1);
- writel(eqcuridx_val, eqc + 8);
-
- /* Configure eq consumer index */
- roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
- ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
- eqconsindx_val = le32_to_cpu(tmp2);
- writel(eqconsindx_val, eqc + 0xc);
-
- return 0;
-
-err_out_free_pages:
- for (i -= 1; i >= 0; i--)
- dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf,
- eq->buf_list[i].map);
-
- kfree(eq->buf_list);
- return ret;
-}
-
-static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_eq *eq;
- int irq_num;
- int eq_num;
- int ret;
- int i, j;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- irq_num = eq_num + hr_dev->caps.num_other_vectors;
-
- eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
- if (!eq_table->eq)
- return -ENOMEM;
-
- eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base),
- GFP_KERNEL);
- if (!eq_table->eqc_base) {
- ret = -ENOMEM;
- goto err_eqc_base_alloc_fail;
- }
-
- for (i = 0; i < eq_num; i++) {
- eq = &eq_table->eq[i];
- eq->hr_dev = hr_dev;
- eq->eqn = i;
- eq->irq = hr_dev->irq[i];
- eq->log_page_size = PAGE_SHIFT;
-
- if (i < hr_dev->caps.num_comp_vectors) {
- /* CEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_SHIFT_0_REG +
- CEQ_REG_OFFSET * i;
- eq->type_flag = HNS_ROCE_CEQ;
- eq->db_reg = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_CONS_IDX_0_REG +
- CEQ_REG_OFFSET * i;
- eq->entries = hr_dev->caps.ceqe_depth;
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_CEQE_SIZE;
- } else {
- /* AEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_AEQC_AEQE_SHIFT_REG;
- eq->type_flag = HNS_ROCE_AEQ;
- eq->db_reg = hr_dev->reg_base +
- ROCEE_CAEP_AEQE_CONS_IDX_REG;
- eq->entries = hr_dev->caps.aeqe_depth;
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_AEQE_SIZE;
- }
- }
-
- /* Disable irq */
- hns_roce_v1_int_mask_enable(hr_dev);
-
- /* Configure ce int interval */
- roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_INTERVAL);
-
- /* Configure ce int burst num */
- roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_BURST_NUM);
-
- for (i = 0; i < eq_num; i++) {
- ret = hns_roce_v1_create_eq(hr_dev, &eq_table->eq[i]);
- if (ret) {
- dev_err(dev, "eq create failed\n");
- goto err_create_eq_fail;
- }
- }
-
- for (j = 0; j < irq_num; j++) {
- if (j < eq_num)
- ret = request_irq(hr_dev->irq[j],
- hns_roce_v1_msix_interrupt_eq, 0,
- hr_dev->irq_names[j],
- &eq_table->eq[j]);
- else
- ret = request_irq(hr_dev->irq[j],
- hns_roce_v1_msix_interrupt_abn, 0,
- hr_dev->irq_names[j], hr_dev);
-
- if (ret) {
- dev_err(dev, "request irq error!\n");
- goto err_request_irq_fail;
- }
- }
-
- for (i = 0; i < eq_num; i++)
- hns_roce_v1_enable_eq(hr_dev, i, EQ_ENABLE);
-
- return 0;
-
-err_request_irq_fail:
- for (j -= 1; j >= 0; j--)
- free_irq(hr_dev->irq[j], &eq_table->eq[j]);
-
-err_create_eq_fail:
- for (i -= 1; i >= 0; i--)
- hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
-
- kfree(eq_table->eqc_base);
-
-err_eqc_base_alloc_fail:
- kfree(eq_table->eq);
-
- return ret;
-}
-
-static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- int irq_num;
- int eq_num;
- int i;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- irq_num = eq_num + hr_dev->caps.num_other_vectors;
- for (i = 0; i < eq_num; i++) {
- /* Disable EQ */
- hns_roce_v1_enable_eq(hr_dev, i, EQ_DISABLE);
-
- free_irq(hr_dev->irq[i], &eq_table->eq[i]);
-
- hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
- }
- for (i = eq_num; i < irq_num; i++)
- free_irq(hr_dev->irq[i], hr_dev);
-
- kfree(eq_table->eqc_base);
- kfree(eq_table->eq);
-}
-
-static const struct ib_device_ops hns_roce_v1_dev_ops = {
- .destroy_qp = hns_roce_v1_destroy_qp,
- .poll_cq = hns_roce_v1_poll_cq,
- .post_recv = hns_roce_v1_post_recv,
- .post_send = hns_roce_v1_post_send,
- .query_qp = hns_roce_v1_query_qp,
- .req_notify_cq = hns_roce_v1_req_notify_cq,
-};
-
-static const struct hns_roce_hw hns_roce_hw_v1 = {
- .reset = hns_roce_v1_reset,
- .hw_profile = hns_roce_v1_profile,
- .hw_init = hns_roce_v1_init,
- .hw_exit = hns_roce_v1_exit,
- .post_mbox = hns_roce_v1_post_mbox,
- .poll_mbox_done = hns_roce_v1_chk_mbox,
- .set_gid = hns_roce_v1_set_gid,
- .set_mac = hns_roce_v1_set_mac,
- .set_mtu = hns_roce_v1_set_mtu,
- .write_mtpt = hns_roce_v1_write_mtpt,
- .write_cqc = hns_roce_v1_write_cqc,
- .set_hem = hns_roce_v1_set_hem,
- .clear_hem = hns_roce_v1_clear_hem,
- .modify_qp = hns_roce_v1_modify_qp,
- .dereg_mr = hns_roce_v1_dereg_mr,
- .destroy_cq = hns_roce_v1_destroy_cq,
- .init_eq = hns_roce_v1_init_eq_table,
- .cleanup_eq = hns_roce_v1_cleanup_eq_table,
- .hns_roce_dev_ops = &hns_roce_v1_dev_ops,
-};
-
-static const struct of_device_id hns_roce_of_match[] = {
- { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, },
- {},
-};
-MODULE_DEVICE_TABLE(of, hns_roce_of_match);
-
-static const struct acpi_device_id hns_roce_acpi_match[] = {
- { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 },
- {},
-};
-MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match);
-
-static struct
-platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode)
-{
- struct device *dev;
-
- /* get the 'device' corresponding to the matching 'fwnode' */
- dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
- /* get the platform device */
- return dev ? to_platform_device(dev) : NULL;
-}
-
-static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct platform_device *pdev = NULL;
- struct net_device *netdev = NULL;
- struct device_node *net_node;
- int port_cnt = 0;
- u8 phy_port;
- int ret;
- int i;
-
- /* check if we are compatible with the underlying SoC */
- if (dev_of_node(dev)) {
- const struct of_device_id *of_id;
-
- of_id = of_match_node(hns_roce_of_match, dev->of_node);
- if (!of_id) {
- dev_err(dev, "device is not compatible!\n");
- return -ENXIO;
- }
- hr_dev->hw = (const struct hns_roce_hw *)of_id->data;
- if (!hr_dev->hw) {
- dev_err(dev, "couldn't get H/W specific DT data!\n");
- return -ENXIO;
- }
- } else if (is_acpi_device_node(dev->fwnode)) {
- const struct acpi_device_id *acpi_id;
-
- acpi_id = acpi_match_device(hns_roce_acpi_match, dev);
- if (!acpi_id) {
- dev_err(dev, "device is not compatible!\n");
- return -ENXIO;
- }
- hr_dev->hw = (const struct hns_roce_hw *) acpi_id->driver_data;
- if (!hr_dev->hw) {
- dev_err(dev, "couldn't get H/W specific ACPI data!\n");
- return -ENXIO;
- }
- } else {
- dev_err(dev, "can't read compatibility data from DT or ACPI\n");
- return -ENXIO;
- }
-
- /* get the mapped register base address */
- hr_dev->reg_base = devm_platform_ioremap_resource(hr_dev->pdev, 0);
- if (IS_ERR(hr_dev->reg_base))
- return PTR_ERR(hr_dev->reg_base);
-
- /* read the node_guid of IB device from the DT or ACPI */
- ret = device_property_read_u8_array(dev, "node-guid",
- (u8 *)&hr_dev->ib_dev.node_guid,
- GUID_LEN);
- if (ret) {
- dev_err(dev, "couldn't get node_guid from DT or ACPI!\n");
- return ret;
- }
-
- /* get the RoCE associated ethernet ports or netdevices */
- for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) {
- if (dev_of_node(dev)) {
- net_node = of_parse_phandle(dev->of_node, "eth-handle",
- i);
- if (!net_node)
- continue;
- pdev = of_find_device_by_node(net_node);
- } else if (is_acpi_device_node(dev->fwnode)) {
- struct fwnode_reference_args args;
-
- ret = acpi_node_get_property_reference(dev->fwnode,
- "eth-handle",
- i, &args);
- if (ret)
- continue;
- pdev = hns_roce_find_pdev(args.fwnode);
- } else {
- dev_err(dev, "cannot read data from DT or ACPI\n");
- return -ENXIO;
- }
-
- if (pdev) {
- netdev = platform_get_drvdata(pdev);
- phy_port = (u8)i;
- if (netdev) {
- hr_dev->iboe.netdevs[port_cnt] = netdev;
- hr_dev->iboe.phy_port[port_cnt] = phy_port;
- } else {
- dev_err(dev, "no netdev found with pdev %s\n",
- pdev->name);
- return -ENODEV;
- }
- port_cnt++;
- }
- }
-
- if (port_cnt == 0) {
- dev_err(dev, "unable to get eth-handle for available ports!\n");
- return -EINVAL;
- }
-
- hr_dev->caps.num_ports = port_cnt;
-
- /* cmd issue mode: 0 is poll, 1 is event */
- hr_dev->cmd_mod = 1;
- hr_dev->loop_idc = 0;
- hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
- hr_dev->odb_offset = ROCEE_DB_OTHERS_L_0_REG;
-
- /* read the interrupt names from the DT or ACPI */
- ret = device_property_read_string_array(dev, "interrupt-names",
- hr_dev->irq_names,
- HNS_ROCE_V1_MAX_IRQ_NUM);
- if (ret < 0) {
- dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n");
- return ret;
- }
-
- /* fetch the interrupt numbers */
- for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) {
- hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
- if (hr_dev->irq[i] <= 0)
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * hns_roce_probe - RoCE driver entrance
- * @pdev: pointer to platform device
- * Return : int
- *
- */
-static int hns_roce_probe(struct platform_device *pdev)
-{
- int ret;
- struct hns_roce_dev *hr_dev;
- struct device *dev = &pdev->dev;
-
- hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
- if (!hr_dev)
- return -ENOMEM;
-
- hr_dev->priv = kzalloc(sizeof(struct hns_roce_v1_priv), GFP_KERNEL);
- if (!hr_dev->priv) {
- ret = -ENOMEM;
- goto error_failed_kzalloc;
- }
-
- hr_dev->pdev = pdev;
- hr_dev->dev = dev;
- platform_set_drvdata(pdev, hr_dev);
-
- if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) &&
- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) {
- dev_err(dev, "Not usable DMA addressing mode\n");
- ret = -EIO;
- goto error_failed_get_cfg;
- }
-
- ret = hns_roce_get_cfg(hr_dev);
- if (ret) {
- dev_err(dev, "Get Configuration failed!\n");
- goto error_failed_get_cfg;
- }
-
- ret = hns_roce_init(hr_dev);
- if (ret) {
- dev_err(dev, "RoCE engine init failed!\n");
- goto error_failed_get_cfg;
- }
-
- return 0;
-
-error_failed_get_cfg:
- kfree(hr_dev->priv);
-
-error_failed_kzalloc:
- ib_dealloc_device(&hr_dev->ib_dev);
-
- return ret;
-}
-
-/**
- * hns_roce_remove - remove RoCE device
- * @pdev: pointer to platform device
- */
-static int hns_roce_remove(struct platform_device *pdev)
-{
- struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev);
-
- hns_roce_exit(hr_dev);
- kfree(hr_dev->priv);
- ib_dealloc_device(&hr_dev->ib_dev);
-
- return 0;
-}
-
-static struct platform_driver hns_roce_driver = {
- .probe = hns_roce_probe,
- .remove = hns_roce_remove,
- .driver = {
- .name = DRV_NAME,
- .of_match_table = hns_roce_of_match,
- .acpi_match_table = ACPI_PTR(hns_roce_acpi_match),
- },
-};
-
-module_platform_driver(hns_roce_driver);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
-MODULE_AUTHOR("Nenglong Zhao <zhaonenglong@hisilicon.com>");
-MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>");
-MODULE_DESCRIPTION("Hisilicon Hip06 Family RoCE Driver");
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
deleted file mode 100644
index 60fdcbae6729..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ /dev/null
@@ -1,1147 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _HNS_ROCE_HW_V1_H
-#define _HNS_ROCE_HW_V1_H
-
-#define CQ_STATE_VALID 2
-
-#define HNS_ROCE_V1_MAX_PD_NUM 0x8000
-#define HNS_ROCE_V1_MAX_CQ_NUM 0x10000
-#define HNS_ROCE_V1_MAX_CQE_NUM 0x8000
-
-#define HNS_ROCE_V1_MAX_QP_NUM 0x40000
-#define HNS_ROCE_V1_MAX_WQE_NUM 0x4000
-
-#define HNS_ROCE_V1_MAX_MTPT_NUM 0x80000
-
-#define HNS_ROCE_V1_MAX_MTT_SEGS 0x100000
-
-#define HNS_ROCE_V1_MAX_QP_INIT_RDMA 128
-#define HNS_ROCE_V1_MAX_QP_DEST_RDMA 128
-
-#define HNS_ROCE_V1_MAX_SQ_DESC_SZ 64
-#define HNS_ROCE_V1_MAX_RQ_DESC_SZ 64
-#define HNS_ROCE_V1_SG_NUM 2
-#define HNS_ROCE_V1_INLINE_SIZE 32
-
-#define HNS_ROCE_V1_UAR_NUM 256
-#define HNS_ROCE_V1_PHY_UAR_NUM 8
-
-#define HNS_ROCE_V1_GID_NUM 16
-#define HNS_ROCE_V1_RESV_QP 8
-
-#define HNS_ROCE_V1_MAX_IRQ_NUM 34
-#define HNS_ROCE_V1_COMP_VEC_NUM 32
-#define HNS_ROCE_V1_AEQE_VEC_NUM 1
-#define HNS_ROCE_V1_ABNORMAL_VEC_NUM 1
-
-#define HNS_ROCE_V1_COMP_EQE_NUM 0x8000
-#define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400
-
-#define HNS_ROCE_V1_QPC_SIZE 256
-#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8
-#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64
-#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64
-#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64
-
-#define HNS_ROCE_V1_CQE_SIZE 32
-#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000
-
-#define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17)
-
-#define HNS_ROCE_V1_EXT_RAQ_WF 8
-#define HNS_ROCE_V1_RAQ_ENTRY 64
-#define HNS_ROCE_V1_RAQ_DEPTH 32768
-#define HNS_ROCE_V1_RAQ_SIZE (HNS_ROCE_V1_RAQ_ENTRY * HNS_ROCE_V1_RAQ_DEPTH)
-
-#define HNS_ROCE_V1_SDB_DEPTH 0x400
-#define HNS_ROCE_V1_ODB_DEPTH 0x400
-
-#define HNS_ROCE_V1_DB_RSVD 0x80
-
-#define HNS_ROCE_V1_SDB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_SDB_ALFUL (HNS_ROCE_V1_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-#define HNS_ROCE_V1_ODB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_ODB_ALFUL (HNS_ROCE_V1_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-
-#define HNS_ROCE_V1_EXT_SDB_DEPTH 0x4000
-#define HNS_ROCE_V1_EXT_ODB_DEPTH 0x4000
-#define HNS_ROCE_V1_EXT_SDB_ENTRY 16
-#define HNS_ROCE_V1_EXT_ODB_ENTRY 16
-#define HNS_ROCE_V1_EXT_SDB_SIZE \
- (HNS_ROCE_V1_EXT_SDB_DEPTH * HNS_ROCE_V1_EXT_SDB_ENTRY)
-#define HNS_ROCE_V1_EXT_ODB_SIZE \
- (HNS_ROCE_V1_EXT_ODB_DEPTH * HNS_ROCE_V1_EXT_ODB_ENTRY)
-
-#define HNS_ROCE_V1_EXT_SDB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_EXT_SDB_ALFUL \
- (HNS_ROCE_V1_EXT_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-#define HNS_ROCE_V1_EXT_ODB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_EXT_ODB_ALFUL \
- (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-
-#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000
-#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000
-#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5
-#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20
-
-#define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17)
-
-#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2
-#define HNS_ROCE_V1_TPTR_BUF_SIZE \
- (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
-
-#define HNS_ROCE_ODB_POLL_MODE 0
-
-#define HNS_ROCE_SDB_NORMAL_MODE 0
-#define HNS_ROCE_SDB_EXTEND_MODE 1
-
-#define HNS_ROCE_ODB_EXTEND_MODE 1
-
-#define KEY_VALID 0x02
-
-#define HNS_ROCE_CQE_QPN_MASK 0x3ffff
-#define HNS_ROCE_CQE_STATUS_MASK 0x1f
-#define HNS_ROCE_CQE_OPCODE_MASK 0xf
-
-#define HNS_ROCE_CQE_SUCCESS 0x00
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR 0x02
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR 0x03
-#define HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR 0x04
-#define HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR 0x05
-#define HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR 0x06
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR 0x07
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR 0x08
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR 0x09
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR 0x0a
-#define HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR 0x0b
-#define HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR 0x0c
-
-#define QP1C_CFGN_OFFSET 0x28
-#define PHY_PORT_OFFSET 0x8
-#define MTPT_IDX_SHIFT 16
-#define ALL_PORT_VAL_OPEN 0x3f
-#define POL_TIME_INTERVAL_VAL 0x80
-#define SLEEP_TIME_INTERVAL 20
-#define SQ_PSN_SHIFT 8
-#define QKEY_VAL 0x80010000
-#define SDB_INV_CNT_OFFSET 8
-
-#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10
-#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10
-
-#define HNS_ROCE_INT_MASK_DISABLE 0
-#define HNS_ROCE_INT_MASK_ENABLE 1
-
-#define CEQ_REG_OFFSET 0x18
-
-#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0
-
-#define HNS_ROCE_V1_CONS_IDX_M GENMASK(15, 0)
-
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M GENMASK(31, 16)
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M GENMASK(23, 16)
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M GENMASK(30, 24)
-
-#define HNS_ROCE_AEQE_U32_4_OWNER_S 31
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M GENMASK(23, 0)
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M GENMASK(27, 25)
-
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M GENMASK(15, 0)
-
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0)
-
-/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
-enum {
- HNS_ROCE_LWQCE_QPC_ERROR = 1,
- HNS_ROCE_LWQCE_MTU_ERROR,
- HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR,
- HNS_ROCE_LWQCE_WQE_ADDR_ERROR,
- HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR,
- HNS_ROCE_LWQCE_SL_ERROR,
- HNS_ROCE_LWQCE_PORT_ERROR,
-};
-
-/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */
-enum {
- HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1,
- HNS_ROCE_LAVWQE_LENGTH_ERROR,
- HNS_ROCE_LAVWQE_VA_ERROR,
- HNS_ROCE_LAVWQE_PD_ERROR,
- HNS_ROCE_LAVWQE_RW_ACC_ERROR,
- HNS_ROCE_LAVWQE_KEY_STATE_ERROR,
- HNS_ROCE_LAVWQE_MR_OPERATION_ERROR,
-};
-
-/* DOORBELL overflow subtype */
-enum {
- HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF,
- HNS_ROCE_DB_SUBTYPE_ODB_OVF,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP,
-};
-
-enum {
- /* RQ&SRQ related operations */
- HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
- HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE,
-};
-
-enum {
- HNS_ROCE_PORT_DOWN = 0,
- HNS_ROCE_PORT_UP,
-};
-
-struct hns_roce_cq_context {
- __le32 cqc_byte_4;
- __le32 cq_bt_l;
- __le32 cqc_byte_12;
- __le32 cur_cqe_ba0_l;
- __le32 cqc_byte_20;
- __le32 cqe_tptr_addr_l;
- __le32 cur_cqe_ba1_l;
- __le32 cqc_byte_32;
-};
-
-#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S 0
-#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M \
- (((1UL << 2) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S)
-
-#define CQ_CONTEXT_CQC_BYTE_4_CQN_S 16
-#define CQ_CONTEXT_CQC_BYTE_4_CQN_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQN_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M \
- (((1UL << 17) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S 20
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M \
- (((1UL << 4) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CEQN_S 24
-#define CQ_CONTEXT_CQC_BYTE_12_CEQN_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_12_CEQN_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S 16
-#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S 8
-#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S 9
-
-#define CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S 8
-#define CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S 14
-#define CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S 15
-
-#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S 16
-#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S)
-
-struct hns_roce_cqe {
- __le32 cqe_byte_4;
- union {
- __le32 r_key;
- __le32 immediate_data;
- };
- __le32 byte_cnt;
- __le32 cqe_byte_16;
- __le32 cqe_byte_20;
- __le32 s_mac_l;
- __le32 cqe_byte_28;
- __le32 reserved;
-};
-
-#define CQE_BYTE_4_OWNER_S 7
-#define CQE_BYTE_4_SQ_RQ_FLAG_S 14
-
-#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8
-#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \
- (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S)
-
-#define CQE_BYTE_4_WQE_INDEX_S 16
-#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S)
-
-#define CQE_BYTE_4_OPERATION_TYPE_S 0
-#define CQE_BYTE_4_OPERATION_TYPE_M \
- (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S)
-
-#define CQE_BYTE_4_IMM_INDICATOR_S 15
-
-#define CQE_BYTE_16_LOCAL_QPN_S 0
-#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S)
-
-#define CQE_BYTE_20_PORT_NUM_S 26
-#define CQE_BYTE_20_PORT_NUM_M (((1UL << 3) - 1) << CQE_BYTE_20_PORT_NUM_S)
-
-#define CQE_BYTE_20_SL_S 24
-#define CQE_BYTE_20_SL_M (((1UL << 2) - 1) << CQE_BYTE_20_SL_S)
-
-#define CQE_BYTE_20_REMOTE_QPN_S 0
-#define CQE_BYTE_20_REMOTE_QPN_M \
- (((1UL << 24) - 1) << CQE_BYTE_20_REMOTE_QPN_S)
-
-#define CQE_BYTE_20_GRH_PRESENT_S 29
-
-#define CQE_BYTE_28_P_KEY_IDX_S 16
-#define CQE_BYTE_28_P_KEY_IDX_M (((1UL << 16) - 1) << CQE_BYTE_28_P_KEY_IDX_S)
-
-#define CQ_DB_REQ_NOT_SOL 0
-#define CQ_DB_REQ_NOT (1 << 16)
-
-struct hns_roce_v1_mpt_entry {
- __le32 mpt_byte_4;
- __le32 pbl_addr_l;
- __le32 mpt_byte_12;
- __le32 virt_addr_l;
- __le32 virt_addr_h;
- __le32 length;
- __le32 mpt_byte_28;
- __le32 pa0_l;
- __le32 mpt_byte_36;
- __le32 mpt_byte_40;
- __le32 mpt_byte_44;
- __le32 mpt_byte_48;
- __le32 pa4_l;
- __le32 mpt_byte_56;
- __le32 mpt_byte_60;
- __le32 mpt_byte_64;
-};
-
-#define MPT_BYTE_4_KEY_STATE_S 0
-#define MPT_BYTE_4_KEY_STATE_M (((1UL << 2) - 1) << MPT_BYTE_4_KEY_STATE_S)
-
-#define MPT_BYTE_4_KEY_S 8
-#define MPT_BYTE_4_KEY_M (((1UL << 8) - 1) << MPT_BYTE_4_KEY_S)
-
-#define MPT_BYTE_4_PAGE_SIZE_S 16
-#define MPT_BYTE_4_PAGE_SIZE_M (((1UL << 2) - 1) << MPT_BYTE_4_PAGE_SIZE_S)
-
-#define MPT_BYTE_4_MW_TYPE_S 20
-
-#define MPT_BYTE_4_MW_BIND_ENABLE_S 21
-
-#define MPT_BYTE_4_OWN_S 22
-
-#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_S 24
-#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_M \
- (((1UL << 2) - 1) << MPT_BYTE_4_MEMORY_LOCATION_TYPE_S)
-
-#define MPT_BYTE_4_REMOTE_ATOMIC_S 26
-#define MPT_BYTE_4_LOCAL_WRITE_S 27
-#define MPT_BYTE_4_REMOTE_WRITE_S 28
-#define MPT_BYTE_4_REMOTE_READ_S 29
-#define MPT_BYTE_4_REMOTE_INVAL_ENABLE_S 30
-#define MPT_BYTE_4_ADDRESS_TYPE_S 31
-
-#define MPT_BYTE_12_PBL_ADDR_H_S 0
-#define MPT_BYTE_12_PBL_ADDR_H_M \
- (((1UL << 17) - 1) << MPT_BYTE_12_PBL_ADDR_H_S)
-
-#define MPT_BYTE_12_MW_BIND_COUNTER_S 17
-#define MPT_BYTE_12_MW_BIND_COUNTER_M \
- (((1UL << 15) - 1) << MPT_BYTE_12_MW_BIND_COUNTER_S)
-
-#define MPT_BYTE_28_PD_S 0
-#define MPT_BYTE_28_PD_M (((1UL << 16) - 1) << MPT_BYTE_28_PD_S)
-
-#define MPT_BYTE_28_L_KEY_IDX_L_S 16
-#define MPT_BYTE_28_L_KEY_IDX_L_M \
- (((1UL << 16) - 1) << MPT_BYTE_28_L_KEY_IDX_L_S)
-
-#define MPT_BYTE_36_PA0_H_S 0
-#define MPT_BYTE_36_PA0_H_M (((1UL << 5) - 1) << MPT_BYTE_36_PA0_H_S)
-
-#define MPT_BYTE_36_PA1_L_S 8
-#define MPT_BYTE_36_PA1_L_M (((1UL << 24) - 1) << MPT_BYTE_36_PA1_L_S)
-
-#define MPT_BYTE_40_PA1_H_S 0
-#define MPT_BYTE_40_PA1_H_M (((1UL << 13) - 1) << MPT_BYTE_40_PA1_H_S)
-
-#define MPT_BYTE_40_PA2_L_S 16
-#define MPT_BYTE_40_PA2_L_M (((1UL << 16) - 1) << MPT_BYTE_40_PA2_L_S)
-
-#define MPT_BYTE_44_PA2_H_S 0
-#define MPT_BYTE_44_PA2_H_M (((1UL << 21) - 1) << MPT_BYTE_44_PA2_H_S)
-
-#define MPT_BYTE_44_PA3_L_S 24
-#define MPT_BYTE_44_PA3_L_M (((1UL << 8) - 1) << MPT_BYTE_44_PA3_L_S)
-
-#define MPT_BYTE_48_PA3_H_S 0
-#define MPT_BYTE_48_PA3_H_M (((1UL << 29) - 1) << MPT_BYTE_48_PA3_H_S)
-
-#define MPT_BYTE_56_PA4_H_S 0
-#define MPT_BYTE_56_PA4_H_M (((1UL << 5) - 1) << MPT_BYTE_56_PA4_H_S)
-
-#define MPT_BYTE_56_PA5_L_S 8
-#define MPT_BYTE_56_PA5_L_M (((1UL << 24) - 1) << MPT_BYTE_56_PA5_L_S)
-
-#define MPT_BYTE_60_PA5_H_S 0
-#define MPT_BYTE_60_PA5_H_M (((1UL << 13) - 1) << MPT_BYTE_60_PA5_H_S)
-
-#define MPT_BYTE_60_PA6_L_S 16
-#define MPT_BYTE_60_PA6_L_M (((1UL << 16) - 1) << MPT_BYTE_60_PA6_L_S)
-
-#define MPT_BYTE_64_PA6_H_S 0
-#define MPT_BYTE_64_PA6_H_M (((1UL << 21) - 1) << MPT_BYTE_64_PA6_H_S)
-
-#define MPT_BYTE_64_L_KEY_IDX_H_S 24
-#define MPT_BYTE_64_L_KEY_IDX_H_M \
- (((1UL << 8) - 1) << MPT_BYTE_64_L_KEY_IDX_H_S)
-
-struct hns_roce_wqe_ctrl_seg {
- __le32 sgl_pa_h;
- __le32 flag;
- union {
- __be32 imm_data;
- __le32 inv_key;
- };
- __le32 msg_length;
-};
-
-struct hns_roce_wqe_data_seg {
- __le64 addr;
- __le32 lkey;
- __le32 len;
-};
-
-struct hns_roce_wqe_raddr_seg {
- __le32 rkey;
- __le32 len; /* reserved */
- __le64 raddr;
-};
-
-struct hns_roce_rq_wqe_ctrl {
- __le32 rwqe_byte_4;
- __le32 rocee_sgl_ba_l;
- __le32 rwqe_byte_12;
- __le32 reserved[5];
-};
-
-#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S 16
-#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M \
- (((1UL << 6) - 1) << RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S)
-
-#define HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS 10000
-
-#define GID_LEN 16
-
-struct hns_roce_ud_send_wqe {
- __le32 dmac_h;
- __le32 u32_8;
- __le32 immediate_data;
-
- __le32 u32_16;
- union {
- unsigned char dgid[GID_LEN];
- struct {
- __le32 u32_20;
- __le32 u32_24;
- __le32 u32_28;
- __le32 u32_32;
- };
- };
-
- __le32 u32_36;
- __le32 u32_40;
-
- __le32 va0_l;
- __le32 va0_h;
- __le32 l_key0;
-
- __le32 va1_l;
- __le32 va1_h;
- __le32 l_key1;
-};
-
-#define UD_SEND_WQE_U32_4_DMAC_0_S 0
-#define UD_SEND_WQE_U32_4_DMAC_0_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_0_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_1_S 8
-#define UD_SEND_WQE_U32_4_DMAC_1_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_1_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_2_S 16
-#define UD_SEND_WQE_U32_4_DMAC_2_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_2_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_3_S 24
-#define UD_SEND_WQE_U32_4_DMAC_3_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_3_S)
-
-#define UD_SEND_WQE_U32_8_DMAC_4_S 0
-#define UD_SEND_WQE_U32_8_DMAC_4_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_4_S)
-
-#define UD_SEND_WQE_U32_8_DMAC_5_S 8
-#define UD_SEND_WQE_U32_8_DMAC_5_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
-
-#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
-
-#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
-#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \
- (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
-
-#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S 24
-#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M \
- (((1UL << 6) - 1) << UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S)
-
-#define UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S 31
-
-#define UD_SEND_WQE_U32_16_DEST_QP_S 0
-#define UD_SEND_WQE_U32_16_DEST_QP_M \
- (((1UL << 24) - 1) << UD_SEND_WQE_U32_16_DEST_QP_S)
-
-#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S 24
-#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S)
-
-#define UD_SEND_WQE_U32_36_FLOW_LABEL_S 0
-#define UD_SEND_WQE_U32_36_FLOW_LABEL_M \
- (((1UL << 20) - 1) << UD_SEND_WQE_U32_36_FLOW_LABEL_S)
-
-#define UD_SEND_WQE_U32_36_PRIORITY_S 20
-#define UD_SEND_WQE_U32_36_PRIORITY_M \
- (((1UL << 4) - 1) << UD_SEND_WQE_U32_36_PRIORITY_S)
-
-#define UD_SEND_WQE_U32_36_SGID_INDEX_S 24
-#define UD_SEND_WQE_U32_36_SGID_INDEX_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_36_SGID_INDEX_S)
-
-#define UD_SEND_WQE_U32_40_HOP_LIMIT_S 0
-#define UD_SEND_WQE_U32_40_HOP_LIMIT_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_HOP_LIMIT_S)
-
-#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S 8
-#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S)
-
-struct hns_roce_sqp_context {
- __le32 qp1c_bytes_4;
- __le32 sq_rq_bt_l;
- __le32 qp1c_bytes_12;
- __le32 qp1c_bytes_16;
- __le32 qp1c_bytes_20;
- __le32 cur_rq_wqe_ba_l;
- __le32 qp1c_bytes_28;
- __le32 qp1c_bytes_32;
- __le32 cur_sq_wqe_ba_l;
- __le32 qp1c_bytes_40;
-};
-
-#define QP1C_BYTES_4_QP_STATE_S 0
-#define QP1C_BYTES_4_QP_STATE_M \
- (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
-
-#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
-#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
-
-#define QP1C_BYTES_4_RQ_WQE_SHIFT_S 12
-#define QP1C_BYTES_4_RQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP1C_BYTES_4_RQ_WQE_SHIFT_S)
-
-#define QP1C_BYTES_4_PD_S 16
-#define QP1C_BYTES_4_PD_M (((1UL << 16) - 1) << QP1C_BYTES_4_PD_S)
-
-#define QP1C_BYTES_12_SQ_RQ_BT_H_S 0
-#define QP1C_BYTES_12_SQ_RQ_BT_H_M \
- (((1UL << 17) - 1) << QP1C_BYTES_12_SQ_RQ_BT_H_S)
-
-#define QP1C_BYTES_16_RQ_HEAD_S 0
-#define QP1C_BYTES_16_RQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_16_RQ_HEAD_S)
-
-#define QP1C_BYTES_16_PORT_NUM_S 16
-#define QP1C_BYTES_16_PORT_NUM_M \
- (((1UL << 3) - 1) << QP1C_BYTES_16_PORT_NUM_S)
-
-#define QP1C_BYTES_16_SIGNALING_TYPE_S 27
-#define QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S 28
-#define QP1C_BYTES_16_RQ_BA_FLG_S 29
-#define QP1C_BYTES_16_SQ_BA_FLG_S 30
-#define QP1C_BYTES_16_QP1_ERR_S 31
-
-#define QP1C_BYTES_20_SQ_HEAD_S 0
-#define QP1C_BYTES_20_SQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_20_SQ_HEAD_S)
-
-#define QP1C_BYTES_20_PKEY_IDX_S 16
-#define QP1C_BYTES_20_PKEY_IDX_M \
- (((1UL << 16) - 1) << QP1C_BYTES_20_PKEY_IDX_S)
-
-#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S 0
-#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S)
-
-#define QP1C_BYTES_28_RQ_CUR_IDX_S 16
-#define QP1C_BYTES_28_RQ_CUR_IDX_M \
- (((1UL << 15) - 1) << QP1C_BYTES_28_RQ_CUR_IDX_S)
-
-#define QP1C_BYTES_32_TX_CQ_NUM_S 0
-#define QP1C_BYTES_32_TX_CQ_NUM_M \
- (((1UL << 16) - 1) << QP1C_BYTES_32_TX_CQ_NUM_S)
-
-#define QP1C_BYTES_32_RX_CQ_NUM_S 16
-#define QP1C_BYTES_32_RX_CQ_NUM_M \
- (((1UL << 16) - 1) << QP1C_BYTES_32_RX_CQ_NUM_S)
-
-#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S 0
-#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S)
-
-#define QP1C_BYTES_40_SQ_CUR_IDX_S 16
-#define QP1C_BYTES_40_SQ_CUR_IDX_M \
- (((1UL << 15) - 1) << QP1C_BYTES_40_SQ_CUR_IDX_S)
-
-#define HNS_ROCE_WQE_INLINE (1UL<<31)
-#define HNS_ROCE_WQE_SE (1UL<<30)
-
-#define HNS_ROCE_WQE_SGE_NUM_BIT 24
-#define HNS_ROCE_WQE_IMM (1UL<<23)
-#define HNS_ROCE_WQE_FENCE (1UL<<21)
-#define HNS_ROCE_WQE_CQ_NOTIFY (1UL<<20)
-
-#define HNS_ROCE_WQE_OPCODE_SEND (0<<16)
-#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16)
-#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16)
-#define HNS_ROCE_WQE_OPCODE_LOCAL_INV (4<<16)
-#define HNS_ROCE_WQE_OPCODE_UD_SEND (7<<16)
-#define HNS_ROCE_WQE_OPCODE_MASK (15<<16)
-
-struct hns_roce_qp_context {
- __le32 qpc_bytes_4;
- __le32 qpc_bytes_8;
- __le32 qpc_bytes_12;
- __le32 qpc_bytes_16;
- __le32 sq_rq_bt_l;
- __le32 qpc_bytes_24;
- __le32 irrl_ba_l;
- __le32 qpc_bytes_32;
- __le32 qpc_bytes_36;
- __le32 dmac_l;
- __le32 qpc_bytes_44;
- __le32 qpc_bytes_48;
- u8 dgid[16];
- __le32 qpc_bytes_68;
- __le32 cur_rq_wqe_ba_l;
- __le32 qpc_bytes_76;
- __le32 rx_rnr_time;
- __le32 qpc_bytes_84;
- __le32 qpc_bytes_88;
- union {
- __le32 rx_sge_len;
- __le32 dma_length;
- };
- union {
- __le32 rx_sge_num;
- __le32 rx_send_pktn;
- __le32 r_key;
- };
- __le32 va_l;
- __le32 va_h;
- __le32 qpc_bytes_108;
- __le32 qpc_bytes_112;
- __le32 rx_cur_sq_wqe_ba_l;
- __le32 qpc_bytes_120;
- __le32 qpc_bytes_124;
- __le32 qpc_bytes_128;
- __le32 qpc_bytes_132;
- __le32 qpc_bytes_136;
- __le32 qpc_bytes_140;
- __le32 qpc_bytes_144;
- __le32 qpc_bytes_148;
- union {
- __le32 rnr_retry;
- __le32 ack_time;
- };
- __le32 qpc_bytes_156;
- __le32 pkt_use_len;
- __le32 qpc_bytes_164;
- __le32 qpc_bytes_168;
- union {
- __le32 sge_use_len;
- __le32 pa_use_len;
- };
- __le32 qpc_bytes_176;
- __le32 qpc_bytes_180;
- __le32 tx_cur_sq_wqe_ba_l;
- __le32 qpc_bytes_188;
- __le32 rvd21;
-};
-
-#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S 0
-#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S 3
-#define QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S 4
-#define QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S 5
-#define QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S 6
-#define QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S 7
-
-#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S 8
-#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S)
-
-#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S 12
-#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S)
-
-#define QP_CONTEXT_QPC_BYTES_4_PD_S 16
-#define QP_CONTEXT_QPC_BYTES_4_PD_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_4_PD_S)
-
-#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S 0
-#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S)
-
-#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S 16
-#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S)
-
-#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S 0
-#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S)
-
-#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_S 0
-#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_16_QP_NUM_S)
-
-#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S 0
-#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M \
- (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S 18
-#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S)
-
-#define QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S 23
-
-#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M \
- (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S 18
-#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S)
-
-#define QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S 20
-#define QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S 21
-#define QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S 22
-#define QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S 23
-
-#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S 24
-#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S)
-
-#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_S 0
-#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_36_DEST_QP_S)
-
-#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S 24
-#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_S 0
-#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_44_DMAC_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S 16
-#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_S 24
-#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_HOPLMT_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S 0
-#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M \
- (((1UL << 20) - 1) << QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_TCLASS_S 20
-#define QP_CONTEXT_QPC_BYTES_48_TCLASS_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_48_TCLASS_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_MTU_S 28
-#define QP_CONTEXT_QPC_BYTES_48_MTU_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_48_MTU_S)
-
-#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S 0
-#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S 8
-#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S 24
-#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S 0
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S 24
-#define QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S 25
-
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S 26
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M \
- (((1UL << 2) - 1) << \
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S 29
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S)
-
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S 24
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S 25
-
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S 24
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S)
-
-#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S 0
-#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S 16
-#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S 0
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S 24
-
-#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S 25
-#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S 27
-
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S 24
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S)
-
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S 24
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S 0
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S 16
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S 31
-
-#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_S 0
-#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_144_QP_STATE_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S 0
-#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S 2
-#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S 5
-#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_LSN_S 8
-#define QP_CONTEXT_QPC_BYTES_148_LSN_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_148_LSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S 0
-#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S 3
-#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S 8
-#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S 11
-#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_SL_S 14
-#define QP_CONTEXT_QPC_BYTES_156_SL_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_SL_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S 16
-#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S 24
-#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S)
-
-#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S 24
-#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S 24
-#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S 26
-#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S 28
-#define QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S 29
-#define QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S 30
-
-#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S 0
-#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S 0
-#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S 8
-
-#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S)
-
-#define STATUS_MASK 0xff
-#define GO_BIT_TIMEOUT_MSECS 10000
-#define HCR_STATUS_OFFSET 0x18
-#define HCR_GO_BIT 15
-
-struct hns_roce_rq_db {
- __le32 u32_4;
- __le32 u32_8;
-};
-
-#define RQ_DOORBELL_U32_4_RQ_HEAD_S 0
-#define RQ_DOORBELL_U32_4_RQ_HEAD_M \
- (((1UL << 15) - 1) << RQ_DOORBELL_U32_4_RQ_HEAD_S)
-
-#define RQ_DOORBELL_U32_8_QPN_S 0
-#define RQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DOORBELL_U32_8_QPN_S)
-
-#define RQ_DOORBELL_U32_8_CMD_S 28
-#define RQ_DOORBELL_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DOORBELL_U32_8_CMD_S)
-
-#define RQ_DOORBELL_U32_8_HW_SYNC_S 31
-
-struct hns_roce_sq_db {
- __le32 u32_4;
- __le32 u32_8;
-};
-
-#define SQ_DOORBELL_U32_4_SQ_HEAD_S 0
-#define SQ_DOORBELL_U32_4_SQ_HEAD_M \
- (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S)
-
-#define SQ_DOORBELL_U32_4_SL_S 16
-#define SQ_DOORBELL_U32_4_SL_M \
- (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S)
-
-#define SQ_DOORBELL_U32_4_PORT_S 18
-#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S)
-
-#define SQ_DOORBELL_U32_8_QPN_S 0
-#define SQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DOORBELL_U32_8_QPN_S)
-
-#define SQ_DOORBELL_HW_SYNC_S 31
-
-struct hns_roce_ext_db {
- int esdb_dep;
- int eodb_dep;
- struct hns_roce_buf_list *sdb_buf_list;
- struct hns_roce_buf_list *odb_buf_list;
-};
-
-struct hns_roce_db_table {
- int sdb_ext_mod;
- int odb_ext_mod;
- struct hns_roce_ext_db *ext_db;
-};
-
-#define HW_SYNC_SLEEP_TIME_INTERVAL 20
-#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL)
-#define BT_CMD_SYNC_SHIFT 31
-#define HNS_ROCE_BA_SIZE (32 * 4096)
-
-struct hns_roce_bt_table {
- struct hns_roce_buf_list qpc_buf;
- struct hns_roce_buf_list mtpt_buf;
- struct hns_roce_buf_list cqc_buf;
-};
-
-struct hns_roce_tptr_table {
- struct hns_roce_buf_list tptr_buf;
-};
-
-struct hns_roce_qp_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct hns_roce_qp *qp;
- u32 db_wait_stage;
- u32 sdb_issue_ptr;
- u32 sdb_inv_cnt;
- u32 sche_cnt;
-};
-
-struct hns_roce_mr_free_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct completion *comp;
- int comp_flag;
- void *mr;
-};
-
-struct hns_roce_recreate_lp_qp_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct completion *comp;
- int comp_flag;
-};
-
-struct hns_roce_free_mr {
- struct workqueue_struct *free_mr_wq;
- struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP];
- struct hns_roce_cq *mr_free_cq;
- struct hns_roce_pd *mr_free_pd;
-};
-
-struct hns_roce_v1_priv {
- struct hns_roce_db_table db_table;
- struct hns_roce_raq_table raq_table;
- struct hns_roce_bt_table bt_table;
- struct hns_roce_tptr_table tptr_table;
- struct hns_roce_free_mr free_mr;
-};
-
-int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
-int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
-
-#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index bbfa1332dedc..1435fe2ea176 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -55,6 +55,42 @@ enum {
CMD_RST_PRC_EBUSY,
};
+enum ecc_resource_type {
+ ECC_RESOURCE_QPC,
+ ECC_RESOURCE_CQC,
+ ECC_RESOURCE_MPT,
+ ECC_RESOURCE_SRQC,
+ ECC_RESOURCE_GMV,
+ ECC_RESOURCE_QPC_TIMER,
+ ECC_RESOURCE_CQC_TIMER,
+ ECC_RESOURCE_SCCC,
+ ECC_RESOURCE_COUNT,
+};
+
+static const struct {
+ const char *name;
+ u8 read_bt0_op;
+ u8 write_bt0_op;
+} fmea_ram_res[] = {
+ { "ECC_RESOURCE_QPC",
+ HNS_ROCE_CMD_READ_QPC_BT0, HNS_ROCE_CMD_WRITE_QPC_BT0 },
+ { "ECC_RESOURCE_CQC",
+ HNS_ROCE_CMD_READ_CQC_BT0, HNS_ROCE_CMD_WRITE_CQC_BT0 },
+ { "ECC_RESOURCE_MPT",
+ HNS_ROCE_CMD_READ_MPT_BT0, HNS_ROCE_CMD_WRITE_MPT_BT0 },
+ { "ECC_RESOURCE_SRQC",
+ HNS_ROCE_CMD_READ_SRQC_BT0, HNS_ROCE_CMD_WRITE_SRQC_BT0 },
+ /* ECC_RESOURCE_GMV is handled by cmdq, not mailbox */
+ { "ECC_RESOURCE_GMV",
+ 0, 0 },
+ { "ECC_RESOURCE_QPC_TIMER",
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0, HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 },
+ { "ECC_RESOURCE_CQC_TIMER",
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0, HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 },
+ { "ECC_RESOURCE_SCCC",
+ HNS_ROCE_CMD_READ_SCCC_BT0, HNS_ROCE_CMD_WRITE_SCCC_BT0 },
+};
+
static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
struct ib_sge *sg)
{
@@ -82,7 +118,6 @@ static const u32 hns_roce_op_code[] = {
HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP),
HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD),
HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV),
- HR_OPC_MAP(LOCAL_INV, LOCAL_INV),
HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP),
HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD),
HR_OPC_MAP(REG_MR, FAST_REG_PMR),
@@ -149,8 +184,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
aseg->cmp_data = 0;
}
- roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
}
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
@@ -158,8 +192,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
unsigned int *sge_idx, u32 msg_len)
{
struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
- unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg);
- unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len;
+ unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE;
unsigned int left_len_in_pg;
unsigned int idx = *sge_idx;
unsigned int i = 0;
@@ -187,7 +220,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
if (len <= left_len_in_pg) {
memcpy(dseg, addr, len);
- idx += len / dseg_len;
+ idx += len / HNS_ROCE_SGE_SIZE;
i++;
if (i >= wr->num_sge)
@@ -202,7 +235,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
len -= left_len_in_pg;
addr += left_len_in_pg;
- idx += left_len_in_pg / dseg_len;
+ idx += left_len_in_pg / HNS_ROCE_SGE_SIZE;
dseg = hns_roce_get_extend_sge(qp,
idx & (qp->sge.sge_cnt - 1));
left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
@@ -271,8 +304,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
- roce_set_bit(rc_sq_wqe->byte_20,
- V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
+ hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
for (i = 0; i < wr->num_sge; i++) {
memcpy(dseg, ((void *)wr->sg_list[i].addr),
@@ -280,17 +312,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
dseg += wr->sg_list[i].length;
}
} else {
- roce_set_bit(rc_sq_wqe->byte_20,
- V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
if (ret)
return ret;
- roce_set_field(rc_sq_wqe->byte_16,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
- curr_idx - *sge_idx);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx);
}
*sge_idx = curr_idx;
@@ -309,12 +337,10 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
int j = 0;
int i;
- roce_set_field(rc_sq_wqe->byte_20,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE,
!!(wr->send_flags & IB_SEND_INLINE));
if (wr->send_flags & IB_SEND_INLINE)
return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
@@ -339,9 +365,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
valid_num_sge - HNS_ROCE_SGE_IN_WQE);
}
- roce_set_field(rc_sq_wqe->byte_16,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
return 0;
}
@@ -355,7 +379,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
if (unlikely(ibqp->qp_type != IB_QPT_RC &&
ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_UD)) {
- ibdev_err(ibdev, "Not supported QP(0x%x)type!\n",
+ ibdev_err(ibdev, "not supported QP(0x%x)type!\n",
ibqp->qp_type);
return -EOPNOTSUPP;
} else if (unlikely(hr_qp->state == IB_QPS_RESET ||
@@ -412,8 +436,7 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
ud_sq_wqe->immtdata = get_immtdata(wr);
- roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
- V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
return 0;
}
@@ -424,21 +447,15 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
struct ib_device *ib_dev = ah->ibah.device;
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
- V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
-
- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
- V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
- V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
- V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
return -EINVAL;
- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
- V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
ud_sq_wqe->sgid_index = ah->av.gid_index;
@@ -448,10 +465,8 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
return 0;
- roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
- ah->av.vlan_en);
- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
- V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id);
return 0;
}
@@ -476,27 +491,19 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE,
!!(wr->send_flags & IB_SEND_SIGNALED));
-
- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S,
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE,
!!(wr->send_flags & IB_SEND_SOLICITED));
- roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
- V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
-
- roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
-
- roce_set_field(ud_sq_wqe->byte_20,
- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- curr_idx & (qp->sge.sge_cnt - 1));
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX,
+ curr_idx & (qp->sge.sge_cnt - 1));
ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
qp->qkey : ud_wr(wr)->remote_qkey);
- roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
- V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn);
ret = fill_ud_av(ud_sq_wqe, ah);
if (ret)
@@ -516,8 +523,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
dma_wmb();
*sge_idx = curr_idx;
- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
- owner_bit);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit);
return 0;
}
@@ -552,9 +558,6 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
else
ret = -EOPNOTSUPP;
break;
- case IB_WR_LOCAL_INV:
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
- fallthrough;
case IB_WR_SEND_WITH_INV:
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
break;
@@ -565,11 +568,11 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
if (unlikely(ret))
return ret;
- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
return ret;
}
+
static inline int set_rc_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
@@ -590,13 +593,13 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
if (WARN_ON(ret))
return ret;
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S,
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
(wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -616,8 +619,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
dma_wmb();
*sge_idx = curr_idx;
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
- owner_bit);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit);
return ret;
}
@@ -630,7 +632,7 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev,
} else {
struct hns_roce_v2_db sq_db = {};
- hr_reg_write(&sq_db, DB_TAG, qp->doorbell_qpn);
+ hr_reg_write(&sq_db, DB_TAG, qp->qpn);
hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB);
hr_reg_write(&sq_db, DB_PI, qp->sq.head);
hr_reg_write(&sq_db, DB_SL, qp->sl);
@@ -678,16 +680,15 @@ static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
void *wqe)
{
+#define HNS_ROCE_SL_SHIFT 2
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
/* All kinds of DirectWQE have the same header field layout */
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1);
- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M,
- V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl);
- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M,
- V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2);
- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M,
- V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H,
+ qp->sl >> HNS_ROCE_SL_SHIFT);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head);
hns_roce_write512(hr_dev, wqe, qp->sq.db_reg);
}
@@ -1263,6 +1264,16 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
return tail == priv->cmq.csq.head;
}
+static void update_cmdq_status(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+
+ if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
+ hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
+}
+
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
@@ -1294,7 +1305,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
} while (++timeout < priv->cmq.tx_timeout);
if (hns_roce_cmq_csq_done(hr_dev)) {
- for (ret = 0, i = 0; i < num; i++) {
+ ret = 0;
+ for (i = 0; i < num; i++) {
/* check the result of hardware write back */
desc[i] = csq->desc[tail++];
if (tail == csq->desc_num)
@@ -1305,17 +1317,19 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
continue;
dev_err_ratelimited(hr_dev->dev,
- "Cmdq IO error, opcode = %x, return = %x\n",
+ "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
desc->opcode, desc_ret);
ret = -EIO;
}
} else {
/* FW/HW reset or incorrect number of desc */
tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG);
- dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n",
+ dev_warn(hr_dev->dev, "CMDQ move tail from %u to %u.\n",
csq->head, tail);
csq->head = tail;
+ update_cmdq_status(hr_dev);
+
ret = -EAGAIN;
}
@@ -1330,6 +1344,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
bool busy;
int ret;
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
if (!v2_chk_mbox_is_avail(hr_dev, &busy))
return busy ? -EBUSY : 0;
@@ -1342,17 +1359,17 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
return ret;
}
-static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj,
- dma_addr_t base_addr, u16 op)
+static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev,
+ dma_addr_t base_addr, u8 cmd, unsigned long tag)
{
- struct hns_roce_cmd_mailbox *mbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ struct hns_roce_cmd_mailbox *mbox;
int ret;
+ mbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mbox))
return PTR_ERR(mbox);
- ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, obj, 0, op,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, cmd, tag);
hns_roce_free_cmd_mailbox(hr_dev, mbox);
return ret;
}
@@ -1384,20 +1401,20 @@ static void func_clr_hw_resetting_state(struct hns_roce_dev *hr_dev,
hr_dev->dis_db = true;
dev_warn(hr_dev->dev,
- "Func clear is pending, device in resetting state.\n");
+ "func clear is pending, device in resetting state.\n");
end = HNS_ROCE_V2_HW_RST_TIMEOUT;
while (end) {
if (!ops->get_hw_reset_stat(handle)) {
hr_dev->is_reset = true;
dev_info(hr_dev->dev,
- "Func clear success after reset.\n");
+ "func clear success after reset.\n");
return;
}
msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
}
- dev_warn(hr_dev->dev, "Func clear failed.\n");
+ dev_warn(hr_dev->dev, "func clear failed.\n");
}
static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev,
@@ -1409,21 +1426,21 @@ static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev,
hr_dev->dis_db = true;
dev_warn(hr_dev->dev,
- "Func clear is pending, device in resetting state.\n");
+ "func clear is pending, device in resetting state.\n");
end = HNS_ROCE_V2_HW_RST_TIMEOUT;
while (end) {
if (ops->ae_dev_reset_cnt(handle) !=
hr_dev->reset_cnt) {
hr_dev->is_reset = true;
dev_info(hr_dev->dev,
- "Func clear success after sw reset\n");
+ "func clear success after sw reset\n");
return;
}
msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
}
- dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n");
+ dev_warn(hr_dev->dev, "func clear failed because of unfinished sw reset\n");
}
static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval,
@@ -1436,7 +1453,7 @@ static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval,
if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) {
hr_dev->dis_db = true;
hr_dev->is_reset = true;
- dev_info(hr_dev->dev, "Func clear success after reset.\n");
+ dev_info(hr_dev->dev, "func clear success after reset.\n");
return;
}
@@ -1453,9 +1470,9 @@ static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval,
if (retval && !flag)
dev_warn(hr_dev->dev,
- "Func clear read failed, ret = %d.\n", retval);
+ "func clear read failed, ret = %d.\n", retval);
- dev_warn(hr_dev->dev, "Func clear failed.\n");
+ dev_warn(hr_dev->dev, "func clear failed.\n");
}
static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
@@ -1476,7 +1493,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
fclr_write_fail_flag = true;
- dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n",
+ dev_err(hr_dev->dev, "func clear write failed, ret = %d.\n",
ret);
goto out;
}
@@ -1497,7 +1514,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
if (ret)
continue;
- if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) {
+ if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) {
if (vf_id == 0)
hr_dev->is_reset = true;
return;
@@ -1508,7 +1525,7 @@ out:
hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag);
}
-static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
+static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
{
enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
struct hns_roce_cmq_desc desc[2];
@@ -1519,17 +1536,29 @@ static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id);
- hns_roce_cmq_send(hr_dev, desc, 2);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
}
static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
{
+ int ret;
int i;
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return;
+
for (i = hr_dev->func_num - 1; i >= 0; i--) {
__hns_roce_function_clear(hr_dev, i);
- if (i != 0)
- hns_roce_free_vf_resource(hr_dev, i);
+
+ if (i == 0)
+ continue;
+
+ ret = hns_roce_free_vf_resource(hr_dev, i);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to free vf resource, vf_id = %d, ret = %d.\n",
+ i, ret);
}
}
@@ -1571,7 +1600,7 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
struct hns_roce_cmq_desc desc;
int ret;
- if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) {
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
hr_dev->func_num = 1;
return 0;
}
@@ -1594,11 +1623,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 clock_cycles_of_1us;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
false);
- hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ clock_cycles_of_1us = HNS_ROCE_1NS_CFG;
+ else
+ clock_cycles_of_1us = HNS_ROCE_1US_CFG;
+
+ hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us);
hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT);
return hns_roce_cmq_send(hr_dev, &desc, 1);
@@ -1749,17 +1784,16 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
swt = (struct hns_roce_vf_switch *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
- roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M,
- VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id);
+ hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0);
- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
+ hr_reg_enable(swt, VF_SWITCH_ALW_LPBK);
+ hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK);
+ hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@@ -1927,7 +1961,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->min_cqes = HNS_ROCE_MIN_CQE_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
- caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
@@ -1938,14 +1971,13 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
- caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
- caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
+ caps->qpc_timer_bt_num = HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM;
+ caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM;
caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ;
caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ;
- caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
@@ -1997,7 +2029,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
- caps->flags |= HNS_ROCE_CAP_FLAG_STASH;
+ caps->flags |= HNS_ROCE_CAP_FLAG_STASH |
+ HNS_ROCE_CAP_FLAG_DIRECT_WQE;
caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE;
} else {
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
@@ -2138,7 +2171,6 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
- caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
@@ -2146,15 +2178,17 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM;
caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM;
- caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
if (!caps->num_comp_vectors)
- caps->num_comp_vectors = min_t(u32, caps->eqc_bt_num - 1,
- (u32)priv->handle->rinfo.num_vectors - 2);
+ caps->num_comp_vectors =
+ min_t(u32, caps->eqc_bt_num - HNS_ROCE_V2_AEQE_VEC_NUM,
+ (u32)priv->handle->rinfo.num_vectors -
+ (HNS_ROCE_V2_AEQE_VEC_NUM + HNS_ROCE_V2_ABNORMAL_VEC_NUM));
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->eqe_hop_num = HNS_ROCE_V3_EQE_HOP_NUM;
caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
@@ -2175,6 +2209,7 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
} else {
u32 func_num = max_t(u32, 1, hr_dev->func_num);
+ caps->eqe_hop_num = HNS_ROCE_V2_EQE_HOP_NUM;
caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
caps->gid_table_len[0] /= func_num;
@@ -2231,16 +2266,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
- caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
- caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
- caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
caps->num_other_vectors = resp_a->num_other_vectors;
caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
caps->max_rq_desc_sz = resp_a->max_rq_desc_sz;
- caps->max_srq_desc_sz = resp_a->max_srq_desc_sz;
caps->cqe_sz = resp_a->cqe_sz;
caps->mtpt_entry_sz = resp_b->mtpt_entry_sz;
@@ -2260,87 +2291,39 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
ctx_hop_num = resp_b->ctx_hop_num;
pbl_hop_num = resp_b->pbl_hop_num;
- caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds,
- V2_QUERY_PF_CAPS_C_NUM_PDS_M,
- V2_QUERY_PF_CAPS_C_NUM_PDS_S);
- caps->flags = roce_get_field(resp_c->cap_flags_num_pds,
- V2_QUERY_PF_CAPS_C_CAP_FLAGS_M,
- V2_QUERY_PF_CAPS_C_CAP_FLAGS_S);
+ caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS);
+
+ caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS);
caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
HNS_ROCE_CAP_FLAGS_EX_SHIFT;
- caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs,
- V2_QUERY_PF_CAPS_C_NUM_CQS_M,
- V2_QUERY_PF_CAPS_C_NUM_CQS_S);
- caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs,
- V2_QUERY_PF_CAPS_C_MAX_GID_M,
- V2_QUERY_PF_CAPS_C_MAX_GID_S);
-
- caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth,
- V2_QUERY_PF_CAPS_C_CQ_DEPTH_M,
- V2_QUERY_PF_CAPS_C_CQ_DEPTH_S);
- caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws,
- V2_QUERY_PF_CAPS_C_NUM_MRWS_M,
- V2_QUERY_PF_CAPS_C_NUM_MRWS_S);
- caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps,
- V2_QUERY_PF_CAPS_C_NUM_QPS_M,
- V2_QUERY_PF_CAPS_C_NUM_QPS_S);
- caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps,
- V2_QUERY_PF_CAPS_C_MAX_ORD_M,
- V2_QUERY_PF_CAPS_C_MAX_ORD_S);
+ caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
+ caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
+ caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
+ caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS);
+ caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS);
+ caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD);
caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
- caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs,
- V2_QUERY_PF_CAPS_D_NUM_SRQS_M,
- V2_QUERY_PF_CAPS_D_NUM_SRQS_S);
- caps->cong_type = roce_get_field(resp_d->wq_hop_num_max_srqs,
- V2_QUERY_PF_CAPS_D_CONG_TYPE_M,
- V2_QUERY_PF_CAPS_D_CONG_TYPE_S);
- caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
- caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth,
- V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M,
- V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S);
- caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth,
- V2_QUERY_PF_CAPS_D_NUM_CEQS_M,
- V2_QUERY_PF_CAPS_D_NUM_CEQS_S);
-
- caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth,
- V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M,
- V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S);
- caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
- V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M,
- V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S);
- caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
- V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M,
- V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S);
- caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds,
- V2_QUERY_PF_CAPS_D_RSV_PDS_M,
- V2_QUERY_PF_CAPS_D_RSV_PDS_S);
- caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds,
- V2_QUERY_PF_CAPS_D_NUM_UARS_M,
- V2_QUERY_PF_CAPS_D_NUM_UARS_S);
- caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps,
- V2_QUERY_PF_CAPS_D_RSV_QPS_M,
- V2_QUERY_PF_CAPS_D_RSV_QPS_S);
- caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps,
- V2_QUERY_PF_CAPS_D_RSV_UARS_M,
- V2_QUERY_PF_CAPS_D_RSV_UARS_S);
- caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
- V2_QUERY_PF_CAPS_E_RSV_MRWS_M,
- V2_QUERY_PF_CAPS_E_RSV_MRWS_S);
- caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
- V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M,
- V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S);
- caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs,
- V2_QUERY_PF_CAPS_E_RSV_CQS_M,
- V2_QUERY_PF_CAPS_E_RSV_CQS_S);
- caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs,
- V2_QUERY_PF_CAPS_E_RSV_SRQS_M,
- V2_QUERY_PF_CAPS_E_RSV_SRQS_S);
- caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey,
- V2_QUERY_PF_CAPS_E_RSV_LKEYS_M,
- V2_QUERY_PF_CAPS_E_RSV_LKEYS_S);
+ caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
+ caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
+ caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
+ caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
+ caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
+ caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+ caps->default_aeq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST);
+ caps->default_ceq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST);
+ caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
+ caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
+ caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
+ caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS);
+
+ caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS);
+ caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT);
+ caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS);
+ caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
+ caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
@@ -2355,15 +2338,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->cqe_hop_num = pbl_hop_num;
caps->srqwqe_hop_num = pbl_hop_num;
caps->idx_hop_num = pbl_hop_num;
- caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
- V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M,
- V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S);
- caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
- V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M,
- V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S);
- caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
- V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
- V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
+ caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM);
+ caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
+ caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
return 0;
}
@@ -2387,7 +2364,7 @@ static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev)
struct hns_roce_caps *caps = &hr_dev->caps;
int ret;
- if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
return 0;
ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_QPC_SIZE,
@@ -2654,6 +2631,198 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
}
+static void free_mr_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ int ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ if (free_mr->rsv_qp[i]) {
+ ret = ib_destroy_qp(free_mr->rsv_qp[i]);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to destroy qp in free mr.\n");
+
+ free_mr->rsv_qp[i] = NULL;
+ }
+ }
+
+ if (free_mr->rsv_cq) {
+ ib_destroy_cq(free_mr->rsv_cq);
+ free_mr->rsv_cq = NULL;
+ }
+
+ if (free_mr->rsv_pd) {
+ ib_dealloc_pd(free_mr->rsv_pd);
+ free_mr->rsv_pd = NULL;
+ }
+}
+
+static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_cq_init_attr cq_init_attr = {};
+ struct ib_qp_init_attr qp_init_attr = {};
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ int ret;
+ int i;
+
+ pd = ib_alloc_pd(ibdev, 0);
+ if (IS_ERR(pd)) {
+ ibdev_err(ibdev, "failed to create pd for free mr.\n");
+ return PTR_ERR(pd);
+ }
+ free_mr->rsv_pd = pd;
+
+ cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
+ cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
+ if (IS_ERR(cq)) {
+ ibdev_err(ibdev, "failed to create cq for free mr.\n");
+ ret = PTR_ERR(cq);
+ goto create_failed;
+ }
+ free_mr->rsv_cq = cq;
+
+ qp_init_attr.qp_type = IB_QPT_RC;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.send_cq = free_mr->rsv_cq;
+ qp_init_attr.recv_cq = free_mr->rsv_cq;
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
+ qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
+ qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
+ qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
+
+ qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr);
+ if (IS_ERR(qp)) {
+ ibdev_err(ibdev, "failed to create qp for free mr.\n");
+ ret = PTR_ERR(qp);
+ goto create_failed;
+ }
+
+ free_mr->rsv_qp[i] = qp;
+ }
+
+ return 0;
+
+create_failed:
+ free_mr_exit(hr_dev);
+
+ return ret;
+}
+
+static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
+ struct ib_qp_attr *attr, int sl_num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ int loopback;
+ int mask;
+ int ret;
+
+ hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
+ hr_qp->free_mr_en = 1;
+
+ mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+ ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+ if (ret) {
+ ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ loopback = hr_dev->loop_idc;
+ /* Set qpc lbi = 1 incidate loopback IO */
+ hr_dev->loop_idc = 1;
+
+ mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+ attr->qp_state = IB_QPS_RTR;
+ attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ attr->path_mtu = IB_MTU_256;
+ attr->dest_qp_num = hr_qp->qpn;
+ attr->rq_psn = HNS_ROCE_FREE_MR_USED_PSN;
+
+ rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
+
+ ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+ hr_dev->loop_idc = loopback;
+ if (ret) {
+ ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
+ IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
+ attr->qp_state = IB_QPS_RTS;
+ attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN;
+ attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
+ attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
+ ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+ if (ret)
+ ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int free_mr_modify_qp(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp_attr attr = {};
+ int ret;
+ int i;
+
+ rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_static_rate(&attr.ah_attr, 3);
+ rdma_ah_set_port_num(&attr.ah_attr, 1);
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ ret = free_mr_modify_rsv_qp(hr_dev, &attr, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int free_mr_init(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ int ret;
+
+ mutex_init(&free_mr->mutex);
+
+ ret = free_mr_alloc_res(hr_dev);
+ if (ret)
+ return ret;
+
+ ret = free_mr_modify_qp(hr_dev);
+ if (ret)
+ goto err_modify_qp;
+
+ return 0;
+
+err_modify_qp:
+ free_mr_exit(hr_dev);
+
+ return ret;
+}
+
static int get_hem_table(struct hns_roce_dev *hr_dev)
{
unsigned int qpc_count;
@@ -2770,21 +2939,21 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
free_dip_list(hr_dev);
}
-static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier,
- u16 op, u16 token, int event)
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
- mb->in_param_l = cpu_to_le32(in_param);
- mb->in_param_h = cpu_to_le32(in_param >> 32);
- mb->out_param_l = cpu_to_le32(out_param);
- mb->out_param_h = cpu_to_le32(out_param >> 32);
- mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
- mb->token_event_en = cpu_to_le32(event << 16 | token);
+ mb->in_param_l = cpu_to_le32(mbox_msg->in_param);
+ mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32);
+ mb->out_param_l = cpu_to_le32(mbox_msg->out_param);
+ mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32);
+ mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd);
+ mb->token_event_en = cpu_to_le32(mbox_msg->event_en << 16 |
+ mbox_msg->token);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@@ -2802,6 +2971,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
mb_st = (struct hns_roce_mbox_status *)desc.data;
end = msecs_to_jiffies(timeout) + jiffies;
while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
status = 0;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
true);
@@ -2837,9 +3009,8 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
return ret;
}
-static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier,
- u16 op, u16 token, int event)
+static int v2_post_mbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
u8 status = 0;
int ret;
@@ -2855,8 +3026,7 @@ static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
}
/* Post new message to mbox */
- ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, token, event);
+ ret = hns_roce_mbox_post(hr_dev, mbox_msg);
if (ret)
dev_err_ratelimited(hr_dev->dev,
"failed to post mailbox, ret = %d.\n", ret);
@@ -2864,12 +3034,13 @@ static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
return ret;
}
-static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev, unsigned int timeout)
+static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev)
{
u8 status = 0;
int ret;
- ret = v2_wait_mbox_complete(hr_dev, timeout, &status);
+ ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS,
+ &status);
if (!ret) {
if (status != MB_ST_COMPLETE_SUCC)
return -EBUSY;
@@ -2906,10 +3077,8 @@ static int config_sgid_table(struct hns_roce_dev *hr_dev,
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
- roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M,
- CFG_SGID_TB_TABLE_IDX_S, gid_index);
- roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M,
- CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
+ hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index);
+ hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type);
copy_gid(&sgid_tb->vf_sgid_l, gid);
@@ -2944,25 +3113,20 @@ static int config_gmv_table(struct hns_roce_dev *hr_dev,
copy_gid(&tb_a->vf_sgid_l, gid);
- roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M,
- CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type);
- roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S,
- vlan_id < VLAN_CFI_MASK);
- roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M,
- CFG_GMV_TB_VF_VLAN_ID_S, vlan_id);
+ hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id);
tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
- roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M,
- CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]);
- roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M,
- CFG_GMV_TB_SGID_IDX_S, gid_index);
+ hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]);
+ hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index);
return hns_roce_cmq_send(hr_dev, desc, 2);
}
-static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u32 port,
- int gid_index, const union ib_gid *gid,
+static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, int gid_index,
+ const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
@@ -3005,10 +3169,8 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
reg_smac_l = *(u32 *)(&addr[0]);
reg_smac_h = *(u16 *)(&addr[4]);
- roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M,
- CFG_SMAC_TB_IDX_S, phy_port);
- roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M,
- CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
+ hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port);
+ hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h);
smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
return hns_roce_cmq_send(hr_dev, &desc, 1);
@@ -3037,38 +3199,29 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
- roce_set_field(mpt_entry->byte_48_mode_ba,
- V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
- upper_32_bits(pbl_ba >> 3));
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
- roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
- V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
+ hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
- roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M,
- V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1]));
- roce_set_field(mpt_entry->byte_64_buf_pa1,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1]));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
- void *mb_buf, struct hns_roce_mr *mr,
- unsigned long mtpt_idx)
+ void *mb_buf, struct hns_roce_mr *mr)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
- int ret;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
- hr_reg_enable(mpt_entry, MPT_L_INV_EN);
hr_reg_write_bool(mpt_entry, MPT_BIND_EN,
mr->access & IB_ACCESS_MW_BIND);
@@ -3100,9 +3253,7 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
- ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
-
- return ret;
+ return set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
@@ -3113,24 +3264,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
u32 mr_access_flags = mr->access;
int ret = 0;
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
-
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mr->pd);
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
if (flags & IB_MR_REREG_ACCESS) {
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
- V2_MPT_BYTE_8_BIND_EN_S,
+ hr_reg_write(mpt_entry, MPT_BIND_EN,
(mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
- V2_MPT_BYTE_8_ATOMIC_EN_S,
+ hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
+ hr_reg_write(mpt_entry, MPT_RR_EN,
mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
+ hr_reg_write(mpt_entry, MPT_RW_EN,
mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
+ hr_reg_write(mpt_entry, MPT_LW_EN,
mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
}
@@ -3161,37 +3307,27 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
return -ENOBUFS;
}
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
- V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mr->pd);
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ hr_reg_enable(mpt_entry, MPT_RA_EN);
+ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+ hr_reg_enable(mpt_entry, MPT_FRE);
+ hr_reg_clear(mpt_entry, MPT_MR_MW);
+ hr_reg_enable(mpt_entry, MPT_BPD);
+ hr_reg_clear(mpt_entry, MPT_PA);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
- roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
- V2_MPT_BYTE_48_PBL_BA_H_S,
- upper_32_bits(pbl_ba >> 3));
-
- roce_set_field(mpt_entry->byte_64_buf_pa1,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
return 0;
}
@@ -3203,39 +3339,123 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mw->pdn);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
- V2_MPT_BYTE_4_PBL_HOP_NUM_S,
- mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
- mw->pbl_hop_num);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
-
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1);
-
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
- mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
+ hr_reg_write(mpt_entry, MPT_PD, mw->pdn);
+
+ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
+ hr_reg_enable(mpt_entry, MPT_LW_EN);
- roce_set_field(mpt_entry->byte_64_buf_pa1,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_enable(mpt_entry, MPT_MR_MW);
+ hr_reg_enable(mpt_entry, MPT_BPD);
+ hr_reg_clear(mpt_entry, MPT_PA);
+ hr_reg_write(mpt_entry, MPT_BQP,
+ mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
mpt_entry->lkey = cpu_to_le32(mw->rkey);
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM,
+ mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ mw->pbl_hop_num);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
return 0;
}
+static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ const struct ib_send_wr *bad_wr;
+ struct ib_rdma_wr rdma_wr = {};
+ struct ib_send_wr *send_wr;
+ int ret;
+
+ send_wr = &rdma_wr.wr;
+ send_wr->opcode = IB_WR_RDMA_WRITE;
+
+ ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
+
+static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_wc wc[ARRAY_SIZE(free_mr->rsv_qp)];
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ unsigned long end;
+ int cqe_cnt = 0;
+ int npolled;
+ int ret;
+ int i;
+
+ /*
+ * If the device initialization is not complete or in the uninstall
+ * process, then there is no need to execute free mr.
+ */
+ if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ priv->handle->rinfo.instance_state == HNS_ROCE_STATE_INIT ||
+ hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT)
+ return;
+
+ mutex_lock(&free_mr->mutex);
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ hr_qp = to_hr_qp(free_mr->rsv_qp[i]);
+
+ ret = free_mr_post_send_lp_wqe(hr_qp);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
+ hr_qp->qpn, ret);
+ break;
+ }
+
+ cqe_cnt++;
+ }
+
+ end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
+ while (cqe_cnt) {
+ npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc);
+ if (npolled < 0) {
+ ibdev_err(ibdev,
+ "failed to poll cqe for free mr, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+
+ if (time_after(jiffies, end)) {
+ ibdev_err(ibdev,
+ "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+ cqe_cnt -= npolled;
+ }
+
+out:
+ mutex_unlock(&free_mr->mutex);
+}
+
+static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev)
+{
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_send_cmd_to_hw(hr_dev);
+}
+
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
@@ -3571,7 +3791,6 @@ static const u32 wc_send_op_map[] = {
HR_WC_OP_MAP(RDMA_READ, RDMA_READ),
HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE),
HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE),
- HR_WC_OP_MAP(LOCAL_INV, LOCAL_INV),
HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP),
HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD),
HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP),
@@ -3621,9 +3840,6 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
break;
- case HNS_ROCE_V2_WQE_OP_LOCAL_INV:
- wc->wc_flags |= IB_WC_WITH_INVALIDATE;
- break;
case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
@@ -3809,38 +4025,38 @@ out:
}
static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
- int step_idx, u16 *mbox_op)
+ u32 step_idx, u8 *mbox_cmd)
{
- u16 op;
+ u8 cmd;
switch (type) {
case HEM_TYPE_QPC:
- op = HNS_ROCE_CMD_WRITE_QPC_BT0;
+ cmd = HNS_ROCE_CMD_WRITE_QPC_BT0;
break;
case HEM_TYPE_MTPT:
- op = HNS_ROCE_CMD_WRITE_MPT_BT0;
+ cmd = HNS_ROCE_CMD_WRITE_MPT_BT0;
break;
case HEM_TYPE_CQC:
- op = HNS_ROCE_CMD_WRITE_CQC_BT0;
+ cmd = HNS_ROCE_CMD_WRITE_CQC_BT0;
break;
case HEM_TYPE_SRQC:
- op = HNS_ROCE_CMD_WRITE_SRQC_BT0;
+ cmd = HNS_ROCE_CMD_WRITE_SRQC_BT0;
break;
case HEM_TYPE_SCCC:
- op = HNS_ROCE_CMD_WRITE_SCCC_BT0;
+ cmd = HNS_ROCE_CMD_WRITE_SCCC_BT0;
break;
case HEM_TYPE_QPC_TIMER:
- op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
+ cmd = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
break;
case HEM_TYPE_CQC_TIMER:
- op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
+ cmd = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
break;
default:
dev_warn(hr_dev->dev, "failed to check hem type %u.\n", type);
return -EINVAL;
}
- *mbox_op = op + step_idx;
+ *mbox_cmd = cmd + step_idx;
return 0;
}
@@ -3863,10 +4079,10 @@ static int config_gmv_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj,
}
static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj,
- dma_addr_t base_addr, u32 hem_type, int step_idx)
+ dma_addr_t base_addr, u32 hem_type, u32 step_idx)
{
int ret;
- u16 op;
+ u8 cmd;
if (unlikely(hem_type == HEM_TYPE_GMV))
return config_gmv_ba_to_hw(hr_dev, obj, base_addr);
@@ -3874,16 +4090,16 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj,
if (unlikely(hem_type == HEM_TYPE_SCCC && step_idx))
return 0;
- ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &op);
+ ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &cmd);
if (ret < 0)
return ret;
- return config_hem_ba_to_hw(hr_dev, obj, base_addr, op);
+ return config_hem_ba_to_hw(hr_dev, base_addr, cmd, obj);
}
static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
- int step_idx)
+ u32 step_idx)
{
struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
@@ -3941,29 +4157,29 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
}
static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj,
- int step_idx)
+ struct hns_roce_hem_table *table,
+ int tag, u32 step_idx)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ u8 cmd = 0xff;
int ret;
- u16 op = 0xff;
if (!hns_roce_check_whether_mhop(hr_dev, table->type))
return 0;
switch (table->type) {
case HEM_TYPE_QPC:
- op = HNS_ROCE_CMD_DESTROY_QPC_BT0;
+ cmd = HNS_ROCE_CMD_DESTROY_QPC_BT0;
break;
case HEM_TYPE_MTPT:
- op = HNS_ROCE_CMD_DESTROY_MPT_BT0;
+ cmd = HNS_ROCE_CMD_DESTROY_MPT_BT0;
break;
case HEM_TYPE_CQC:
- op = HNS_ROCE_CMD_DESTROY_CQC_BT0;
+ cmd = HNS_ROCE_CMD_DESTROY_CQC_BT0;
break;
case HEM_TYPE_SRQC:
- op = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
+ cmd = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
break;
case HEM_TYPE_SCCC:
case HEM_TYPE_QPC_TIMER:
@@ -3976,15 +4192,13 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
return 0;
}
- op += step_idx;
+ cmd += step_idx;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- /* configure the tag and op */
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, obj, 0, op,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cmd, tag);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
@@ -4008,9 +4222,8 @@ static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
memcpy(mailbox->buf, context, qpc_size);
memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size);
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
- HNS_ROCE_CMD_MODIFY_QPC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_QPC, hr_qp->qpn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -4075,7 +4288,6 @@ static inline int get_pdn(struct ib_pd *ib_pd)
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
- int attr_mask,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -4139,7 +4351,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
}
static void modify_qp_init_to_init(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr, int attr_mask,
+ const struct ib_qp_attr *attr,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -4388,7 +4600,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_reg_clear(qpc_mask, QPC_DQPN);
}
- memcpy(&(context->dmac), dmac, sizeof(u32));
+ memcpy(&context->dmac, dmac, sizeof(u32));
hr_reg_write(context, QPC_DMAC_H, *((u16 *)(&dmac[4])));
qpc_mask->dmac = 0;
hr_reg_clear(qpc_mask, QPC_DMAC_H);
@@ -4482,14 +4694,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return 0;
}
-static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
-{
- if (!fl)
- fl = rdma_calc_flow_label(lqpn, rqpn);
-
- return rdma_flow_label_to_udp_sport(fl);
-}
-
static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
u32 *dip_idx)
{
@@ -4666,6 +4870,18 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
u8 hr_port;
int ret;
+ /*
+ * If free_mr_en of qp is set, it means that this qp comes from
+ * free mr. This qp will perform the loopback operation.
+ * In the loopback scenario, only sl needs to be set.
+ */
+ if (hr_qp->free_mr_en) {
+ hr_reg_write(context, QPC_SL, rdma_ah_get_sl(&attr->ah_attr));
+ hr_reg_clear(qpc_mask, QPC_SL);
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ return 0;
+ }
+
ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
hr_port = ib_port - 1;
is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
@@ -4677,9 +4893,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
if (ret)
return ret;
- if (gid_attr)
- is_udp = (gid_attr->gid_type ==
- IB_GID_TYPE_ROCE_UDP_ENCAP);
+ is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
}
/* Only HIP08 needs to set the vlan_en bits in QPC */
@@ -4706,8 +4920,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
}
hr_reg_write(context, QPC_UDPSPN,
- is_udp ? get_udp_sport(grh->flow_label, ibqp->qp_num,
- attr->dest_qp_num) : 0);
+ is_udp ? rdma_get_udp_sport(grh->flow_label, ibqp->qp_num,
+ attr->dest_qp_num) :
+ 0);
hr_reg_clear(qpc_mask, QPC_UDPSPN);
@@ -4733,7 +4948,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) {
ibdev_err(ibdev,
- "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n",
+ "failed to fill QPC, sl (%u) shouldn't be larger than %d.\n",
hr_qp->sl, MAX_SERVICE_LEVEL);
return -EINVAL;
}
@@ -4762,7 +4977,8 @@ static bool check_qp_state(enum ib_qp_state cur_state,
[IB_QPS_ERR] = true },
[IB_QPS_SQD] = {},
[IB_QPS_SQE] = {},
- [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }
+ [IB_QPS_ERR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_ERR] = true }
};
return sm[cur_state][new_state];
@@ -4786,11 +5002,9 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
- modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
- qpc_mask);
+ modify_qp_reset_to_init(ibqp, attr, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- modify_qp_init_to_init(ibqp, attr, attr_mask, context,
- qpc_mask);
+ modify_qp_init_to_init(ibqp, attr, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
qpc_mask);
@@ -4802,6 +5016,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
return ret;
}
+static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
+{
+#define QP_ACK_TIMEOUT_MAX_HIP08 20
+#define QP_ACK_TIMEOUT_OFFSET 10
+#define QP_ACK_TIMEOUT_MAX 31
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 20.\n");
+ return false;
+ }
+ *timeout += QP_ACK_TIMEOUT_OFFSET;
+ } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 31.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4811,6 +5049,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
int ret = 0;
+ u8 timeout;
if (attr_mask & IB_QP_AV) {
ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
@@ -4820,12 +5059,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_TIMEOUT) {
- if (attr->timeout < 31) {
- hr_reg_write(context, QPC_AT, attr->timeout);
+ timeout = attr->timeout;
+ if (check_qp_timeout_cfg_range(hr_dev, &timeout)) {
+ hr_reg_write(context, QPC_AT, timeout);
hr_reg_clear(qpc_mask, QPC_AT);
- } else {
- ibdev_warn(&hr_dev->ib_dev,
- "Local ACK timeout shall be 0 to 30.\n");
}
}
@@ -4882,7 +5119,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
- hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer);
+ hr_reg_write(context, QPC_MIN_RNR_TIME,
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer);
hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME);
}
@@ -5053,9 +5292,8 @@ static int to_ib_qp_st(enum hns_roce_v2_qp_state state)
return (state < ARRAY_SIZE(map)) ? map[state] : -1;
}
-static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_v2_qp_context *hr_context)
+static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, u32 qpn,
+ void *buffer)
{
struct hns_roce_cmd_mailbox *mailbox;
int ret;
@@ -5064,13 +5302,12 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
- HNS_ROCE_CMD_QUERY_QPC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_QPC,
+ qpn);
if (ret)
goto out;
- memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz);
+ memcpy(buffer, mailbox->buf, hr_dev->caps.qpc_sz);
out:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -5100,7 +5337,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
goto done;
}
- ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context);
+ ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
if (ret) {
ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret);
ret = -EINVAL;
@@ -5298,7 +5535,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
msleep(20);
}
- ibdev_err(ibdev, "Query SCC clr done flag overtime.\n");
+ ibdev_err(ibdev, "query SCC clr done flag overtime.\n");
ret = -ETIMEDOUT;
out:
@@ -5432,9 +5669,8 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
hr_reg_write(srq_context, SRQC_LIMIT_WL, srq_attr->srq_limit);
hr_reg_clear(srqc_mask, SRQC_LIMIT_WL);
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
- HNS_ROCE_CMD_MODIFY_SRQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret) {
ibdev_err(&hr_dev->ib_dev,
@@ -5460,9 +5696,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
return PTR_ERR(mailbox);
srq_context = mailbox->buf;
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
- HNS_ROCE_CMD_QUERY_SRQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_SRQC, srq->srqn);
if (ret) {
ibdev_err(&hr_dev->ib_dev,
"failed to process cmd of querying SRQ, ret = %d.\n",
@@ -5499,12 +5734,21 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count);
hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev,
+ "cq_period(%u) reached the upper limit, adjusted to 65.\n",
+ cq_period);
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD;
+ }
+ cq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period);
hr_reg_clear(cqc_mask, CQC_CQ_PERIOD);
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 1,
- HNS_ROCE_CMD_MODIFY_CQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret)
ibdev_err(&hr_dev->ib_dev,
@@ -5514,6 +5758,64 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
return ret;
}
+static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
+ void *buffer)
+{
+ struct hns_roce_v2_cq_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_CQC, cqn);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd when querying CQ, ret = %d.\n",
+ ret);
+ goto err_mailbox;
+ }
+
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key,
+ void *buffer)
+{
+ struct hns_roce_v2_mpt_entry *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ key_to_hw_index(key));
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd when querying MPT, ret = %d.\n",
+ ret);
+ goto err_mailbox;
+ }
+
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
static void hns_roce_irq_work_handle(struct work_struct *work)
{
struct hns_roce_work *irq_work =
@@ -5522,26 +5824,26 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- ibdev_info(ibdev, "Path migrated succeeded.\n");
+ ibdev_info(ibdev, "path migrated succeeded.\n");
break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- ibdev_warn(ibdev, "Path migration failed.\n");
+ ibdev_warn(ibdev, "path migration failed.\n");
break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- ibdev_warn(ibdev, "Send queue drained.\n");
+ ibdev_warn(ibdev, "send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n",
+ ibdev_err(ibdev, "local work queue 0x%x catast error, sub_event type is: %d\n",
irq_work->queue_num, irq_work->sub_type);
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n",
+ ibdev_err(ibdev, "invalid request local work queue 0x%x error.\n",
irq_work->queue_num);
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n",
+ ibdev_err(ibdev, "local access violation work queue 0x%x error, sub_event type is: %d\n",
irq_work->queue_num, irq_work->sub_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
@@ -5563,7 +5865,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
ibdev_warn(ibdev, "DB overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_FLR:
- ibdev_warn(ibdev, "Function level reset.\n");
+ ibdev_warn(ibdev, "function level reset.\n");
break;
case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
ibdev_err(ibdev, "xrc domain violation error.\n");
@@ -5587,12 +5889,12 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
if (!irq_work)
return;
- INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
+ INIT_WORK(&irq_work->work, hns_roce_irq_work_handle);
irq_work->hr_dev = hr_dev;
irq_work->event_type = eq->event_type;
irq_work->sub_type = eq->sub_type;
irq_work->queue_num = queue_num;
- queue_work(hr_dev->irq_workq, &(irq_work->work));
+ queue_work(hr_dev->irq_workq, &irq_work->work);
}
static void update_eq_db(struct hns_roce_eq *eq)
@@ -5627,16 +5929,16 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
(eq->cons_index & (eq->entries - 1)) *
eq->eqe_size);
- return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
+ return (hr_reg_read(aeqe, AEQE_OWNER) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
-static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
- int aeqe_found = 0;
+ irqreturn_t aeqe_found = IRQ_NONE;
int event_type;
u32 queue_num;
int sub_type;
@@ -5647,15 +5949,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
*/
dma_rmb();
- event_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
- HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
- sub_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_V2_AEQE_SUB_TYPE_M,
- HNS_ROCE_V2_AEQE_SUB_TYPE_S);
- queue_num = roce_get_field(aeqe->event.queue_event.num,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+ event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE);
+ sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE);
+ queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -5688,7 +5984,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
case HNS_ROCE_EVENT_TYPE_FLR:
break;
default:
- dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
+ dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n",
event_type, eq->eqn, eq->cons_index);
break;
}
@@ -5696,7 +5992,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
eq->event_type = event_type;
eq->sub_type = sub_type;
++eq->cons_index;
- aeqe_found = 1;
+ aeqe_found = IRQ_HANDLED;
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
@@ -5704,7 +6000,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
}
update_eq_db(eq);
- return aeqe_found;
+
+ return IRQ_RETVAL(aeqe_found);
}
static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
@@ -5715,15 +6012,15 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
(eq->cons_index & (eq->entries - 1)) *
eq->eqe_size);
- return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
- (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
+ return (hr_reg_read(ceqe, CEQE_OWNER) ^
+ !!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
-static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
- int ceqe_found = 0;
+ irqreturn_t ceqe_found = IRQ_NONE;
u32 cqn;
while (ceqe) {
@@ -5732,59 +6029,53 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
*/
dma_rmb();
- cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M,
- HNS_ROCE_V2_CEQE_COMP_CQN_S);
+ cqn = hr_reg_read(ceqe, CEQE_CQN);
hns_roce_cq_completion(hr_dev, cqn);
++eq->cons_index;
- ceqe_found = 1;
+ ceqe_found = IRQ_HANDLED;
ceqe = next_ceqe_sw_v2(eq);
}
update_eq_db(eq);
- return ceqe_found;
+ return IRQ_RETVAL(ceqe_found);
}
static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work;
+ irqreturn_t int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
int_work = hns_roce_v2_ceq_int(hr_dev, eq);
else
- /* Asychronous event interrupt */
+ /* Asynchronous event interrupt */
int_work = hns_roce_v2_aeq_int(hr_dev, eq);
return IRQ_RETVAL(int_work);
}
-static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
+ u32 int_st)
{
- struct hns_roce_dev *hr_dev = dev_id;
- struct device *dev = hr_dev->dev;
- int int_work = 0;
- u32 int_st;
+ struct pci_dev *pdev = hr_dev->pci_dev;
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+ const struct hnae3_ae_ops *ops = ae_dev->ops;
+ irqreturn_t int_work = IRQ_NONE;
u32 int_en;
- /* Abnormal interrupt */
- int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
- struct pci_dev *pdev = hr_dev->pci_dev;
- struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
- const struct hnae3_ae_ops *ops = ae_dev->ops;
+ dev_err(hr_dev->dev, "AEQ overflow!\n");
- dev_err(dev, "AEQ overflow!\n");
-
- int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
+ 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
/* Set reset level for reset_event() */
if (ops->set_default_reset_request)
@@ -5796,19 +6087,165 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
- int_work = 1;
- } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_RAS_INT_S)) {
- dev_err(dev, "RAS interrupt!\n");
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no basic abn irq found.\n");
+ }
- int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_RAS_INT_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+ return IRQ_RETVAL(int_work);
+}
- int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+static int fmea_ram_ecc_query(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_QUERY_RAM_ECC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ ecc_info->is_ecc_err = hr_reg_read(req, QUERY_RAM_ECC_1BIT_ERR);
+ ecc_info->res_type = hr_reg_read(req, QUERY_RAM_ECC_RES_TYPE);
+ ecc_info->index = hr_reg_read(req, QUERY_RAM_ECC_TAG);
+
+ return 0;
+}
+
+static int fmea_recover_gmv(struct hns_roce_dev *hr_dev, u32 idx)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 addr_upper;
+ u32 addr_low;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, true);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read gmv, ret = %d.\n", ret);
+ return ret;
+ }
+
+ addr_low = hr_reg_read(req, CFG_GMV_BT_BA_L);
+ addr_upper = hr_reg_read(req, CFG_GMV_BT_BA_H);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+ hr_reg_write(req, CFG_GMV_BT_BA_L, addr_low);
+ hr_reg_write(req, CFG_GMV_BT_BA_H, addr_upper);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
+{
+ if (res_type == ECC_RESOURCE_QPC_TIMER ||
+ res_type == ECC_RESOURCE_CQC_TIMER ||
+ res_type == ECC_RESOURCE_SCCC)
+ return le64_to_cpu(*data);
+
+ return le64_to_cpu(*data) << PAGE_SHIFT;
+}
+
+static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
+ u32 index)
+{
+ u8 write_bt0_op = fmea_ram_res[res_type].write_bt0_op;
+ u8 read_bt0_op = fmea_ram_res[res_type].read_bt0_op;
+ struct hns_roce_cmd_mailbox *mailbox;
+ u64 addr;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, read_bt0_op, index);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read fmea ram, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ addr = fmea_get_ram_res_addr(res_type, mailbox->buf);
+
+ ret = hns_roce_cmd_mbox(hr_dev, addr, 0, write_bt0_op, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to write fmea ram, ret = %d.\n",
+ ret);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static void fmea_ram_ecc_recover(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ u32 res_type = ecc_info->res_type;
+ u32 index = ecc_info->index;
+ int ret;
+
+ BUILD_BUG_ON(ARRAY_SIZE(fmea_ram_res) != ECC_RESOURCE_COUNT);
- int_work = 1;
+ if (res_type >= ECC_RESOURCE_COUNT) {
+ dev_err(hr_dev->dev, "unsupported fmea ram ecc type %u.\n",
+ res_type);
+ return;
+ }
+
+ if (res_type == ECC_RESOURCE_GMV)
+ ret = fmea_recover_gmv(hr_dev, index);
+ else
+ ret = fmea_recover_others(hr_dev, res_type, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to recover %s, index = %u, ret = %d.\n",
+ fmea_ram_res[res_type].name, index, ret);
+}
+
+static void fmea_ram_ecc_work(struct work_struct *ecc_work)
+{
+ struct hns_roce_dev *hr_dev =
+ container_of(ecc_work, struct hns_roce_dev, ecc_work);
+ struct fmea_ram_ecc ecc_info = {};
+
+ if (fmea_ram_ecc_query(hr_dev, &ecc_info)) {
+ dev_err(hr_dev->dev, "failed to query fmea ram ecc.\n");
+ return;
+ }
+
+ if (!ecc_info.is_ecc_err) {
+ dev_err(hr_dev->dev, "there is no fmea ram ecc err found.\n");
+ return;
+ }
+
+ fmea_ram_ecc_recover(hr_dev, &ecc_info);
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+{
+ struct hns_roce_dev *hr_dev = dev_id;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_st;
+
+ int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
+
+ if (int_st) {
+ int_work = abnormal_interrupt_basic(hr_dev, int_st);
+ } else if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ queue_work(hr_dev->irq_workq, &hr_dev->ecc_work);
+ int_work = IRQ_HANDLED;
} else {
- dev_err(dev, "There is no abnormal irq found!\n");
+ dev_err(hr_dev->dev, "there is no abnormal irq found.\n");
}
return IRQ_RETVAL(int_work);
@@ -5827,21 +6264,20 @@ static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag);
}
-static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
+static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn)
{
struct device *dev = hr_dev->dev;
int ret;
+ u8 cmd;
if (eqn < hr_dev->caps.num_comp_vectors)
- ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
- 0, HNS_ROCE_CMD_DESTROY_CEQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ cmd = HNS_ROCE_CMD_DESTROY_CEQC;
else
- ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
- 0, HNS_ROCE_CMD_DESTROY_AEQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ cmd = HNS_ROCE_CMD_DESTROY_AEQC;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M);
if (ret)
- dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
+ dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn);
}
static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
@@ -5894,6 +6330,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX);
hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n",
+ eq->eq_period);
+ eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD;
+ }
+ eq->eq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+
hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period);
hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER);
hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3);
@@ -5930,22 +6375,21 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL,
0);
if (err)
- dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err);
+ dev_err(hr_dev->dev, "failed to alloc EQE mtr, err %d\n", err);
return err;
}
static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq,
- unsigned int eq_cmd)
+ struct hns_roce_eq *eq, u8 eq_cmd)
{
struct hns_roce_cmd_mailbox *mailbox;
int ret;
/* Allocate mailbox memory */
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR_OR_NULL(mailbox))
- return -ENOMEM;
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
ret = alloc_eq_buf(hr_dev, eq);
if (ret)
@@ -5955,8 +6399,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
if (ret)
goto err_cmd_mbox;
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
- eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, eq_cmd, eq->eqn);
if (ret) {
dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n");
goto err_cmd_mbox;
@@ -6021,7 +6464,7 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
0, hr_dev->irq_names[j - comp_num],
&eq_table->eq[j - other_num]);
if (ret) {
- dev_err(hr_dev->dev, "Request irq error!\n");
+ dev_err(hr_dev->dev, "request irq error!\n");
goto err_request_failed;
}
}
@@ -6067,14 +6510,14 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
struct device *dev = hr_dev->dev;
struct hns_roce_eq *eq;
- unsigned int eq_cmd;
- int irq_num;
- int eq_num;
int other_num;
int comp_num;
int aeq_num;
- int i;
+ int irq_num;
+ int eq_num;
+ u8 eq_cmd;
int ret;
+ int i;
other_num = hr_dev->caps.num_other_vectors;
comp_num = hr_dev->caps.num_comp_vectors;
@@ -6119,6 +6562,8 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
}
}
+ INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);
+
hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
if (!hr_dev->irq_workq) {
dev_err(dev, "failed to create irq workqueue.\n");
@@ -6172,10 +6617,6 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
kfree(eq_table->eq);
}
-static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = {
- .query_cqc_info = hns_roce_v2_query_cqc_info,
-};
-
static const struct ib_device_ops hns_roce_v2_dev_ops = {
.destroy_qp = hns_roce_v2_destroy_qp,
.modify_cq = hns_roce_v2_modify_cq,
@@ -6211,10 +6652,14 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.set_hem = hns_roce_v2_set_hem,
.clear_hem = hns_roce_v2_clear_hem,
.modify_qp = hns_roce_v2_modify_qp,
+ .dereg_mr = hns_roce_v2_dereg_mr,
.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
.init_eq = hns_roce_v2_init_eq_table,
.cleanup_eq = hns_roce_v2_cleanup_eq_table,
.write_srqc = hns_roce_v2_write_srqc,
+ .query_cqc = hns_roce_v2_query_cqc,
+ .query_qpc = hns_roce_v2_query_qpc,
+ .query_mpt = hns_roce_v2_query_mpt,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};
@@ -6246,7 +6691,6 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
hr_dev->is_vf = id->driver_data;
hr_dev->dev = &handle->pdev->dev;
hr_dev->hw = &hns_roce_hw_v2;
- hr_dev->dfx = &hns_roce_dfx_hw_v2;
hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
hr_dev->odb_offset = hr_dev->sdb_offset;
@@ -6292,14 +6736,25 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
ret = hns_roce_init(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "RoCE Engine init failed!\n");
- goto error_failed_get_cfg;
+ goto error_failed_cfg;
+ }
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init free mr!\n");
+ goto error_failed_roce_init;
+ }
}
handle->priv = hr_dev;
return 0;
-error_failed_get_cfg:
+error_failed_roce_init:
+ hns_roce_exit(hr_dev);
+
+error_failed_cfg:
kfree(hr_dev->priv);
error_failed_kzalloc:
@@ -6321,6 +6776,9 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
hns_roce_handle_device_err(hr_dev);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
hns_roce_exit(hr_dev);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
@@ -6344,7 +6802,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
if (!id)
return 0;
- if (id->driver_data && handle->pdev->revision < PCI_REVISION_ID_HIP09)
+ if (id->driver_data && handle->pdev->revision == PCI_REVISION_ID_HIP08)
return 0;
ret = __hns_roce_hw_v2_init_instance(handle);
@@ -6428,7 +6886,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret);
} else {
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
- dev_info(dev, "Reset done, RoCE client reinit finished.\n");
+ dev_info(dev, "reset done, RoCE client reinit finished.\n");
}
return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 4d904d5e82be..c7bf2d52c1cd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -35,41 +35,25 @@
#include <linux/bitops.h>
-#define HNS_ROCE_VF_QPC_BT_NUM 256
-#define HNS_ROCE_VF_SCCC_BT_NUM 64
-#define HNS_ROCE_VF_SRQC_BT_NUM 64
-#define HNS_ROCE_VF_CQC_BT_NUM 64
-#define HNS_ROCE_VF_MPT_BT_NUM 64
-#define HNS_ROCE_VF_SMAC_NUM 32
-#define HNS_ROCE_VF_SL_NUM 8
-#define HNS_ROCE_VF_GMV_BT_NUM 256
-
#define HNS_ROCE_V2_MAX_QP_NUM 0x1000
-#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200
#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
-#define HNS_ROCE_V2_MAX_SRQ 0x100000
#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
#define HNS_ROCE_V2_MAX_SRQ_SGE 64
#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
-#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100
+#define HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM 0x100
+#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100
#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000
-#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64
-#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V3_MAX_SQ_INLINE 0x400
#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
#define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1
-#define HNS_ROCE_V2_MAX_IRQ_NUM 65
-#define HNS_ROCE_V2_COMP_VEC_NUM 63
#define HNS_ROCE_V2_AEQE_VEC_NUM 1
#define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1
#define HNS_ROCE_V2_MAX_MTPT_NUM 0x100000
-#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
-#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000
@@ -79,9 +63,7 @@
#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128
#define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64
#define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16
-#define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64
#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
-#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100
#define HNS_ROCE_V2_CQC_ENTRY_SZ 64
#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64
@@ -98,12 +80,11 @@
#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
-#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
+#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x0
#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
-#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
@@ -117,12 +98,14 @@
#define HNS_ROCE_CQE_HOP_NUM 1
#define HNS_ROCE_SRQWQE_HOP_NUM 1
#define HNS_ROCE_PBL_HOP_NUM 2
-#define HNS_ROCE_EQE_HOP_NUM 2
#define HNS_ROCE_IDX_HOP_NUM 1
#define HNS_ROCE_SQWQE_HOP_NUM 2
#define HNS_ROCE_EXT_SGE_HOP_NUM 1
#define HNS_ROCE_RQWQE_HOP_NUM 2
+#define HNS_ROCE_V2_EQE_HOP_NUM 2
+#define HNS_ROCE_V3_EQE_HOP_NUM 1
+
#define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K 6
#define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K 2
#define HNS_ROCE_V2_GID_INDEX_NUM 16
@@ -153,6 +136,18 @@ enum {
#define CMD_CSQ_DESC_NUM 1024
#define CMD_CRQ_DESC_NUM 1024
+/* Free mr used parameters */
+#define HNS_ROCE_FREE_MR_USED_CQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_QP_NUM 0x8
+#define HNS_ROCE_FREE_MR_USED_PSN 0x0808
+#define HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT 0x7
+#define HNS_ROCE_FREE_MR_USED_QP_TIMEOUT 0x12
+#define HNS_ROCE_FREE_MR_USED_SQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_SQSGE_NUM 0x2
+#define HNS_ROCE_FREE_MR_USED_RQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_RQSGE_NUM 0x2
+#define HNS_ROCE_V2_FREE_MR_TIMEOUT 4500
+
enum {
NO_ARMED = 0x0,
REG_NXT_CEQE = 0x2,
@@ -184,7 +179,6 @@ enum {
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
- HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb,
HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
};
@@ -252,6 +246,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
HNS_ROCE_OPC_EXT_CFG = 0x8512,
+ HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
@@ -305,33 +300,6 @@ struct hns_roce_v2_cq_context {
#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
-#define V2_CQC_BYTE_4_ARM_ST_S 6
-#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6)
-
-#define V2_CQC_BYTE_4_CEQN_S 15
-#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15)
-
-#define V2_CQC_BYTE_8_CQN_S 0
-#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30
-#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30)
-
-#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0
-#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0
-#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_52_CQE_CNT_S 0
-#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0
-#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0)
-
-#define V2_CQC_BYTE_56_CQ_PERIOD_S 16
-#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16)
-
#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
#define CQC_CQ_ST CQC_FIELD_LOC(1, 0)
@@ -434,6 +402,7 @@ enum hns_roce_v2_qp_state {
struct hns_roce_v2_qp_context_ex {
__le32 data[64];
};
+
struct hns_roce_v2_qp_context {
__le32 byte_4_sqpn_tst;
__le32 wqe_sge_ba;
@@ -786,16 +755,20 @@ struct hns_roce_v2_mpt_entry {
#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71)
#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72)
#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96)
-#define MPT_LEN MPT_FIELD_LOC(191, 128)
+#define MPT_LEN_L MPT_FIELD_LOC(159, 128)
+#define MPT_LEN_H MPT_FIELD_LOC(191, 160)
#define MPT_LKEY MPT_FIELD_LOC(223, 192)
#define MPT_VA MPT_FIELD_LOC(287, 224)
#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
-#define MPT_PBL_BA MPT_FIELD_LOC(380, 320)
+#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320)
+#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352)
#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
-#define MPT_PA0 MPT_FIELD_LOC(441, 384)
+#define MPT_PA0_L MPT_FIELD_LOC(415, 384)
+#define MPT_PA0_H MPT_FIELD_LOC(441, 416)
#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
-#define MPT_PA1 MPT_FIELD_LOC(505, 448)
+#define MPT_PA1_L MPT_FIELD_LOC(479, 448)
+#define MPT_PA1_H MPT_FIELD_LOC(505, 480)
#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
@@ -901,48 +874,24 @@ struct hns_roce_v2_ud_send_wqe {
u8 dgid[GID_LEN_V2];
};
-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
-
-#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
-
-#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8
-
-#define V2_UD_SEND_WQE_BYTE_4_SE_S 11
-
-#define V2_UD_SEND_WQE_BYTE_16_PD_S 0
-#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0)
-
-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24
-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
-
-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
-
-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16
-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16)
-
-#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0
-#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0)
-
-#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0
-#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0)
-
-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16
-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16)
-
-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24
-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24)
-
-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0
-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0)
-
-#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
-#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
-
-#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
-
-#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
+#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l)
+
+#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0)
+#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7)
+#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8)
+#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11)
+#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96)
+#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120)
+#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128)
+#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176)
+#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224)
+#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256)
+#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272)
+#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280)
+#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288)
+#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308)
+#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318)
+#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319)
struct hns_roce_v2_rc_send_wqe {
__le32 byte_4;
@@ -957,42 +906,22 @@ struct hns_roce_v2_rc_send_wqe {
__le64 va;
};
-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
-
-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5
-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
-
-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13
-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
-
-#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15
-#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
-
-#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
-
-#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
-
-#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9
-
-#define V2_RC_SEND_WQE_BYTE_4_SO_S 10
-
-#define V2_RC_SEND_WQE_BYTE_4_SE_S 11
-
-#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
-
-#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31
-
-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
-
-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24
-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
-
-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
-
-#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31
+#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l)
+
+#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0)
+#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5)
+#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13)
+#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
+#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
+#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
+#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
+#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
+#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
+#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31)
+#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96)
+#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120)
+#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128)
+#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
@@ -1035,7 +964,10 @@ struct hns_roce_func_clear {
__le32 rsv[4];
};
-#define FUNC_CLEAR_RST_FUN_DONE_S 0
+#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l)
+
+#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32)
+
/* Each physical function manages up to 248 virtual functions, it takes up to
* 100ms for each function to execute clear. If an abnormal reset occurs, it is
* executed twice at most, so it takes up to 249 * 2 * 100ms.
@@ -1114,12 +1046,12 @@ struct hns_roce_vf_switch {
__le32 resv3;
};
-#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
-#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
+#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l)
-#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
-#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
-#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
+#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35)
+#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65)
+#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66)
+#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67)
struct hns_roce_post_mbox {
__le32 in_param_l;
@@ -1173,6 +1105,11 @@ enum {
#define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32)
#define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64)
+/* Fields of HNS_ROCE_QUERY_RAM_ECC */
+#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0)
+#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32)
+#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64)
+
struct hns_roce_cfg_sgid_tb {
__le32 table_idx_rsv;
__le32 vf_sgid_l;
@@ -1182,11 +1119,10 @@ struct hns_roce_cfg_sgid_tb {
__le32 vf_sgid_type_rsv;
};
-#define CFG_SGID_TB_TABLE_IDX_S 0
-#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
+#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l)
-#define CFG_SGID_TB_VF_SGID_TYPE_S 0
-#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
+#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0)
+#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160)
struct hns_roce_cfg_smac_tb {
__le32 tb_idx_rsv;
@@ -1194,11 +1130,11 @@ struct hns_roce_cfg_smac_tb {
__le32 vf_smac_h_rsv;
__le32 rsv[3];
};
-#define CFG_SMAC_TB_IDX_S 0
-#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
-#define CFG_SMAC_TB_VF_SMAC_H_S 0
-#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
+#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l)
+
+#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0)
+#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64)
struct hns_roce_cfg_gmv_tb_a {
__le32 vf_sgid_l;
@@ -1209,16 +1145,11 @@ struct hns_roce_cfg_gmv_tb_a {
__le32 resv;
};
-#define CFG_GMV_TB_SGID_IDX_S 0
-#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0)
+#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l)
-#define CFG_GMV_TB_VF_SGID_TYPE_S 0
-#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0)
-
-#define CFG_GMV_TB_VF_VLAN_EN_S 2
-
-#define CFG_GMV_TB_VF_VLAN_ID_S 16
-#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16)
+#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128)
+#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130)
+#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144)
struct hns_roce_cfg_gmv_tb_b {
__le32 vf_smac_l;
@@ -1227,8 +1158,10 @@ struct hns_roce_cfg_gmv_tb_b {
__le32 resv[3];
};
-#define CFG_GMV_TB_SMAC_H_S 0
-#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0)
+#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l)
+
+#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
+#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
struct hns_roce_query_pf_caps_a {
@@ -1237,7 +1170,7 @@ struct hns_roce_query_pf_caps_a {
__le16 max_sq_sg;
__le16 max_sq_inline;
__le16 max_rq_sg;
- __le32 max_extend_sg;
+ __le32 rsv0;
__le16 num_qpc_timer;
__le16 num_cqc_timer;
__le16 max_srq_sges;
@@ -1245,7 +1178,7 @@ struct hns_roce_query_pf_caps_a {
u8 num_other_vectors;
u8 max_sq_desc_sz;
u8 max_rq_desc_sz;
- u8 max_srq_desc_sz;
+ u8 rsv1;
u8 cqe_sz;
};
@@ -1280,29 +1213,17 @@ struct hns_roce_query_pf_caps_c {
__le16 rq_depth;
};
-#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0
-#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20
-#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20)
-
-#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0
-#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0)
+#define PF_CAPS_C_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l)
-#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20
-#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20)
-
-#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0
-#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0)
-
-#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0
-#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0
-#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20
-#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20)
+#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0)
+#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20)
+#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32)
+#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52)
+#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64)
+#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96)
+#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128)
+#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148)
struct hns_roce_query_pf_caps_d {
__le32 wq_hop_num_max_srqs;
@@ -1313,20 +1234,26 @@ struct hns_roce_query_pf_caps_d {
__le32 num_uars_rsv_pds;
__le32 rsv_uars_rsv_qps;
};
-#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0
-#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20
-#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20)
-#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22
-#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22)
-
-#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24
-#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24)
-
-#define V2_QUERY_PF_CAPS_D_CONG_TYPE_S 26
-#define V2_QUERY_PF_CAPS_D_CONG_TYPE_M GENMASK(29, 26)
+#define PF_CAPS_D_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l)
+
+#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0)
+#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
+#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
+#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
+#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
+#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
+#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
+#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
+#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
+#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
+#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
+#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180)
+
+#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
struct hns_roce_congestion_algorithm {
u8 alg_sel;
@@ -1335,33 +1262,6 @@ struct hns_roce_congestion_algorithm {
u8 wnd_mode_sel;
};
-#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0
-#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0)
-
-#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22
-#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22)
-
-#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0
-#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0)
-
-#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22
-#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22)
-
-#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24
-#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24)
-
-#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0
-#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20
-#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20)
-
-#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0
-#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20
-#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20)
-
struct hns_roce_query_pf_caps_e {
__le32 chunk_size_shift_rsv_mrws;
__le32 rsv_cqs;
@@ -1373,20 +1273,14 @@ struct hns_roce_query_pf_caps_e {
__le16 aeq_period;
};
-#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0
-#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20
-#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20)
-
-#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0
-#define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0)
+#define PF_CAPS_E_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)
-#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0
-#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0)
-
-#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0
-#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0)
+#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0)
+#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20)
+#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32)
+#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64)
+#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96)
struct hns_roce_cmq_req {
__le32 data[6];
@@ -1432,18 +1326,40 @@ struct hns_roce_link_table {
#define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12))
#define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
+struct hns_roce_v2_free_mr {
+ struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
+ struct ib_cq *rsv_cq;
+ struct ib_pd *rsv_pd;
+ struct mutex mutex;
+};
+
struct hns_roce_v2_priv {
struct hnae3_handle *handle;
struct hns_roce_v2_cmq cmq;
struct hns_roce_link_table ext_llm;
+ struct hns_roce_v2_free_mr free_mr;
};
struct hns_roce_dip {
u8 dgid[GID_LEN_V2];
u32 dip_idx;
- struct list_head node; /* all dips are on a list */
+ struct list_head node; /* all dips are on a list */
+};
+
+struct fmea_ram_ecc {
+ u32 is_ecc_err;
+ u32 res_type;
+ u32 index;
};
+/* only for RNR timeout issue of HIP08 */
+#define HNS_ROCE_CLOCK_ADJUST 1000
+#define HNS_ROCE_MAX_CQ_PERIOD 65
+#define HNS_ROCE_MAX_EQ_PERIOD 65
+#define HNS_ROCE_RNR_TIMER_10NS 1
+#define HNS_ROCE_1US_CFG 999
+#define HNS_ROCE_1NS_CFG 0
+
#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0
@@ -1471,14 +1387,10 @@ struct hns_roce_dip {
#define HNS_ROCE_EQ_INIT_CONS_IDX 0
#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
-#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31
-#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31
-
#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
-#define HNS_ROCE_V2_VF_INT_ST_RAS_INT_S 1
#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
@@ -1530,18 +1442,6 @@ struct hns_roce_eq_context {
#define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320)
#define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340)
-#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
-#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
-
-#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0
-#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0)
-
-#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8
-#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8)
-
-#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
-#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
-
#define MAX_SERVICE_LEVEL 0x7
struct hns_roce_wqe_atomic_seg {
@@ -1559,9 +1459,6 @@ struct hns_roce_sccc_clr_done {
__le32 rsv[5];
};
-int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn,
- int *buffer);
-
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
void __iomem *dest)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c
deleted file mode 100644
index 5a97b5a0b7be..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c
+++ /dev/null
@@ -1,35 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-// Copyright (c) 2019 Hisilicon Limited.
-
-#include "hnae3.h"
-#include "hns_roce_device.h"
-#include "hns_roce_cmd.h"
-#include "hns_roce_hw_v2.h"
-
-int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn,
- int *buffer)
-{
- struct hns_roce_v2_cq_context *cq_context;
- struct hns_roce_cmd_mailbox *mailbox;
- int ret;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- cq_context = mailbox->buf;
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cqn, 0,
- HNS_ROCE_CMD_QUERY_CQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- if (ret) {
- dev_err(hr_dev->dev, "QUERY cqc cmd process error\n");
- goto err_mailbox;
- }
-
- memcpy(buffer, cq_context, sizeof(*cq_context));
-
-err_mailbox:
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return ret;
-}
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 4194b626f3c6..dcf89689a4c6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
#include <linux/acpi.h>
-#include <linux/of_platform.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <rdma/ib_addr.h>
@@ -70,7 +69,7 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr);
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, &attr->gid, attr);
return ret;
}
@@ -84,7 +83,7 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL);
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, NULL, NULL);
return ret;
}
@@ -98,7 +97,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port,
netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
- dev_err(dev, "Can't find netdev on port(%u)!\n", port);
+ dev_err(dev, "can't find netdev on port(%u)!\n", port);
return -ENODEV;
}
@@ -152,9 +151,6 @@ static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
u8 i;
for (i = 0; i < hr_dev->caps.num_ports; i++) {
- if (hr_dev->hw->set_mtu)
- hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i],
- hr_dev->caps.max_mtu);
ret = hns_roce_set_mac(hr_dev, i,
hr_dev->iboe.netdevs[i]->dev_addr);
if (ret)
@@ -243,7 +239,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
net_dev = hr_dev->iboe.netdevs[port];
if (!net_dev) {
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- dev_err(dev, "Find netdev %u failed!\n", port);
+ dev_err(dev, "find netdev %u failed!\n", port);
return -EINVAL;
}
@@ -270,6 +266,9 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
static int hns_roce_query_pkey(struct ib_device *ib_dev, u32 port, u16 index,
u16 *pkey)
{
+ if (index > 0)
+ return -EINVAL;
+
*pkey = PKEY_ID;
return 0;
@@ -307,9 +306,22 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
entry->address = address;
entry->mmap_type = mmap_type;
- ret = rdma_user_mmap_entry_insert_exact(
- ucontext, &entry->rdma_entry, length,
- mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1);
+ switch (mmap_type) {
+ /* pgoff 0 must be used by DB for compatibility */
+ case HNS_ROCE_MMAP_TYPE_DB:
+ ret = rdma_user_mmap_entry_insert_exact(
+ ucontext, &entry->rdma_entry, length, 0);
+ break;
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ ret = rdma_user_mmap_entry_insert_range(
+ ucontext, &entry->rdma_entry, length, 1,
+ U32_MAX);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
if (ret) {
kfree(entry);
return NULL;
@@ -323,18 +335,12 @@ static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context)
if (context->db_mmap_entry)
rdma_user_mmap_entry_remove(
&context->db_mmap_entry->rdma_entry);
-
- if (context->tptr_mmap_entry)
- rdma_user_mmap_entry_remove(
- &context->tptr_mmap_entry->rdma_entry);
}
static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
{
struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
- struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
u64 address;
- int ret;
address = context->uar.pfn << PAGE_SHIFT;
context->db_mmap_entry = hns_roce_user_mmap_entry_insert(
@@ -342,27 +348,7 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
if (!context->db_mmap_entry)
return -ENOMEM;
- if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
- return 0;
-
- /*
- * FIXME: using io_remap_pfn_range on the dma address returned
- * by dma_alloc_coherent is totally wrong.
- */
- context->tptr_mmap_entry =
- hns_roce_user_mmap_entry_insert(uctx, hr_dev->tptr_dma_addr,
- hr_dev->tptr_size,
- HNS_ROCE_MMAP_TYPE_TPTR);
- if (!context->tptr_mmap_entry) {
- ret = -ENOMEM;
- goto err;
- }
-
return 0;
-
-err:
- hns_roce_dealloc_uar_entry(context);
- return ret;
}
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
@@ -436,10 +422,15 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
entry = to_hns_mmap(rdma_entry);
pfn = entry->address >> PAGE_SHIFT;
- prot = vma->vm_page_prot;
- if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
- prot = pgprot_noncached(prot);
+ switch (entry->mmap_type) {
+ case HNS_ROCE_MMAP_TYPE_DB:
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
+ default:
+ return -EINVAL;
+ }
ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
prot, rdma_entry);
@@ -524,7 +515,6 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.destroy_ah = hns_roce_destroy_ah,
.destroy_cq = hns_roce_destroy_cq,
.disassociate_ucontext = hns_roce_disassociate_ucontext,
- .fill_res_cq_entry = hns_roce_fill_res_cq_entry,
.get_dma_mr = hns_roce_get_dma_mr,
.get_link_layer = hns_roce_get_link_layer,
.get_port_immutable = hns_roce_port_immutable,
@@ -575,6 +565,15 @@ static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
};
+static const struct ib_device_ops hns_roce_dev_restrack_ops = {
+ .fill_res_cq_entry = hns_roce_fill_res_cq_entry,
+ .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw,
+ .fill_res_qp_entry = hns_roce_fill_res_qp_entry,
+ .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw,
+ .fill_res_mr_entry = hns_roce_fill_res_mr_entry,
+ .fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw,
+};
+
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
int ret;
@@ -614,6 +613,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
for (i = 0; i < hr_dev->caps.num_ports; i++) {
if (!hr_dev->iboe.netdevs[i])
continue;
@@ -659,17 +659,17 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table,
HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz,
- hr_dev->caps.num_mtpts, 1);
+ hr_dev->caps.num_mtpts);
if (ret) {
- dev_err(dev, "Failed to init MTPT context memory, aborting.\n");
+ dev_err(dev, "failed to init MTPT context memory, aborting.\n");
return ret;
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.num_qps);
if (ret) {
- dev_err(dev, "Failed to init QP context memory, aborting.\n");
+ dev_err(dev, "failed to init QP context memory, aborting.\n");
goto err_unmap_dmpt;
}
@@ -677,9 +677,9 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
HEM_TYPE_IRRL,
hr_dev->caps.irrl_entry_sz *
hr_dev->caps.max_qp_init_rdma,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.num_qps);
if (ret) {
- dev_err(dev, "Failed to init irrl_table memory, aborting.\n");
+ dev_err(dev, "failed to init irrl_table memory, aborting.\n");
goto err_unmap_qp;
}
@@ -689,19 +689,19 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
HEM_TYPE_TRRL,
hr_dev->caps.trrl_entry_sz *
hr_dev->caps.max_qp_dest_rdma,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.num_qps);
if (ret) {
dev_err(dev,
- "Failed to init trrl_table memory, aborting.\n");
+ "failed to init trrl_table memory, aborting.\n");
goto err_unmap_irrl;
}
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table,
HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz,
- hr_dev->caps.num_cqs, 1);
+ hr_dev->caps.num_cqs);
if (ret) {
- dev_err(dev, "Failed to init CQ context memory, aborting.\n");
+ dev_err(dev, "failed to init CQ context memory, aborting.\n");
goto err_unmap_trrl;
}
@@ -709,10 +709,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
HEM_TYPE_SRQC,
hr_dev->caps.srqc_entry_sz,
- hr_dev->caps.num_srqs, 1);
+ hr_dev->caps.num_srqs);
if (ret) {
dev_err(dev,
- "Failed to init SRQ context memory, aborting.\n");
+ "failed to init SRQ context memory, aborting.\n");
goto err_unmap_cq;
}
}
@@ -722,10 +722,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
&hr_dev->qp_table.sccc_table,
HEM_TYPE_SCCC,
hr_dev->caps.sccc_sz,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.num_qps);
if (ret) {
dev_err(dev,
- "Failed to init SCC context memory, aborting.\n");
+ "failed to init SCC context memory, aborting.\n");
goto err_unmap_srq;
}
}
@@ -734,10 +734,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
HEM_TYPE_QPC_TIMER,
hr_dev->caps.qpc_timer_entry_sz,
- hr_dev->caps.num_qpc_timer, 1);
+ hr_dev->caps.qpc_timer_bt_num);
if (ret) {
dev_err(dev,
- "Failed to init QPC timer memory, aborting.\n");
+ "failed to init QPC timer memory, aborting.\n");
goto err_unmap_ctx;
}
}
@@ -746,10 +746,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
HEM_TYPE_CQC_TIMER,
hr_dev->caps.cqc_timer_entry_sz,
- hr_dev->caps.num_cqc_timer, 1);
+ hr_dev->caps.cqc_timer_bt_num);
if (ret) {
dev_err(dev,
- "Failed to init CQC timer memory, aborting.\n");
+ "failed to init CQC timer memory, aborting.\n");
goto err_unmap_qpc_timer;
}
}
@@ -758,7 +758,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table,
HEM_TYPE_GMV,
hr_dev->caps.gmv_entry_sz,
- hr_dev->caps.gmv_entry_num, 1);
+ hr_dev->caps.gmv_entry_num);
if (ret) {
dev_err(dev,
"failed to init gmv table memory, ret = %d\n",
@@ -816,7 +816,6 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
int ret;
spin_lock_init(&hr_dev->sm_lock);
- spin_lock_init(&hr_dev->bt_cmd_lock);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
@@ -828,13 +827,13 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar);
if (ret) {
- dev_err(dev, "Failed to allocate priv_uar.\n");
+ dev_err(dev, "failed to allocate priv_uar.\n");
goto err_uar_table_free;
}
ret = hns_roce_init_qp_table(hr_dev);
if (ret) {
- dev_err(dev, "Failed to init qp_table.\n");
+ dev_err(dev, "failed to init qp_table.\n");
goto err_uar_table_free;
}
@@ -847,9 +846,8 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
hns_roce_init_cq_table(hr_dev);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
hns_roce_init_srq_table(hr_dev);
- }
return 0;
@@ -907,26 +905,19 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
struct device *dev = hr_dev->dev;
int ret;
- if (hr_dev->hw->reset) {
- ret = hr_dev->hw->reset(hr_dev, true);
- if (ret) {
- dev_err(dev, "Reset RoCE engine failed!\n");
- return ret;
- }
- }
hr_dev->is_reset = false;
if (hr_dev->hw->cmq_init) {
ret = hr_dev->hw->cmq_init(hr_dev);
if (ret) {
- dev_err(dev, "Init RoCE Command Queue failed!\n");
- goto error_failed_cmq_init;
+ dev_err(dev, "init RoCE Command Queue failed!\n");
+ return ret;
}
}
ret = hr_dev->hw->hw_profile(hr_dev);
if (ret) {
- dev_err(dev, "Get RoCE engine profile failed!\n");
+ dev_err(dev, "get RoCE engine profile failed!\n");
goto error_failed_cmd_init;
}
@@ -1003,12 +994,6 @@ error_failed_cmd_init:
if (hr_dev->hw->cmq_exit)
hr_dev->hw->cmq_exit(hr_dev);
-error_failed_cmq_init:
- if (hr_dev->hw->reset) {
- if (hr_dev->hw->reset(hr_dev, false))
- dev_err(dev, "Dereset RoCE engine failed!\n");
- }
-
return ret;
}
@@ -1028,8 +1013,6 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
hns_roce_cmd_cleanup(hr_dev);
if (hr_dev->hw->cmq_exit)
hr_dev->hw->cmq_exit(hr_dev);
- if (hr_dev->hw->reset)
- hr_dev->hw->reset(hr_dev, false);
}
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 7089ac780291..845ac7d3831f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
@@ -48,24 +47,6 @@ unsigned long key_to_hw_index(u32 key)
return (key << 24) | (key >> 8);
}
-static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
-{
- return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
- HNS_ROCE_CMD_CREATE_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
-{
- return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
- mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
@@ -81,7 +62,7 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
return -ENOMEM;
}
- mr->key = hw_index_to_key(id); /* MR key */
+ mr->key = hw_index_to_key(id); /* MR key */
err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
(unsigned long)id);
@@ -138,14 +119,13 @@ static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
}
-static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr)
+static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
if (mr->enabled) {
- ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
key_to_hw_index(mr->key) &
(hr_dev->caps.num_mtpts - 1));
if (ret)
@@ -167,14 +147,11 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
/* Allocate mailbox memory */
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- return ret;
- }
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
if (mr->type != MR_TYPE_FRMR)
- ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr,
- mtpt_idx);
+ ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
else
ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
if (ret) {
@@ -182,7 +159,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
goto err_page;
}
- ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
@@ -213,7 +190,7 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
int ret;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (mr == NULL)
+ if (!mr)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA;
@@ -272,7 +249,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_alloc_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->ibmr.length = length;
return &mr->ibmr;
@@ -305,13 +281,14 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
return ERR_CAST(mailbox);
mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
- HNS_ROCE_CMD_QUERY_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ mtpt_idx);
if (ret)
goto free_cmd_mbox;
- ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ mtpt_idx);
if (ret)
ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
@@ -341,7 +318,8 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
goto free_cmd_mbox;
}
- ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx);
if (ret) {
ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
goto free_cmd_mbox;
@@ -361,16 +339,14 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct hns_roce_mr *mr = to_hr_mr(ibmr);
- int ret = 0;
- if (hr_dev->hw->dereg_mr) {
- ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
- } else {
- hns_roce_mr_free(hr_dev, mr);
- kfree(mr);
- }
+ if (hr_dev->hw->dereg_mr)
+ hr_dev->hw->dereg_mr(hr_dev);
- return ret;
+ hns_roce_mr_free(hr_dev, mr);
+ kfree(mr);
+
+ return 0;
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
@@ -486,7 +462,7 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
int ret;
if (mw->enabled) {
- ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
key_to_hw_index(mw->rkey) &
(hr_dev->caps.num_mtpts - 1));
if (ret)
@@ -526,7 +502,7 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
goto err_page;
}
- ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
@@ -609,15 +585,12 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
while (offset < end && npage < max_count) {
count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
- offset, &count, NULL);
+ offset, &count);
if (!mtts)
return -ENOBUFS;
for (i = 0; i < count && npage < max_count; i++) {
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- addr = to_hr_hw_page_addr(pages[npage]);
- else
- addr = pages[npage];
+ addr = pages[npage];
mtts[i] = cpu_to_le64(addr);
npage++;
@@ -824,11 +797,11 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
}
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
+ u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
int mtt_count, left;
- int start_index;
+ u32 start_index;
int total = 0;
__le64 *mtts;
u32 npage;
@@ -847,10 +820,7 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
continue;
addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- mtt_buf[total] = to_hr_hw_page_addr(addr);
- else
- mtt_buf[total] = addr;
+ mtt_buf[total] = addr;
total++;
}
@@ -864,7 +834,7 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtt_count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
start_index + total,
- &mtt_count, NULL);
+ &mtt_count);
if (!mtts || !mtt_count)
goto done;
@@ -884,10 +854,10 @@ done:
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
struct hns_roce_buf_attr *attr,
struct hns_roce_hem_cfg *cfg,
- unsigned int *buf_page_shift, int unalinged_size)
+ unsigned int *buf_page_shift, u64 unalinged_size)
{
struct hns_roce_buf_region *r;
- int first_region_padding;
+ u64 first_region_padding;
int page_cnt, region_cnt;
unsigned int page_shift;
size_t buf_size;
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 81ffad77ae42..783e71852c50 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <linux/pci.h>
#include "hns_roce_device.h"
@@ -86,7 +85,6 @@ int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
- struct resource *res;
int id;
/* Using bitmap to manager UAR index */
@@ -104,18 +102,9 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
else
uar->index = 0;
- if (!dev_is_pci(hr_dev->dev)) {
- res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
- if (!res) {
- ida_free(&uar_ida->ida, id);
- dev_err(&hr_dev->pdev->dev, "memory resource not found!\n");
- return -EINVAL;
- }
- uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index;
- } else {
- uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2))
- >> PAGE_SHIFT);
- }
+ uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) >> PAGE_SHIFT);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_dev->dwqe_page = pci_resource_start(hr_dev->pci_dev, 4);
return 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 9af4509894e6..f0bd82a18069 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -32,7 +32,6 @@
*/
#include <linux/pci.h>
-#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
@@ -57,7 +56,7 @@ static void flush_work_handle(struct work_struct *work)
if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) {
ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
if (ret)
- dev_err(dev, "Modify QP to error state failed(%d) during CQE flush\n",
+ dev_err(dev, "modify QP to error state failed(%d) during CQE flush\n",
ret);
}
@@ -106,16 +105,15 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
xa_unlock(&hr_dev->qp_table_xa);
if (!qp) {
- dev_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ dev_warn(dev, "async event for bogus QP %08x\n", qpn);
return;
}
- if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
- (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
- event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH)) {
+ if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
+ event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
+ event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
+ event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
+ event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
qp->state = IB_QPS_ERR;
flush_cqe(hr_dev, qp);
@@ -219,14 +217,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
int ret;
if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
- /* when hw version is v1, the sqpn is allocated */
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- num = HNS_ROCE_MAX_PORTS +
- hr_dev->iboe.phy_port[hr_qp->port];
- else
- num = 1;
-
- hr_qp->doorbell_qpn = 1;
+ num = 1;
} else {
mutex_lock(&qp_table->bank_mutex);
bankid = get_least_load_bankid_for_qp(qp_table->bank);
@@ -242,8 +233,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
qp_table->bank[bankid].inuse++;
mutex_unlock(&qp_table->bank_mutex);
-
- hr_qp->doorbell_qpn = (u32)num;
}
hr_qp->qpn = num;
@@ -251,26 +240,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
return 0;
}
-enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
-{
- switch (state) {
- case IB_QPS_RESET:
- return HNS_ROCE_QP_STATE_RST;
- case IB_QPS_INIT:
- return HNS_ROCE_QP_STATE_INIT;
- case IB_QPS_RTR:
- return HNS_ROCE_QP_STATE_RTR;
- case IB_QPS_RTS:
- return HNS_ROCE_QP_STATE_RTS;
- case IB_QPS_SQD:
- return HNS_ROCE_QP_STATE_SQD;
- case IB_QPS_ERR:
- return HNS_ROCE_QP_STATE_ERR;
- default:
- return HNS_ROCE_QP_NUM_STATE;
- }
-}
-
static void add_qp_to_list(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct ib_cq *send_cq, struct ib_cq *recv_cq)
@@ -306,7 +275,7 @@ static int hns_roce_qp_store(struct hns_roce_dev *hr_dev,
ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL));
if (ret)
- dev_err(hr_dev->dev, "Failed to xa store for QPC\n");
+ dev_err(hr_dev->dev, "failed to xa store for QPC\n");
else
/* add QP to device's QP list for softwc */
add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq,
@@ -324,22 +293,17 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
if (!hr_qp->qpn)
return -EINVAL;
- /* In v1 engine, GSI QP context is saved in the RoCE hw's register */
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI &&
- hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- return 0;
-
/* Alloc memory for QPC */
ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
if (ret) {
- dev_err(dev, "Failed to get QPC table\n");
+ dev_err(dev, "failed to get QPC table\n");
goto err_out;
}
/* Alloc memory for IRRL */
ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
if (ret) {
- dev_err(dev, "Failed to get IRRL table\n");
+ dev_err(dev, "failed to get IRRL table\n");
goto err_put_qp;
}
@@ -348,7 +312,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
hr_qp->qpn);
if (ret) {
- dev_err(dev, "Failed to get TRRL table\n");
+ dev_err(dev, "failed to get TRRL table\n");
goto err_put_irrl;
}
}
@@ -358,7 +322,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
if (ret) {
- dev_err(dev, "Failed to get SCC CTX table\n");
+ dev_err(dev, "failed to get SCC CTX table\n");
goto err_put_trrl;
}
}
@@ -379,6 +343,11 @@ err_out:
return ret;
}
+static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
+{
+ rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
+}
+
void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct xarray *xa = &hr_dev->qp_table_xa;
@@ -402,11 +371,6 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- /* In v1 engine, GSI QP context is saved in the RoCE hw's register */
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI &&
- hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- return;
-
if (hr_dev->caps.trrl_entry_sz)
hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
@@ -495,11 +459,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
hr_qp->rq.rsv_sge);
- if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
- hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
- else
- hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
- hr_qp->rq.max_gs);
+ hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
+ hr_qp->rq.max_gs);
hr_qp->rq.wqe_cnt = cnt;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE &&
@@ -535,11 +496,6 @@ static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
- hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE;
- return;
- }
-
hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
@@ -780,7 +736,11 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
goto err_inline;
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
+
return 0;
+
err_inline:
free_rq_inline_buf(hr_qp);
@@ -822,6 +782,35 @@ static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
hns_roce_qp_has_rq(init_attr));
}
+static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
+ struct hns_roce_dev *hr_dev,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ struct hns_roce_ucontext *uctx =
+ rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ struct rdma_user_mmap_entry *rdma_entry;
+ u64 address;
+
+ address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
+
+ hr_qp->dwqe_mmap_entry =
+ hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
+ HNS_ROCE_DWQE_SIZE,
+ HNS_ROCE_MMAP_TYPE_DWQE);
+
+ if (!hr_qp->dwqe_mmap_entry) {
+ ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
+ return -ENOMEM;
+ }
+
+ rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
+ resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
+
+ return 0;
+}
+
static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr,
@@ -909,10 +898,16 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
if (udata) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
+ ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
+ if (ret)
+ return ret;
+ }
+
ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
resp);
if (ret)
- return ret;
+ goto err_remove_qp;
} else {
ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
if (ret)
@@ -920,6 +915,12 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
}
return 0;
+
+err_remove_qp:
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
+
+ return ret;
}
static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
@@ -933,6 +934,8 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
} else {
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
hns_roce_free_db(hr_dev, &hr_qp->rdb);
@@ -1158,7 +1161,7 @@ static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
goto out;
break;
case IB_QPT_UD:
- if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 &&
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 &&
is_user)
goto out;
break;
@@ -1200,7 +1203,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
if (ret)
- ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n",
+ ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
init_attr->qp_type, ret);
return ret;
@@ -1391,7 +1394,7 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
}
}
-static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
+static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset)
{
return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 259444c0a630..989a2af2e938 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -9,112 +9,223 @@
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
-static int hns_roce_fill_cq(struct sk_buff *msg,
- struct hns_roce_v2_cq_context *context)
+#define MAX_ENTRY_NUM 256
+
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
{
- if (rdma_nl_put_driver_u32(msg, "state",
- roce_get_field(context->byte_4_pg_ceqn,
- V2_CQC_BYTE_4_ARM_ST_M,
- V2_CQC_BYTE_4_ARM_ST_S)))
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32(msg, "cq_depth", hr_cq->cq_depth))
goto err;
- if (rdma_nl_put_driver_u32(msg, "ceqn",
- roce_get_field(context->byte_4_pg_ceqn,
- V2_CQC_BYTE_4_CEQN_M,
- V2_CQC_BYTE_4_CEQN_S)))
+ if (rdma_nl_put_driver_u32(msg, "cons_index", hr_cq->cons_index))
goto err;
- if (rdma_nl_put_driver_u32(msg, "cqn",
- roce_get_field(context->byte_8_cqn,
- V2_CQC_BYTE_8_CQN_M,
- V2_CQC_BYTE_8_CQN_S)))
+ if (rdma_nl_put_driver_u32(msg, "cqe_size", hr_cq->cqe_size))
goto err;
- if (rdma_nl_put_driver_u32(msg, "hopnum",
- roce_get_field(context->byte_16_hop_addr,
- V2_CQC_BYTE_16_CQE_HOP_NUM_M,
- V2_CQC_BYTE_16_CQE_HOP_NUM_S)))
+ if (rdma_nl_put_driver_u32(msg, "arm_sn", hr_cq->arm_sn))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "pi",
- roce_get_field(context->byte_28_cq_pi,
- V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M,
- V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S)))
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct hns_roce_v2_cq_context context;
+ u32 data[MAX_ENTRY_NUM] = {};
+ int offset = 0;
+ int ret;
+
+ if (!hr_dev->hw->query_cqc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context);
+ if (ret)
+ return -EINVAL;
+
+ data[offset++] = hr_reg_read(&context, CQC_CQ_ST);
+ data[offset++] = hr_reg_read(&context, CQC_SHIFT);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_SIZE);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_CNT);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_PRODUCER_IDX);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_CONSUMER_IDX);
+ data[offset++] = hr_reg_read(&context, CQC_DB_RECORD_EN);
+ data[offset++] = hr_reg_read(&context, CQC_ARM_ST);
+ data[offset++] = hr_reg_read(&context, CQC_CMD_SN);
+ data[offset++] = hr_reg_read(&context, CQC_CEQN);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_MAX_CNT);
+ data[offset++] = hr_reg_read(&context, CQC_CQ_PERIOD);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_BAR_PG_SZ);
+ data[offset++] = hr_reg_read(&context, CQC_CQE_BUF_PG_SZ);
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+
+ return ret;
+}
+
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_wqe_cnt", hr_qp->sq.wqe_cnt))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "ci",
- roce_get_field(context->byte_32_cq_ci,
- V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M,
- V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_max_gs", hr_qp->sq.max_gs))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "coalesce",
- roce_get_field(context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_MAX_CNT_M,
- V2_CQC_BYTE_56_CQ_MAX_CNT_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_wqe_cnt", hr_qp->rq.wqe_cnt))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "period",
- roce_get_field(context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_PERIOD_M,
- V2_CQC_BYTE_56_CQ_PERIOD_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_max_gs", hr_qp->rq.max_gs))
goto err;
- if (rdma_nl_put_driver_u32(msg, "cnt",
- roce_get_field(context->byte_52_cqe_cnt,
- V2_CQC_BYTE_52_CQE_CNT_M,
- V2_CQC_BYTE_52_CQE_CNT_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "ext_sge_sge_cnt", hr_qp->sge.sge_cnt))
goto err;
+ nla_nest_end(msg, table_attr);
+
return 0;
err:
+ nla_nest_cancel(msg, table_attr);
+
return -EMSGSIZE;
}
-int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
- struct ib_cq *ib_cq)
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
- struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- struct hns_roce_v2_cq_context *context;
- struct nlattr *table_attr;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct hns_roce_v2_qp_context context;
+ u32 data[MAX_ENTRY_NUM] = {};
+ int offset = 0;
int ret;
- if (!hr_dev->dfx->query_cqc_info)
+ if (!hr_dev->hw->query_qpc)
return -EINVAL;
- context = kzalloc(sizeof(struct hns_roce_v2_cq_context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context);
+ ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context);
if (ret)
- goto err;
+ return -EINVAL;
+
+ data[offset++] = hr_reg_read(&context, QPC_QP_ST);
+ data[offset++] = hr_reg_read(&context, QPC_ERR_TYPE);
+ data[offset++] = hr_reg_read(&context, QPC_CHECK_FLG);
+ data[offset++] = hr_reg_read(&context, QPC_SRQ_EN);
+ data[offset++] = hr_reg_read(&context, QPC_SRQN);
+ data[offset++] = hr_reg_read(&context, QPC_QKEY_XRCD);
+ data[offset++] = hr_reg_read(&context, QPC_TX_CQN);
+ data[offset++] = hr_reg_read(&context, QPC_RX_CQN);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_PRODUCER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_CONSUMER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_RECORD_EN);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_PRODUCER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_CONSUMER_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_SHIFT);
+ data[offset++] = hr_reg_read(&context, QPC_RQWS);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_SHIFT);
+ data[offset++] = hr_reg_read(&context, QPC_SGE_SHIFT);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, QPC_SGE_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BA_PG_SZ);
+ data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BUF_PG_SZ);
+ data[offset++] = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
+ data[offset++] = hr_reg_read(&context, QPC_RETRY_CNT);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_CUR_PSN);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_PSN);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_FLUSH_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_TX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_SQ_RX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_RX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_TX_ERR);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_CQE_IDX);
+ data[offset++] = hr_reg_read(&context, QPC_RQ_RTY_TX_ERR);
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+
+ return ret;
+}
+
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct nlattr *table_attr;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
- if (!table_attr) {
- ret = -EMSGSIZE;
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "pbl_hop_num", hr_mr->pbl_hop_num))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "ba_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.ba_pg_shift))
goto err;
- }
- if (hns_roce_fill_cq(msg, context)) {
- ret = -EMSGSIZE;
- goto err_cancel_table;
- }
+ if (rdma_nl_put_driver_u32_hex(msg, "buf_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.buf_pg_shift))
+ goto err;
nla_nest_end(msg, table_attr);
- kfree(context);
return 0;
-err_cancel_table:
- nla_nest_cancel(msg, table_attr);
err:
- kfree(context);
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device);
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct hns_roce_v2_mpt_entry context;
+ u32 data[MAX_ENTRY_NUM] = {};
+ int offset = 0;
+ int ret;
+
+ if (!hr_dev->hw->query_mpt)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_mpt(hr_dev, hr_mr->key, &context);
+ if (ret)
+ return -EINVAL;
+
+ data[offset++] = hr_reg_read(&context, MPT_ST);
+ data[offset++] = hr_reg_read(&context, MPT_PD);
+ data[offset++] = hr_reg_read(&context, MPT_LKEY);
+ data[offset++] = hr_reg_read(&context, MPT_LEN_L);
+ data[offset++] = hr_reg_read(&context, MPT_LEN_H);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_SIZE);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_HOP_NUM);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_BA_PG_SZ);
+ data[offset++] = hr_reg_read(&context, MPT_PBL_BUF_PG_SZ);
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 6eee9deadd12..8dae98f827eb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -59,58 +59,39 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
}
}
-static int hns_roce_hw_create_srq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long srq_num)
+static int alloc_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
- HNS_ROCE_CMD_CREATE_SRQ,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long srq_num)
-{
- return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
- mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
-{
- struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
struct hns_roce_ida *srq_ida = &hr_dev->srq_table.srq_ida;
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_cmd_mailbox *mailbox;
- int ret;
int id;
id = ida_alloc_range(&srq_ida->ida, srq_ida->min, srq_ida->max,
GFP_KERNEL);
if (id < 0) {
- ibdev_err(ibdev, "failed to alloc srq(%d).\n", id);
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc srq(%d).\n", id);
return -ENOMEM;
}
- srq->srqn = (unsigned long)id;
- ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
- if (ret) {
- ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret);
- goto err_out;
- }
+ srq->srqn = id;
- ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
- if (ret) {
- ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
- goto err_put;
- }
+ return 0;
+}
+
+static void free_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ ida_free(&hr_dev->srq_table.srq_ida.ida, (int)srq->srqn);
+}
+
+static int hns_roce_create_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR_OR_NULL(mailbox)) {
+ if (IS_ERR(mailbox)) {
ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
- ret = -ENOMEM;
- goto err_xa;
+ return PTR_ERR(mailbox);
}
ret = hr_dev->hw->write_srqc(srq, mailbox->buf);
@@ -119,24 +100,44 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
goto err_mbox;
}
- ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
- if (ret) {
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_SRQ,
+ srq->srqn);
+ if (ret)
ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
- goto err_mbox;
- }
+err_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ if (ret) {
+ ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ if (ret) {
+ ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
+ goto err_put;
+ }
+
+ ret = hns_roce_create_srqc(hr_dev, srq);
+ if (ret)
+ goto err_xa;
return 0;
-err_mbox:
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
err_xa:
xa_erase(&srq_table->xa, srq->srqn);
err_put:
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
-err_out:
- ida_free(&srq_ida->ida, id);
return ret;
}
@@ -146,7 +147,8 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
int ret;
- ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn);
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
+ srq->srqn);
if (ret)
dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
ret, srq->srqn);
@@ -158,7 +160,6 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
wait_for_completion(&srq->free);
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
- ida_free(&srq_table->srq_ida.ida, (int)srq->srqn);
}
static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
@@ -259,7 +260,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
static void free_srq_wrid(struct hns_roce_srq *srq)
{
- kfree(srq->wrid);
+ kvfree(srq->wrid);
srq->wrid = NULL;
}
@@ -406,10 +407,14 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
if (ret)
return ret;
- ret = alloc_srqc(hr_dev, srq);
+ ret = alloc_srqn(hr_dev, srq);
if (ret)
goto err_srq_buf;
+ ret = alloc_srqc(hr_dev, srq);
+ if (ret)
+ goto err_srqn;
+
if (udata) {
resp.srqn = srq->srqn;
if (ib_copy_to_udata(udata, &resp,
@@ -428,6 +433,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
err_srqc:
free_srqc(hr_dev, srq);
+err_srqn:
+ free_srqn(hr_dev, srq);
err_srq_buf:
free_srq_buf(hr_dev, srq);
@@ -440,6 +447,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
free_srqc(hr_dev, srq);
+ free_srqn(hr_dev, srq);
free_srq_buf(hr_dev, srq);
return 0;
}
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 6dea0a49d171..7b086fe63a24 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1477,12 +1477,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
list_for_each_entry (listen_node, &cm_core->listen_list, list) {
memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
listen_port = listen_node->loc_port;
+ if (listen_port != dst_port ||
+ !(listener_state & listen_node->listener_state))
+ continue;
/* compare node pair, return node handle if a match */
- if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
- !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
- listen_port == dst_port &&
- vlan_id == listen_node->vlan_id &&
- (listener_state & listen_node->listener_state)) {
+ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
+ (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
+ vlan_id == listen_node->vlan_id)) {
refcount_inc(&listen_node->refcnt);
spin_unlock_irqrestore(&cm_core->listen_list_lock,
flags);
@@ -1501,15 +1502,14 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
* @cm_info: CM info for parent listen node
* @cm_parent_listen_node: The parent listen node
*/
-static enum irdma_status_code
-irdma_del_multiple_qhash(struct irdma_device *iwdev,
- struct irdma_cm_info *cm_info,
- struct irdma_cm_listener *cm_parent_listen_node)
+static int irdma_del_multiple_qhash(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
{
struct irdma_cm_listener *child_listen_node;
- enum irdma_status_code ret = IRDMA_ERR_CFG;
struct list_head *pos, *tpos;
unsigned long flags;
+ int ret = -EINVAL;
spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
list_for_each_safe (pos, tpos,
@@ -1618,16 +1618,16 @@ u16 irdma_get_vlan_ipv4(u32 *addr)
* Adds a qhash and a child listen node for every IPv6 address
* on the adapter and adds the associated qhash filter
*/
-static enum irdma_status_code
-irdma_add_mqh_6(struct irdma_device *iwdev, struct irdma_cm_info *cm_info,
- struct irdma_cm_listener *cm_parent_listen_node)
+static int irdma_add_mqh_6(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
{
struct net_device *ip_dev;
struct inet6_dev *idev;
struct inet6_ifaddr *ifp, *tmp;
- enum irdma_status_code ret = 0;
struct irdma_cm_listener *child_listen_node;
unsigned long flags;
+ int ret = 0;
rtnl_lock();
for_each_netdev(&init_net, ip_dev) {
@@ -1653,7 +1653,7 @@ irdma_add_mqh_6(struct irdma_device *iwdev, struct irdma_cm_info *cm_info,
child_listen_node);
if (!child_listen_node) {
ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n");
- ret = IRDMA_ERR_NO_MEMORY;
+ ret = -ENOMEM;
goto exit;
}
@@ -1700,16 +1700,16 @@ exit:
* Adds a qhash and a child listen node for every IPv4 address
* on the adapter and adds the associated qhash filter
*/
-static enum irdma_status_code
-irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info,
- struct irdma_cm_listener *cm_parent_listen_node)
+static int irdma_add_mqh_4(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
{
struct net_device *ip_dev;
struct in_device *idev;
struct irdma_cm_listener *child_listen_node;
- enum irdma_status_code ret = 0;
unsigned long flags;
const struct in_ifaddr *ifa;
+ int ret = 0;
rtnl_lock();
for_each_netdev(&init_net, ip_dev) {
@@ -1734,7 +1734,7 @@ irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info,
if (!child_listen_node) {
ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n");
in_dev_put(idev);
- ret = IRDMA_ERR_NO_MEMORY;
+ ret = -ENOMEM;
goto exit;
}
@@ -1781,9 +1781,9 @@ exit:
* @cm_info: CM info for parent listen node
* @cm_listen_node: The parent listen node
*/
-static enum irdma_status_code
-irdma_add_mqh(struct irdma_device *iwdev, struct irdma_cm_info *cm_info,
- struct irdma_cm_listener *cm_listen_node)
+static int irdma_add_mqh(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_listen_node)
{
if (cm_info->ipv4)
return irdma_add_mqh_4(iwdev, cm_info, cm_listen_node);
@@ -2200,7 +2200,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
/* set our node specific transport info */
cm_node->ipv4 = cm_info->ipv4;
cm_node->vlan_id = cm_info->vlan_id;
- if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb)
+ if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
cm_node->vlan_id = 0;
cm_node->tos = cm_info->tos;
cm_node->user_pri = cm_info->user_pri;
@@ -2209,8 +2209,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
ibdev_warn(&iwdev->ibdev,
"application TOS[%d] and remote client TOS[%d] mismatch\n",
listener->tos, cm_info->tos);
- cm_node->tos = max(listener->tos, cm_info->tos);
- cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ if (iwdev->vsi.dscp_mode) {
+ cm_node->user_pri = listener->user_pri;
+ } else {
+ cm_node->tos = max(listener->tos, cm_info->tos);
+ cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ }
ibdev_dbg(&iwdev->ibdev,
"DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos,
cm_node->user_pri);
@@ -2305,10 +2309,8 @@ err:
return NULL;
}
-static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
+static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
{
- struct irdma_cm_node *cm_node =
- container_of(rcu_head, struct irdma_cm_node, rcu_head);
struct irdma_cm_core *cm_core = cm_node->cm_core;
struct irdma_qp *iwqp;
struct irdma_cm_info nfo;
@@ -2356,7 +2358,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
}
cm_core->cm_free_ah(cm_node);
- kfree(cm_node);
}
/**
@@ -2384,8 +2385,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- /* wait for all list walkers to exit their grace period */
- call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb);
+ irdma_destroy_connection(cm_node);
+
+ kfree_rcu(cm_node, rcu_head);
}
/**
@@ -3201,8 +3203,7 @@ static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node)
* @iwdev: iwarp device structure
* @rdma_ver: HW version
*/
-enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev,
- u8 rdma_ver)
+int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
{
struct irdma_cm_core *cm_core = &iwdev->cm_core;
@@ -3212,7 +3213,7 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev,
/* Handles CM event work items send to Iwarp core */
cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0);
if (!cm_core->event_wq)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
INIT_LIST_HEAD(&cm_core->listen_list);
@@ -3244,15 +3245,10 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev,
*/
void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
{
- unsigned long flags;
-
if (!cm_core)
return;
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (timer_pending(&cm_core->tcp_timer))
- del_timer_sync(&cm_core->tcp_timer);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ del_timer_sync(&cm_core->tcp_timer);
destroy_workqueue(cm_core->event_wq);
cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
@@ -3465,12 +3461,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
}
cm_id = iwqp->cm_id;
- /* make sure we havent already closed this connection */
- if (!cm_id) {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return;
- }
-
original_hw_tcp_state = iwqp->hw_tcp_state;
original_ibqp_state = iwqp->ibqp_state;
last_ae = iwqp->last_aeq;
@@ -3492,11 +3482,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
disconn_status = -ECONNRESET;
}
- if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
- original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
- last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
- last_ae == IRDMA_AE_BAD_CLOSE ||
- last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
+ if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+ original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+ last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+ last_ae == IRDMA_AE_BAD_CLOSE ||
+ last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
issue_close = 1;
iwqp->cm_id = NULL;
qp->term_flags = 0;
@@ -3835,7 +3825,11 @@ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_info.cm_id = cm_id;
cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
cm_info.tos = cm_id->tos;
- cm_info.user_pri = rt_tos2priority(cm_id->tos);
+ if (iwdev->vsi.dscp_mode)
+ cm_info.user_pri =
+ iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)];
+ else
+ cm_info.user_pri = rt_tos2priority(cm_id->tos);
if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri))
return -ENOMEM;
@@ -3915,10 +3909,10 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
struct irdma_device *iwdev;
struct irdma_cm_listener *cm_listen_node;
struct irdma_cm_info cm_info = {};
- enum irdma_status_code err;
struct sockaddr_in *laddr;
struct sockaddr_in6 *laddr6;
bool wildcard = false;
+ int err;
iwdev = to_iwdev(cm_id->device);
if (!iwdev)
@@ -3959,7 +3953,7 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
}
}
- if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb)
+ if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
cm_info.vlan_id = 0;
cm_info.backlog = backlog;
cm_info.cm_id = cm_id;
@@ -3977,7 +3971,11 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = cm_listen_node;
cm_listen_node->tos = cm_id->tos;
- cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
+ if (iwdev->vsi.dscp_mode)
+ cm_listen_node->user_pri =
+ iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)];
+ else
+ cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
cm_info.user_pri = cm_listen_node->user_pri;
if (!cm_listen_node->reused_node) {
if (wildcard) {
@@ -4234,10 +4232,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
struct irdma_cm_node *cm_node;
struct list_head teardown_list;
struct ib_qp_attr attr;
- struct irdma_sc_vsi *vsi = &iwdev->vsi;
- struct irdma_sc_qp *sc_qp;
- struct irdma_qp *qp;
- int i;
INIT_LIST_HEAD(&teardown_list);
@@ -4254,52 +4248,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
irdma_cm_disconn(cm_node->iwqp);
irdma_rem_ref_cm_node(cm_node);
}
- if (!iwdev->roce_mode)
- return;
-
- INIT_LIST_HEAD(&teardown_list);
- for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
- mutex_lock(&vsi->qos[i].qos_mutex);
- list_for_each_safe (list_node, list_core_temp,
- &vsi->qos[i].qplist) {
- u32 qp_ip[4];
-
- sc_qp = container_of(list_node, struct irdma_sc_qp,
- list);
- if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC)
- continue;
-
- qp = sc_qp->qp_uk.back_qp;
- if (!disconnect_all) {
- if (nfo->ipv4)
- qp_ip[0] = qp->udp_info.local_ipaddr[3];
- else
- memcpy(qp_ip,
- &qp->udp_info.local_ipaddr[0],
- sizeof(qp_ip));
- }
-
- if (disconnect_all ||
- (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) &&
- !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) {
- spin_lock(&iwdev->rf->qptable_lock);
- if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
- irdma_qp_add_ref(&qp->ibqp);
- list_add(&qp->teardown_entry,
- &teardown_list);
- }
- spin_unlock(&iwdev->rf->qptable_lock);
- }
- }
- mutex_unlock(&vsi->qos[i].qos_mutex);
- }
-
- list_for_each_safe (list_node, list_core_temp, &teardown_list) {
- qp = container_of(list_node, struct irdma_qp, teardown_entry);
- attr.qp_state = IB_QPS_ERR;
- irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
- irdma_qp_rem_ref(&qp->ibqp);
- }
}
/**
@@ -4325,11 +4273,11 @@ static void irdma_qhash_ctrl(struct irdma_device *iwdev,
struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
struct irdma_cm_listener *child_listen_node;
struct list_head *pos, *tpos;
- enum irdma_status_code err;
bool node_allocated = false;
enum irdma_quad_hash_manage_type op = ifup ?
IRDMA_QHASH_MANAGE_TYPE_ADD :
IRDMA_QHASH_MANAGE_TYPE_DELETE;
+ int err;
list_for_each_safe (pos, tpos, child_listen_list) {
child_listen_node = list_entry(pos, struct irdma_cm_listener,
diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h
index 3bf42728e9b7..19c284975fc7 100644
--- a/drivers/infiniband/hw/irdma/cm.h
+++ b/drivers/infiniband/hw/irdma/cm.h
@@ -384,6 +384,13 @@ int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
struct irdma_puda_buf *sqbuf,
enum irdma_timer_type type, int send_retrans,
int close_when_complete);
+
+static inline u8 irdma_tos2dscp(u8 tos)
+{
+#define IRDMA_DSCP_VAL GENMASK(7, 2)
+ return (u8)FIELD_GET(IRDMA_DSCP_VAL, tos);
+}
+
int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index 7264f8c2f7d5..a41e0d21143a 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
#include "osdep.h"
-#include "status.h"
#include "hmc.h"
#include "defs.h"
#include "type.h"
@@ -68,6 +69,31 @@ void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 op)
}
}
+static void irdma_set_qos_info(struct irdma_sc_vsi *vsi,
+ struct irdma_l2params *l2p)
+{
+ u8 i;
+
+ vsi->qos_rel_bw = l2p->vsi_rel_bw;
+ vsi->qos_prio_type = l2p->vsi_prio_type;
+ vsi->dscp_mode = l2p->dscp_mode;
+ if (l2p->dscp_mode) {
+ memcpy(vsi->dscp_map, l2p->dscp_map, sizeof(vsi->dscp_map));
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
+ l2p->up2tc[i] = i;
+ }
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ vsi->qos[i].qs_handle = l2p->qs_handle_list[i];
+ vsi->qos[i].traffic_class = l2p->up2tc[i];
+ vsi->qos[i].rel_bw =
+ l2p->tc_info[vsi->qos[i].traffic_class].rel_bw;
+ vsi->qos[i].prio_type =
+ l2p->tc_info[vsi->qos[i].traffic_class].prio_type;
+ vsi->qos[i].valid = false;
+ }
+}
+
/**
* irdma_change_l2params - given the new l2 parameters, change all qp
* @vsi: RDMA VSI pointer
@@ -86,6 +112,7 @@ void irdma_change_l2params(struct irdma_sc_vsi *vsi,
return;
vsi->tc_change_pending = false;
+ irdma_set_qos_info(vsi, l2params);
irdma_sc_suspend_resume_qps(vsi, IRDMA_OP_RESUME);
}
@@ -152,17 +179,16 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp,
- struct irdma_add_arp_cache_entry_info *info,
- u64 scratch, bool post_sq)
+static int irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_add_arp_cache_entry_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
u64 hdr;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 8, info->reach_max);
set_64bit_val(wqe, 16, ether_addr_to_u64(info->mac_addr));
@@ -190,16 +216,15 @@ irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp,
* @arp_index: arp index to delete arp entry
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch,
- u16 arp_index, bool post_sq)
+static int irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ u16 arp_index, bool post_sq)
{
__le64 *wqe;
u64 hdr;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
hdr = arp_index |
FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) |
@@ -224,17 +249,16 @@ irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp,
- struct irdma_apbvt_info *info, u64 scratch,
- bool post_sq)
+static int irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_apbvt_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
u64 hdr;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, info->port);
@@ -272,7 +296,7 @@ irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp,
* quad hash entry in the hardware will point to iwarp's qp
* number and requires no calls from the driver.
*/
-static enum irdma_status_code
+static int
irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp,
struct irdma_qhash_table_info *info,
u64 scratch, bool post_sq)
@@ -285,7 +309,7 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp,
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr));
@@ -348,10 +372,9 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp,
* @qp: sc qp
* @info: initialization qp info
*/
-enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp,
- struct irdma_qp_init_info *info)
+int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info)
{
- enum irdma_status_code ret_code;
+ int ret_code;
u32 pble_obj_cnt;
u16 wqe_size;
@@ -359,7 +382,7 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp,
info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags ||
info->qp_uk_init_info.max_rq_frag_cnt >
info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags)
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
qp->dev = info->pd->dev;
qp->vsi = info->vsi;
@@ -382,7 +405,7 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp,
if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) ||
(info->virtual_map && info->rq_pa >= pble_obj_cnt))
- return IRDMA_ERR_INVALID_PBLE_INDEX;
+ return -EINVAL;
qp->llp_stream_handle = (void *)(-1);
qp->hw_sq_size = irdma_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
@@ -422,8 +445,8 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info,
- u64 scratch, bool post_sq)
+int irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info,
+ u64 scratch, bool post_sq)
{
struct irdma_sc_cqp *cqp;
__le64 *wqe;
@@ -431,12 +454,12 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c
cqp = qp->dev->cqp;
if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id ||
- qp->qp_uk.qp_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt - 1))
- return IRDMA_ERR_INVALID_QP_ID;
+ qp->qp_uk.qp_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt)
+ return -EINVAL;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
set_64bit_val(wqe, 40, qp->shadow_area_pa);
@@ -473,9 +496,8 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp,
- struct irdma_modify_qp_info *info,
- u64 scratch, bool post_sq)
+int irdma_sc_qp_modify(struct irdma_sc_qp *qp, struct irdma_modify_qp_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
@@ -486,7 +508,7 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp,
cqp = qp->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
if (info->next_iwarp_state == IRDMA_QP_STATE_TERMINATE) {
if (info->dont_send_fin)
@@ -544,9 +566,8 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp,
* @ignore_mw_bnd: memory window bind flag
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
- bool remove_hash_idx, bool ignore_mw_bnd,
- bool post_sq)
+int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
+ bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
@@ -555,7 +576,7 @@ enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
cqp = qp->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
set_64bit_val(wqe, 40, qp->shadow_area_pa);
@@ -737,16 +758,15 @@ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
- bool post_sq)
+static int irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ bool post_sq)
{
__le64 *wqe;
u64 hdr;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE,
IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY) |
@@ -772,17 +792,16 @@ irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp,
- struct irdma_local_mac_entry_info *info,
- u64 scratch, bool post_sq)
+static int irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_local_mac_entry_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
u64 header;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 32, ether_addr_to_u64(info->mac_addr));
@@ -811,16 +830,16 @@ irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp,
* @ignore_ref_count: to force mac adde delete
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
- u16 entry_idx, u8 ignore_ref_count, bool post_sq)
+static int irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ u16 entry_idx, u8 ignore_ref_count,
+ bool post_sq)
{
__le64 *wqe;
u64 header;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, entry_idx) |
FIELD_PREP(IRDMA_CQPSQ_OPCODE,
IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) |
@@ -1033,10 +1052,9 @@ void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
- struct irdma_allocate_stag_info *info, u64 scratch,
- bool post_sq)
+static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
+ struct irdma_allocate_stag_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
@@ -1053,7 +1071,7 @@ irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 8,
FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) |
@@ -1095,10 +1113,9 @@ irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
- struct irdma_reg_ns_stag_info *info, u64 scratch,
- bool post_sq)
+static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
+ struct irdma_reg_ns_stag_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
u64 fbo;
@@ -1116,7 +1133,7 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
else if (info->page_size == 0x1000)
page_size = IRDMA_PAGE_SIZE_4K;
else
- return IRDMA_ERR_PARAM;
+ return -EINVAL;
if (info->access_rights & (IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY |
IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY))
@@ -1126,12 +1143,12 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
pble_obj_cnt = dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
if (info->chunk_size && info->first_pm_pbl_index >= pble_obj_cnt)
- return IRDMA_ERR_INVALID_PBLE_INDEX;
+ return -EINVAL;
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
fbo = info->va & (info->page_size - 1);
set_64bit_val(wqe, 0,
@@ -1184,10 +1201,9 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_dealloc_stag(struct irdma_sc_dev *dev,
- struct irdma_dealloc_stag_info *info, u64 scratch,
- bool post_sq)
+static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev,
+ struct irdma_dealloc_stag_info *info,
+ u64 scratch, bool post_sq)
{
u64 hdr;
__le64 *wqe;
@@ -1196,7 +1212,7 @@ irdma_sc_dealloc_stag(struct irdma_sc_dev *dev,
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 8,
FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
@@ -1225,9 +1241,9 @@ irdma_sc_dealloc_stag(struct irdma_sc_dev *dev,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info,
- u64 scratch, bool post_sq)
+static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev,
+ struct irdma_mw_alloc_info *info, u64 scratch,
+ bool post_sq)
{
u64 hdr;
struct irdma_sc_cqp *cqp;
@@ -1236,7 +1252,7 @@ irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info,
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 8,
FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
@@ -1266,9 +1282,9 @@ irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info,
* @info: fast mr info
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code
-irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
- struct irdma_fast_reg_stag_info *info, bool post_sq)
+int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info,
+ bool post_sq)
{
u64 temp, hdr;
__le64 *wqe;
@@ -1290,7 +1306,7 @@ irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx,
IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(&qp->qp_uk, wqe_idx);
@@ -1819,8 +1835,7 @@ void irdma_terminate_received(struct irdma_sc_qp *qp,
}
}
-static enum irdma_status_code irdma_null_ws_add(struct irdma_sc_vsi *vsi,
- u8 user_pri)
+static int irdma_null_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
{
return 0;
}
@@ -1843,7 +1858,6 @@ static void irdma_null_ws_reset(struct irdma_sc_vsi *vsi)
void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
struct irdma_vsi_init_info *info)
{
- struct irdma_l2params *l2p;
int i;
vsi->dev = info->dev;
@@ -1856,18 +1870,8 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
vsi->fcn_id = info->dev->hmc_fn_id;
- l2p = info->params;
- vsi->qos_rel_bw = l2p->vsi_rel_bw;
- vsi->qos_prio_type = l2p->vsi_prio_type;
+ irdma_set_qos_info(vsi, info->params);
for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
- if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
- vsi->qos[i].qs_handle = l2p->qs_handle_list[i];
- vsi->qos[i].traffic_class = info->params->up2tc[i];
- vsi->qos[i].rel_bw =
- l2p->tc_info[vsi->qos[i].traffic_class].rel_bw;
- vsi->qos[i].prio_type =
- l2p->tc_info[vsi->qos[i].traffic_class].prio_type;
- vsi->qos[i].valid = false;
mutex_init(&vsi->qos[i].qos_mutex);
INIT_LIST_HEAD(&vsi->qos[i].qplist);
}
@@ -1916,8 +1920,8 @@ static u8 irdma_get_fcn_id(struct irdma_sc_vsi *vsi)
* @vsi: pointer to the vsi structure
* @info: The info structure used for initialization
*/
-enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
- struct irdma_vsi_stats_info *info)
+int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_stats_info *info)
{
u8 fcn_id = info->fcn_id;
struct irdma_dma_mem *stats_buff_mem;
@@ -1932,7 +1936,7 @@ enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
&stats_buff_mem->pa,
GFP_KERNEL);
if (!stats_buff_mem->va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
vsi->pestat->gather_info.gather_stats_va = stats_buff_mem->va;
vsi->pestat->gather_info.last_gather_stats_va =
@@ -1959,7 +1963,7 @@ stats_error:
stats_buff_mem->va, stats_buff_mem->pa);
stats_buff_mem->va = NULL;
- return IRDMA_ERR_CQP_COMPL_ERROR;
+ return -EIO;
}
/**
@@ -2021,19 +2025,19 @@ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type)
* @info: gather stats info structure
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code
-irdma_sc_gather_stats(struct irdma_sc_cqp *cqp,
- struct irdma_stats_gather_info *info, u64 scratch)
+static int irdma_sc_gather_stats(struct irdma_sc_cqp *cqp,
+ struct irdma_stats_gather_info *info,
+ u64 scratch)
{
__le64 *wqe;
u64 temp;
if (info->stats_buff_mem.size < IRDMA_GATHER_STATS_BUF_SIZE)
- return IRDMA_ERR_BUF_TOO_SHORT;
+ return -ENOMEM;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 40,
FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fcn_index));
@@ -2068,17 +2072,16 @@ irdma_sc_gather_stats(struct irdma_sc_cqp *cqp,
* @alloc: alloc vs. delete flag
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code
-irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp,
- struct irdma_stats_inst_info *info, bool alloc,
- u64 scratch)
+static int irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp,
+ struct irdma_stats_inst_info *info,
+ bool alloc, u64 scratch)
{
__le64 *wqe;
u64 temp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 40,
FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fn_id));
@@ -2106,9 +2109,8 @@ irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp,
* @info: User priority map info
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp,
- struct irdma_up_info *info,
- u64 scratch)
+static int irdma_sc_set_up_map(struct irdma_sc_cqp *cqp,
+ struct irdma_up_info *info, u64 scratch)
{
__le64 *wqe;
u64 temp = 0;
@@ -2116,7 +2118,7 @@ static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp,
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
temp |= (u64)info->map[i] << (i * 8);
@@ -2149,17 +2151,16 @@ static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp,
* @node_op: 0 for add 1 for modify, 2 for delete
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code
-irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp,
- struct irdma_ws_node_info *info,
- enum irdma_ws_node_op node_op, u64 scratch)
+static int irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp,
+ struct irdma_ws_node_info *info,
+ enum irdma_ws_node_op node_op, u64 scratch)
{
__le64 *wqe;
u64 temp = 0;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 32,
FIELD_PREP(IRDMA_CQPSQ_WS_VSI, info->vsi) |
@@ -2192,9 +2193,9 @@ irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
- struct irdma_qp_flush_info *info,
- u64 scratch, bool post_sq)
+int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, u64 scratch,
+ bool post_sq)
{
u64 temp = 0;
__le64 *wqe;
@@ -2213,13 +2214,13 @@ enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
ibdev_dbg(to_ibdev(qp->dev),
"CQP: Additional flush request ignored for qp %x\n",
qp->qp_uk.qp_id);
- return IRDMA_ERR_FLUSHED_Q;
+ return -EALREADY;
}
cqp = qp->pd->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
if (info->userflushcode) {
if (flush_rq)
@@ -2266,9 +2267,9 @@ enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp,
- struct irdma_gen_ae_info *info,
- u64 scratch, bool post_sq)
+static int irdma_sc_gen_ae(struct irdma_sc_qp *qp,
+ struct irdma_gen_ae_info *info, u64 scratch,
+ bool post_sq)
{
u64 temp;
__le64 *wqe;
@@ -2278,7 +2279,7 @@ static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp,
cqp = qp->pd->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
temp = info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
info->ae_src);
@@ -2306,10 +2307,9 @@ static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_qp_upload_context(struct irdma_sc_dev *dev,
- struct irdma_upload_context_info *info, u64 scratch,
- bool post_sq)
+static int irdma_sc_qp_upload_context(struct irdma_sc_dev *dev,
+ struct irdma_upload_context_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
@@ -2318,7 +2318,7 @@ irdma_sc_qp_upload_context(struct irdma_sc_dev *dev,
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, info->buf_pa);
@@ -2347,21 +2347,20 @@ irdma_sc_qp_upload_context(struct irdma_sc_dev *dev,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp,
- struct irdma_cqp_manage_push_page_info *info,
- u64 scratch, bool post_sq)
+static int irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_manage_push_page_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
u64 hdr;
if (info->free_page &&
info->push_idx >= cqp->dev->hw_attrs.max_hw_device_pages)
- return IRDMA_ERR_INVALID_PUSH_PAGE_INDEX;
+ return -EINVAL;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, info->qs_handle);
hdr = FIELD_PREP(IRDMA_CQPSQ_MPP_PPIDX, info->push_idx) |
@@ -2387,16 +2386,15 @@ irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp,
* @qp: sc qp struct
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp,
- struct irdma_sc_qp *qp,
- u64 scratch)
+static int irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
+ u64 scratch)
{
u64 hdr;
__le64 *wqe;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
hdr = FIELD_PREP(IRDMA_CQPSQ_SUSPENDQP_QPID, qp->qp_uk.qp_id) |
FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_SUSPEND_QP) |
@@ -2418,16 +2416,15 @@ static enum irdma_status_code irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp,
* @qp: sc qp struct
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code irdma_sc_resume_qp(struct irdma_sc_cqp *cqp,
- struct irdma_sc_qp *qp,
- u64 scratch)
+static int irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
+ u64 scratch)
{
u64 hdr;
__le64 *wqe;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16,
FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QSHANDLE, qp->qs_handle));
@@ -2460,14 +2457,13 @@ static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq)
* @cq: cq struct
* @info: cq initialization info
*/
-enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq,
- struct irdma_cq_init_info *info)
+int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info)
{
u32 pble_obj_cnt;
pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
- return IRDMA_ERR_INVALID_PBLE_INDEX;
+ return -EINVAL;
cq->cq_pa = info->cq_base_pa;
cq->dev = info->dev;
@@ -2498,23 +2494,21 @@ enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq,
* @check_overflow: flag for overflow check
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq,
- u64 scratch,
- bool check_overflow,
- bool post_sq)
+static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch,
+ bool check_overflow, bool post_sq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
u64 hdr;
struct irdma_sc_ceq *ceq;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
cqp = cq->dev->cqp;
- if (cq->cq_uk.cq_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt - 1))
- return IRDMA_ERR_INVALID_CQ_ID;
+ if (cq->cq_uk.cq_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt)
+ return -EINVAL;
- if (cq->ceq_id > (cq->dev->hmc_fpm_misc.max_ceqs - 1))
- return IRDMA_ERR_INVALID_CEQ_ID;
+ if (cq->ceq_id >= cq->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
ceq = cq->dev->ceq[cq->ceq_id];
if (ceq && ceq->reg_cq)
@@ -2527,7 +2521,7 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq,
if (!wqe) {
if (ceq && ceq->reg_cq)
irdma_sc_remove_cq_ctx(ceq, cq);
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
}
set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
@@ -2573,8 +2567,7 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch,
- bool post_sq)
+int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq)
{
struct irdma_sc_cqp *cqp;
__le64 *wqe;
@@ -2584,7 +2577,7 @@ enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch,
cqp = cq->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
ceq = cq->dev->ceq[cq->ceq_id];
if (ceq && ceq->reg_cq)
@@ -2640,9 +2633,9 @@ void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *inf
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag to post to sq
*/
-static enum irdma_status_code
-irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info,
- u64 scratch, bool post_sq)
+static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
+ struct irdma_modify_cq_info *info, u64 scratch,
+ bool post_sq)
{
struct irdma_sc_cqp *cqp;
__le64 *wqe;
@@ -2652,12 +2645,12 @@ irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info,
pble_obj_cnt = cq->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
if (info->cq_resize && info->virtual_map &&
info->first_pm_pbl_idx >= pble_obj_cnt)
- return IRDMA_ERR_INVALID_PBLE_INDEX;
+ return -EINVAL;
cqp = cq->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 0, info->cq_size);
set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
@@ -2731,8 +2724,8 @@ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val,
* @tail: wqtail register value
* @count: how many times to try for completion
*/
-static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp,
- u32 tail, u32 count)
+static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail,
+ u32 count)
{
u32 i = 0;
u32 newtail, error, val;
@@ -2744,7 +2737,7 @@ static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp,
ibdev_dbg(to_ibdev(cqp->dev),
"CQP: CQPERRCODES error_code[x%08X]\n",
error);
- return IRDMA_ERR_CQP_COMPL_ERROR;
+ return -EIO;
}
if (newtail != tail) {
/* SUCCESS */
@@ -2755,7 +2748,7 @@ static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp,
udelay(cqp->dev->hw_attrs.max_sleep_count);
}
- return IRDMA_ERR_TIMEOUT;
+ return -ETIMEDOUT;
}
/**
@@ -2910,10 +2903,9 @@ static u64 irdma_sc_decode_fpm_query(__le64 *buf, u32 buf_idx,
* parses fpm query buffer and copy max_cnt and
* size value of hmc objects in hmc_info
*/
-static enum irdma_status_code
-irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
- struct irdma_hmc_info *hmc_info,
- struct irdma_hmc_fpm_misc *hmc_fpm_misc)
+static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
+ struct irdma_hmc_info *hmc_info,
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc)
{
struct irdma_hmc_obj_info *obj_info;
u64 temp;
@@ -2952,7 +2944,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
obj_info[IRDMA_HMC_IW_XFFL].size = 4;
hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp);
if (!hmc_fpm_misc->xf_block_size)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1);
get_64bit_val(buf, 80, &temp);
@@ -2961,7 +2953,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
hmc_fpm_misc->q1_block_size = FIELD_GET(IRDMA_QUERY_FPM_Q1BLOCKSIZE, temp);
if (!hmc_fpm_misc->q1_block_size)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
irdma_sc_decode_fpm_query(buf, 88, obj_info, IRDMA_HMC_IW_TIMER);
@@ -2985,7 +2977,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
hmc_fpm_misc->rrf_block_size = FIELD_GET(IRDMA_QUERY_FPM_RRFBLOCKSIZE, temp);
if (!hmc_fpm_misc->rrf_block_size &&
obj_info[IRDMA_HMC_IW_RRFFL].max_cnt)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR);
irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD);
@@ -2997,7 +2989,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp);
if (!hmc_fpm_misc->ooiscf_block_size &&
obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
return 0;
}
@@ -3025,8 +3017,7 @@ static u32 irdma_sc_find_reg_cq(struct irdma_sc_ceq *ceq,
* @ceq: ceq sc structure
* @cq: cq sc structure
*/
-enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq,
- struct irdma_sc_cq *cq)
+int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq)
{
unsigned long flags;
@@ -3034,7 +3025,7 @@ enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq,
if (ceq->reg_cq_size == ceq->elem_cnt) {
spin_unlock_irqrestore(&ceq->req_cq_lock, flags);
- return IRDMA_ERR_REG_CQ_FULL;
+ return -ENOMEM;
}
ceq->reg_cq[ceq->reg_cq_size++] = cq;
@@ -3075,15 +3066,15 @@ exit:
*
* Initializes the object and context buffers for a control Queue Pair.
*/
-enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
- struct irdma_cqp_init_info *info)
+int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_init_info *info)
{
u8 hw_sq_size;
if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 ||
info->sq_size < IRDMA_CQP_SW_SQSIZE_4 ||
((info->sq_size & (info->sq_size - 1))))
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
hw_sq_size = irdma_get_encoded_wqe_size(info->sq_size,
IRDMA_QUEUE_TYPE_CQP);
@@ -3133,13 +3124,12 @@ enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
* @maj_err: If error, major err number
* @min_err: If error, minor err number
*/
-enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err,
- u16 *min_err)
+int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err)
{
u64 temp;
u8 hw_rev;
u32 cnt = 0, p1, p2, val = 0, err_code;
- enum irdma_status_code ret_code;
+ int ret_code;
hw_rev = cqp->dev->hw_attrs.uk_attrs.hw_rev;
cqp->sdbuf.size = ALIGN(IRDMA_UPDATE_SD_BUFF_SIZE * cqp->sq_size,
@@ -3148,7 +3138,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er
cqp->sdbuf.size, &cqp->sdbuf.pa,
GFP_KERNEL);
if (!cqp->sdbuf.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
spin_lock_init(&cqp->dev->cqp_lock);
@@ -3203,7 +3193,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er
do {
if (cnt++ > cqp->dev->hw_attrs.max_done_count) {
- ret_code = IRDMA_ERR_TIMEOUT;
+ ret_code = -ETIMEDOUT;
goto err;
}
udelay(cqp->dev->hw_attrs.max_sleep_count);
@@ -3211,7 +3201,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er
} while (!val);
if (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_ERR)) {
- ret_code = IRDMA_ERR_DEVICE_NOT_SUPPORTED;
+ ret_code = -EOPNOTSUPP;
goto err;
}
@@ -3252,7 +3242,7 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch
u32 *wqe_idx)
{
__le64 *wqe = NULL;
- enum irdma_status_code ret_code;
+ int ret_code;
if (IRDMA_RING_FULL_ERR(cqp->sq_ring)) {
ibdev_dbg(to_ibdev(cqp->dev),
@@ -3279,16 +3269,16 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch
* irdma_sc_cqp_destroy - destroy cqp during close
* @cqp: struct for cqp hw
*/
-enum irdma_status_code irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp)
+int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp)
{
u32 cnt = 0, val;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
writel(0, cqp->dev->hw_regs[IRDMA_CCQPHIGH]);
writel(0, cqp->dev->hw_regs[IRDMA_CCQPLOW]);
do {
if (cnt++ > cqp->dev->hw_attrs.max_done_count) {
- ret_code = IRDMA_ERR_TIMEOUT;
+ ret_code = -ETIMEDOUT;
break;
}
udelay(cqp->dev->hw_attrs.max_sleep_count);
@@ -3333,8 +3323,8 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
* @ccq: ccq sc struct
* @info: completion q entry to return
*/
-enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
- struct irdma_ccq_cqe_info *info)
+int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_cqe_info *info)
{
u64 qp_ctx, temp, temp1;
__le64 *cqe;
@@ -3342,7 +3332,7 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
u32 wqe_idx;
u32 error;
u8 polarity;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
if (ccq->cq_uk.avoid_mem_cflct)
cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk);
@@ -3352,7 +3342,7 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
get_64bit_val(cqe, 24, &temp);
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, temp);
if (polarity != ccq->cq_uk.polarity)
- return IRDMA_ERR_Q_EMPTY;
+ return -ENOENT;
get_64bit_val(cqe, 8, &qp_ctx);
cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx;
@@ -3399,25 +3389,25 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
* @op_code: cqp opcode for completion
* @compl_info: completion q entry to return
*/
-enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code,
- struct irdma_ccq_cqe_info *compl_info)
+int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code,
+ struct irdma_ccq_cqe_info *compl_info)
{
struct irdma_ccq_cqe_info info = {};
struct irdma_sc_cq *ccq;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
u32 cnt = 0;
ccq = cqp->dev->ccq;
while (1) {
if (cnt++ > 100 * cqp->dev->hw_attrs.max_done_count)
- return IRDMA_ERR_TIMEOUT;
+ return -ETIMEDOUT;
if (irdma_sc_ccq_get_cqe_info(ccq, &info)) {
udelay(cqp->dev->hw_attrs.max_sleep_count);
continue;
}
if (info.error && info.op_code != IRDMA_CQP_OP_QUERY_STAG) {
- ret_code = IRDMA_ERR_CQP_COMPL_ERROR;
+ ret_code = -EIO;
break;
}
/* make sure op code matches*/
@@ -3441,17 +3431,16 @@ enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u
* @info: info for the manage function table operation
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code
-irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp,
- struct irdma_hmc_fcn_info *info,
- u64 scratch, bool post_sq)
+static int irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp,
+ struct irdma_hmc_fcn_info *info,
+ u64 scratch, bool post_sq)
{
__le64 *wqe;
u64 hdr;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 0, 0);
set_64bit_val(wqe, 8, 0);
@@ -3484,8 +3473,7 @@ irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp,
* for fpm commit
* @cqp: struct for cqp hw
*/
-static enum irdma_status_code
-irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp)
+static int irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp)
{
return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_COMMIT_FPM_VAL,
NULL);
@@ -3500,19 +3488,19 @@ irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp)
* @post_sq: flag for cqp db to ring
* @wait_type: poll ccq or cqp registers for cqp completion
*/
-static enum irdma_status_code
-irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id,
- struct irdma_dma_mem *commit_fpm_mem, bool post_sq,
- u8 wait_type)
+static int irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id,
+ struct irdma_dma_mem *commit_fpm_mem,
+ bool post_sq, u8 wait_type)
{
__le64 *wqe;
u64 hdr;
u32 tail, val, error;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, hmc_fn_id);
set_64bit_val(wqe, 32, commit_fpm_mem->pa);
@@ -3546,8 +3534,7 @@ irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id,
* query fpm
* @cqp: struct for cqp hw
*/
-static enum irdma_status_code
-irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp)
+static int irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp)
{
return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_FPM_VAL,
NULL);
@@ -3562,19 +3549,19 @@ irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp)
* @post_sq: flag for cqp db to ring
* @wait_type: poll ccq or cqp registers for cqp completion
*/
-static enum irdma_status_code
-irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id,
- struct irdma_dma_mem *query_fpm_mem, bool post_sq,
- u8 wait_type)
+static int irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id,
+ struct irdma_dma_mem *query_fpm_mem,
+ bool post_sq, u8 wait_type)
{
__le64 *wqe;
u64 hdr;
u32 tail, val, error;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, hmc_fn_id);
set_64bit_val(wqe, 32, query_fpm_mem->pa);
@@ -3606,21 +3593,21 @@ irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id,
* @ceq: ceq sc structure
* @info: ceq initialization info
*/
-enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
- struct irdma_ceq_init_info *info)
+int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
+ struct irdma_ceq_init_info *info)
{
u32 pble_obj_cnt;
if (info->elem_cnt < info->dev->hw_attrs.min_hw_ceq_size ||
info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
- if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1))
- return IRDMA_ERR_INVALID_CEQ_ID;
+ if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
- return IRDMA_ERR_INVALID_PBLE_INDEX;
+ return -EINVAL;
ceq->size = sizeof(*ceq);
ceq->ceqe_base = (struct irdma_ceqe *)info->ceqe_base;
@@ -3653,8 +3640,8 @@ enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch,
- bool post_sq)
+static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch,
+ bool post_sq)
{
struct irdma_sc_cqp *cqp;
__le64 *wqe;
@@ -3663,7 +3650,7 @@ static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64
cqp = ceq->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, ceq->elem_cnt);
set_64bit_val(wqe, 32,
(ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
@@ -3695,8 +3682,7 @@ static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64
* irdma_sc_cceq_create_done - poll for control ceq wqe to complete
* @ceq: ceq sc structure
*/
-static enum irdma_status_code
-irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq)
+static int irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq)
{
struct irdma_sc_cqp *cqp;
@@ -3709,7 +3695,7 @@ irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq)
* irdma_sc_cceq_destroy_done - poll for destroy cceq to complete
* @ceq: ceq sc structure
*/
-enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq)
+int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq)
{
struct irdma_sc_cqp *cqp;
@@ -3728,9 +3714,9 @@ enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq)
* @ceq: ceq sc structure
* @scratch: u64 saved to be used during cqp completion
*/
-enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch)
+int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch)
{
- enum irdma_status_code ret_code;
+ int ret_code;
struct irdma_sc_dev *dev = ceq->dev;
dev->ccq->vsi = ceq->vsi;
@@ -3753,8 +3739,7 @@ enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratc
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch,
- bool post_sq)
+int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq)
{
struct irdma_sc_cqp *cqp;
__le64 *wqe;
@@ -3763,7 +3748,7 @@ enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratc
cqp = ceq->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, ceq->elem_cnt);
set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
@@ -3882,19 +3867,19 @@ void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq)
* @aeq: aeq structure ptr
* @info: aeq initialization info
*/
-enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
- struct irdma_aeq_init_info *info)
+int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
+ struct irdma_aeq_init_info *info)
{
u32 pble_obj_cnt;
if (info->elem_cnt < info->dev->hw_attrs.min_hw_aeq_size ||
info->elem_cnt > info->dev->hw_attrs.max_hw_aeq_size)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
- return IRDMA_ERR_INVALID_PBLE_INDEX;
+ return -EINVAL;
aeq->size = sizeof(*aeq);
aeq->polarity = 1;
@@ -3919,8 +3904,8 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq,
- u64 scratch, bool post_sq)
+static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch,
+ bool post_sq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
@@ -3929,7 +3914,7 @@ static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq,
cqp = aeq->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, aeq->elem_cnt);
set_64bit_val(wqe, 32,
(aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
@@ -3958,8 +3943,8 @@ static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq,
- u64 scratch, bool post_sq)
+static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch,
+ bool post_sq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
@@ -3972,7 +3957,7 @@ static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq,
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, aeq->elem_cnt);
set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) |
@@ -3995,8 +3980,8 @@ static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq,
* @aeq: aeq structure ptr
* @info: aeqe info to be returned
*/
-enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
- struct irdma_aeqe_info *info)
+int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
+ struct irdma_aeqe_info *info)
{
u64 temp, compl_ctx;
__le64 *aeqe;
@@ -4010,7 +3995,7 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp);
if (aeq->polarity != polarity)
- return IRDMA_ERR_Q_EMPTY;
+ return -ENOENT;
print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8,
aeqe, 16, false);
@@ -4155,22 +4140,21 @@ void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
* @cq: sc's cq ctruct
* @info: info for control cq initialization
*/
-enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq,
- struct irdma_ccq_init_info *info)
+int irdma_sc_ccq_init(struct irdma_sc_cq *cq, struct irdma_ccq_init_info *info)
{
u32 pble_obj_cnt;
if (info->num_elem < info->dev->hw_attrs.uk_attrs.min_hw_cq_size ||
info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
- if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1))
- return IRDMA_ERR_INVALID_CEQ_ID;
+ if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
- return IRDMA_ERR_INVALID_PBLE_INDEX;
+ return -EINVAL;
cq->cq_pa = info->cq_pa;
cq->cq_uk.cq_base = info->cq_base;
@@ -4207,7 +4191,7 @@ enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq,
* irdma_sc_ccq_create_done - poll cqp for ccq create
* @ccq: ccq sc struct
*/
-static inline enum irdma_status_code irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq)
+static inline int irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq)
{
struct irdma_sc_cqp *cqp;
@@ -4223,10 +4207,10 @@ static inline enum irdma_status_code irdma_sc_ccq_create_done(struct irdma_sc_cq
* @check_overflow: overlow flag for ccq
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
- bool check_overflow, bool post_sq)
+int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
+ bool check_overflow, bool post_sq)
{
- enum irdma_status_code ret_code;
+ int ret_code;
ret_code = irdma_sc_cq_create(ccq, scratch, check_overflow, post_sq);
if (ret_code)
@@ -4248,19 +4232,18 @@ enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
* @scratch: u64 saved to be used during cqp completion
* @post_sq: flag for cqp db to ring
*/
-enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch,
- bool post_sq)
+int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq)
{
struct irdma_sc_cqp *cqp;
__le64 *wqe;
u64 hdr;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
u32 tail, val, error;
cqp = ccq->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
set_64bit_val(wqe, 8, (uintptr_t)ccq >> 1);
@@ -4299,13 +4282,12 @@ enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch
* @dev : ptr to irdma_dev struct
* @hmc_fn_id: hmc function id
*/
-enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev,
- u8 hmc_fn_id)
+int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id)
{
struct irdma_hmc_info *hmc_info;
struct irdma_hmc_fpm_misc *hmc_fpm_misc;
struct irdma_dma_mem query_fpm_mem;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
u8 wait_type;
hmc_info = dev->hmc_info;
@@ -4336,14 +4318,13 @@ enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev,
* @dev : ptr to irdma_dev struct
* @hmc_fn_id: hmc function id
*/
-static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev,
- u8 hmc_fn_id)
+static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id)
{
struct irdma_hmc_info *hmc_info;
struct irdma_hmc_obj_info *obj_info;
__le64 *buf;
struct irdma_dma_mem commit_fpm_mem;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
u8 wait_type;
hmc_info = dev->hmc_info;
@@ -4406,9 +4387,8 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev,
* @info: sd info for wqe
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code
-cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info,
- u64 scratch)
+static int cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp,
+ struct irdma_update_sds_info *info, u64 scratch)
{
u64 data;
u64 hdr;
@@ -4420,7 +4400,7 @@ cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info,
wqe = irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
mem_entries = info->cnt - wqe_entries;
@@ -4486,12 +4466,11 @@ cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info,
* @info: sd info for sd's
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code
-irdma_update_pe_sds(struct irdma_sc_dev *dev,
- struct irdma_update_sds_info *info, u64 scratch)
+static int irdma_update_pe_sds(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info, u64 scratch)
{
struct irdma_sc_cqp *cqp = dev->cqp;
- enum irdma_status_code ret_code;
+ int ret_code;
ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
if (!ret_code)
@@ -4505,13 +4484,12 @@ irdma_update_pe_sds(struct irdma_sc_dev *dev,
* @dev: sc device struct
* @info: sd info for sd's
*/
-enum irdma_status_code
-irdma_update_sds_noccq(struct irdma_sc_dev *dev,
- struct irdma_update_sds_info *info)
+int irdma_update_sds_noccq(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info)
{
u32 error, val, tail;
struct irdma_sc_cqp *cqp = dev->cqp;
- enum irdma_status_code ret_code;
+ int ret_code;
ret_code = cqp_sds_wqe_fill(cqp, info, 0);
if (ret_code)
@@ -4532,10 +4510,9 @@ irdma_update_sds_noccq(struct irdma_sc_dev *dev,
* @post_sq: flag for cqp db to ring
* @poll_registers: flag to poll register for cqp completion
*/
-enum irdma_status_code
-irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
- u8 hmc_fn_id, bool post_sq,
- bool poll_registers)
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers)
{
u64 hdr;
__le64 *wqe;
@@ -4543,7 +4520,7 @@ irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16,
FIELD_PREP(IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID, hmc_fn_id));
@@ -4618,8 +4595,7 @@ static u32 irdma_est_sd(struct irdma_sc_dev *dev,
* irdma_sc_query_rdma_features_done - poll cqp for query features done
* @cqp: struct for cqp hw
*/
-static enum irdma_status_code
-irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp)
+static int irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp)
{
return irdma_sc_poll_for_cqp_op_done(cqp,
IRDMA_CQP_OP_QUERY_RDMA_FEATURES,
@@ -4632,16 +4608,15 @@ irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp)
* @buf: buffer to hold query info
* @scratch: u64 saved to be used during cqp completion
*/
-static enum irdma_status_code
-irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp,
- struct irdma_dma_mem *buf, u64 scratch)
+static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp,
+ struct irdma_dma_mem *buf, u64 scratch)
{
__le64 *wqe;
u64 temp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
temp = buf->pa;
set_64bit_val(wqe, 32, temp);
@@ -4665,9 +4640,9 @@ irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp,
* irdma_get_rdma_features - get RDMA features
* @dev: sc device struct
*/
-enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev)
+int irdma_get_rdma_features(struct irdma_sc_dev *dev)
{
- enum irdma_status_code ret_code;
+ int ret_code;
struct irdma_dma_mem feat_buf;
u64 temp;
u16 byte_idx, feat_type, feat_cnt, feat_idx;
@@ -4677,7 +4652,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev)
feat_buf.va = dma_alloc_coherent(dev->hw->device, feat_buf.size,
&feat_buf.pa, GFP_KERNEL);
if (!feat_buf.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
if (!ret_code)
@@ -4688,7 +4663,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev)
get_64bit_val(feat_buf.va, 0, &temp);
feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp);
if (feat_cnt < 2) {
- ret_code = IRDMA_ERR_INVALID_FEAT_CNT;
+ ret_code = -EINVAL;
goto exit;
} else if (feat_cnt > IRDMA_MAX_FEATURES) {
ibdev_dbg(to_ibdev(dev),
@@ -4702,7 +4677,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev)
feat_buf.size, &feat_buf.pa,
GFP_KERNEL);
if (!feat_buf.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
if (!ret_code)
@@ -4713,7 +4688,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev)
get_64bit_val(feat_buf.va, 0, &temp);
feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp);
if (feat_cnt < 2) {
- ret_code = IRDMA_ERR_INVALID_FEAT_CNT;
+ ret_code = -EINVAL;
goto exit;
}
}
@@ -4792,7 +4767,7 @@ static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev,
* @dev: sc device struct
* @qp_count: desired qp count
*/
-enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
+int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
{
struct irdma_virt_mem virt_mem;
u32 i, mem_size;
@@ -4803,7 +4778,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
u32 loop_count = 0;
struct irdma_hmc_info *hmc_info;
struct irdma_hmc_fpm_misc *hmc_fpm_misc;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
hmc_info = dev->hmc_info;
hmc_fpm_misc = &dev->hmc_fpm_misc;
@@ -4897,10 +4872,12 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
sd_diff = sd_needed - hmc_fpm_misc->max_sds;
if (sd_diff > 128) {
- if (qpwanted > 128 && sd_diff > 144)
+ if (!(loop_count % 2) && qpwanted > 128) {
qpwanted /= 2;
- mrwanted /= 2;
- pblewanted /= 2;
+ } else {
+ mrwanted /= 2;
+ pblewanted /= 2;
+ }
continue;
}
if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF &&
@@ -4930,7 +4907,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
ibdev_dbg(to_ibdev(dev),
"HMC: cfg_fpm failed loop_cnt=%d, sd_needed=%d, max sd count %d\n",
loop_count, sd_needed, hmc_info->sd_table.sd_cnt);
- return IRDMA_ERR_CFG;
+ return -EINVAL;
}
if (loop_count > 1 && sd_needed < hmc_fpm_misc->max_sds) {
@@ -4966,7 +4943,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
if (!virt_mem.va) {
ibdev_dbg(to_ibdev(dev),
"HMC: failed to allocate memory for sd_entry buffer\n");
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
hmc_info->sd_table.sd_entry = virt_mem.va;
@@ -4978,10 +4955,10 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
* @dev: rdma device
* @pcmdinfo: cqp command info
*/
-static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev,
- struct cqp_cmds_info *pcmdinfo)
+static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo)
{
- enum irdma_status_code status;
+ int status;
struct irdma_dma_mem val_mem;
bool alloc = false;
@@ -5243,7 +5220,7 @@ static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev,
pcmdinfo->in.u.mc_modify.scratch);
break;
default:
- status = IRDMA_NOT_SUPPORTED;
+ status = -EOPNOTSUPP;
break;
}
@@ -5255,10 +5232,10 @@ static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev,
* @dev: sc device struct
* @pcmdinfo: cqp command info
*/
-enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
- struct cqp_cmds_info *pcmdinfo)
+int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo)
{
- enum irdma_status_code status = 0;
+ int status = 0;
unsigned long flags;
spin_lock_irqsave(&dev->cqp_lock, flags);
@@ -5274,9 +5251,9 @@ enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
* irdma_process_bh - called from tasklet for cqp list
* @dev: sc device struct
*/
-enum irdma_status_code irdma_process_bh(struct irdma_sc_dev *dev)
+int irdma_process_bh(struct irdma_sc_dev *dev)
{
- enum irdma_status_code status = 0;
+ int status = 0;
struct cqp_cmds_info *pcmdinfo;
unsigned long flags;
@@ -5364,12 +5341,11 @@ static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev)
* @dev: Device pointer
* @info: Device init info
*/
-enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver,
- struct irdma_sc_dev *dev,
- struct irdma_device_init_info *info)
+int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
+ struct irdma_device_init_info *info)
{
u32 val;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
u8 db_size;
INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */
@@ -5413,7 +5389,7 @@ enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver,
irdma_sc_init_hw(dev);
if (irdma_wait_pe_ready(dev))
- return IRDMA_ERR_TIMEOUT;
+ return -ETIMEDOUT;
val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]);
db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val);
@@ -5421,7 +5397,7 @@ enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver,
ibdev_dbg(to_ibdev(dev),
"DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n",
val, db_size);
- return IRDMA_ERR_PE_DOORBELL_NOT_ENA;
+ return -ENODEV;
}
dev->db_addr = dev->hw->hw_addr + (uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET];
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
index cc3d9a365b35..c1906cab5c8a 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -314,6 +314,7 @@ enum irdma_cqp_op_type {
#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d
#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e
#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220
+#define IRDMA_AE_INVALID_REQUEST 0x0223
#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
@@ -964,7 +965,7 @@ enum irdma_cqp_op_type {
(_ring).head = ((_ring).head + 1) % size; \
(_retcode) = 0; \
} else { \
- (_retcode) = IRDMA_ERR_RING_FULL; \
+ (_retcode) = -ENOMEM; \
} \
}
#define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
@@ -975,7 +976,7 @@ enum irdma_cqp_op_type {
(_ring).head = ((_ring).head + (_count)) % size; \
(_retcode) = 0; \
} else { \
- (_retcode) = IRDMA_ERR_RING_FULL; \
+ (_retcode) = -ENOMEM; \
} \
}
#define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \
@@ -986,7 +987,7 @@ enum irdma_cqp_op_type {
(_ring).head = ((_ring).head + 1) % size; \
(_retcode) = 0; \
} else { \
- (_retcode) = IRDMA_ERR_RING_FULL; \
+ (_retcode) = -ENOMEM; \
} \
}
#define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
@@ -997,7 +998,7 @@ enum irdma_cqp_op_type {
(_ring).head = ((_ring).head + (_count)) % size; \
(_retcode) = 0; \
} else { \
- (_retcode) = IRDMA_ERR_RING_FULL; \
+ (_retcode) = -ENOMEM; \
} \
}
#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c
index ecffcb93c05a..49307ce8c4da 100644
--- a/drivers/infiniband/hw/irdma/hmc.c
+++ b/drivers/infiniband/hw/irdma/hmc.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
-#include "status.h"
#include "hmc.h"
#include "defs.h"
#include "type.h"
@@ -121,10 +120,8 @@ static inline void irdma_invalidate_pf_hmc_pd(struct irdma_sc_dev *dev, u32 sd_i
* @type: paged or direct sd
* @setsd: flag to set or clear sd
*/
-enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id,
- u64 pa, u32 sd_idx,
- enum irdma_sd_entry_type type,
- bool setsd)
+int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx,
+ enum irdma_sd_entry_type type, bool setsd)
{
struct irdma_update_sds_info sdinfo;
@@ -145,16 +142,15 @@ enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id,
* @sd_cnt: number of sd entries
* @setsd: flag to set or clear sd
*/
-static enum irdma_status_code irdma_hmc_sd_grp(struct irdma_sc_dev *dev,
- struct irdma_hmc_info *hmc_info,
- u32 sd_index, u32 sd_cnt,
- bool setsd)
+static int irdma_hmc_sd_grp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ u32 sd_cnt, bool setsd)
{
struct irdma_hmc_sd_entry *sd_entry;
struct irdma_update_sds_info sdinfo = {};
u64 pa;
u32 i;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
for (i = sd_index; i < sd_index + sd_cnt; i++) {
@@ -196,16 +192,15 @@ static enum irdma_status_code irdma_hmc_sd_grp(struct irdma_sc_dev *dev,
* @dev: pointer to the device structure
* @info: create obj info
*/
-static enum irdma_status_code
-irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev,
- struct irdma_hmc_create_obj_info *info)
+static int irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
{
if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return IRDMA_ERR_INVALID_HMC_OBJ_INDEX;
+ return -EINVAL;
if ((info->start_idx + info->count) >
info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return IRDMA_ERR_INVALID_HMC_OBJ_COUNT;
+ return -EINVAL;
if (!info->add_sd_cnt)
return 0;
@@ -222,9 +217,8 @@ irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev,
* This will allocate memory for PDs and backing pages and populate
* the sd and pd entries.
*/
-enum irdma_status_code
-irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
- struct irdma_hmc_create_obj_info *info)
+int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
{
struct irdma_hmc_sd_entry *sd_entry;
u32 sd_idx, sd_lmt;
@@ -232,10 +226,10 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
u32 pd_idx1 = 0, pd_lmt1 = 0;
u32 i, j;
bool pd_error = false;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return IRDMA_ERR_INVALID_HMC_OBJ_INDEX;
+ return -EINVAL;
if ((info->start_idx + info->count) >
info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
@@ -243,7 +237,7 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
"HMC: error type %u, start = %u, req cnt %u, cnt = %u\n",
info->rsrc_type, info->start_idx, info->count,
info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return IRDMA_ERR_INVALID_HMC_OBJ_COUNT;
+ return -EINVAL;
}
irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type,
@@ -251,7 +245,7 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
&sd_lmt);
if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
sd_lmt > info->hmc_info->sd_table.sd_cnt) {
- return IRDMA_ERR_INVALID_SD_INDEX;
+ return -EINVAL;
}
irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type,
@@ -312,7 +306,7 @@ exit_sd_error:
irdma_prep_remove_pd_page(info->hmc_info, (j - 1));
break;
default:
- ret_code = IRDMA_ERR_INVALID_SD_TYPE;
+ ret_code = -EINVAL;
break;
}
j--;
@@ -327,12 +321,12 @@ exit_sd_error:
* @info: dele obj info
* @reset: true if called before reset
*/
-static enum irdma_status_code
-irdma_finish_del_sd_reg(struct irdma_sc_dev *dev,
- struct irdma_hmc_del_obj_info *info, bool reset)
+static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info,
+ bool reset)
{
struct irdma_hmc_sd_entry *sd_entry;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
u32 i, sd_idx;
struct irdma_dma_mem *mem;
@@ -373,22 +367,21 @@ irdma_finish_del_sd_reg(struct irdma_sc_dev *dev,
* caller should deallocate memory allocated previously for
* book-keeping information about PDs and backing storage.
*/
-enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
- struct irdma_hmc_del_obj_info *info,
- bool reset)
+int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info, bool reset)
{
struct irdma_hmc_pd_table *pd_table;
u32 sd_idx, sd_lmt;
u32 pd_idx, pd_lmt, rel_pd_idx;
u32 i, j;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
ibdev_dbg(to_ibdev(dev),
"HMC: error start_idx[%04d] >= [type %04d].cnt[%04d]\n",
info->start_idx, info->rsrc_type,
info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return IRDMA_ERR_INVALID_HMC_OBJ_INDEX;
+ return -EINVAL;
}
if ((info->start_idx + info->count) >
@@ -397,7 +390,7 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
"HMC: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n",
info->start_idx, info->count, info->rsrc_type,
info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return IRDMA_ERR_INVALID_HMC_OBJ_COUNT;
+ return -EINVAL;
}
irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type,
@@ -433,7 +426,7 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
sd_lmt > info->hmc_info->sd_table.sd_cnt) {
ibdev_dbg(to_ibdev(dev), "HMC: invalid sd_idx\n");
- return IRDMA_ERR_INVALID_SD_INDEX;
+ return -EINVAL;
}
for (i = sd_idx; i < sd_lmt; i++) {
@@ -477,11 +470,9 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
* @type: what type of segment descriptor we're manipulating
* @direct_mode_sz: size to alloc in direct mode
*/
-enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw,
- struct irdma_hmc_info *hmc_info,
- u32 sd_index,
- enum irdma_sd_entry_type type,
- u64 direct_mode_sz)
+int irdma_add_sd_table_entry(struct irdma_hw *hw,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ enum irdma_sd_entry_type type, u64 direct_mode_sz)
{
struct irdma_hmc_sd_entry *sd_entry;
struct irdma_dma_mem dma_mem;
@@ -499,7 +490,7 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw,
dma_mem.va = dma_alloc_coherent(hw->device, dma_mem.size,
&dma_mem.pa, GFP_KERNEL);
if (!dma_mem.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
if (type == IRDMA_SD_TYPE_PAGED) {
struct irdma_virt_mem *vmem =
&sd_entry->u.pd_table.pd_entry_virt_mem;
@@ -510,7 +501,7 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw,
dma_free_coherent(hw->device, dma_mem.size,
dma_mem.va, dma_mem.pa);
dma_mem.va = NULL;
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
sd_entry->u.pd_table.pd_entry = vmem->va;
@@ -549,10 +540,9 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw,
* aligned on 4K boundary and zeroed memory.
* 2. It should be 4K in size.
*/
-enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
- struct irdma_hmc_info *hmc_info,
- u32 pd_index,
- struct irdma_dma_mem *rsrc_pg)
+int irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 pd_index,
+ struct irdma_dma_mem *rsrc_pg)
{
struct irdma_hmc_pd_table *pd_table;
struct irdma_hmc_pd_entry *pd_entry;
@@ -563,7 +553,7 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
u64 page_desc;
if (pd_index / IRDMA_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
- return IRDMA_ERR_INVALID_PAGE_DESC_INDEX;
+ return -EINVAL;
sd_idx = (pd_index / IRDMA_HMC_PD_CNT_IN_SD);
if (hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
@@ -584,7 +574,7 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
page->size, &page->pa,
GFP_KERNEL);
if (!page->va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
pd_entry->rsrc_pg = false;
}
@@ -621,9 +611,8 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
* 1. Caller can deallocate the memory used by backing storage after this
* function returns.
*/
-enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev,
- struct irdma_hmc_info *hmc_info,
- u32 idx)
+int irdma_remove_pd_bp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 idx)
{
struct irdma_hmc_pd_entry *pd_entry;
struct irdma_hmc_pd_table *pd_table;
@@ -635,11 +624,11 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev,
sd_idx = idx / IRDMA_HMC_PD_CNT_IN_SD;
rel_pd_idx = idx % IRDMA_HMC_PD_CNT_IN_SD;
if (sd_idx >= hmc_info->sd_table.sd_cnt)
- return IRDMA_ERR_INVALID_PAGE_DESC_INDEX;
+ return -EINVAL;
sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
if (sd_entry->entry_type != IRDMA_SD_TYPE_PAGED)
- return IRDMA_ERR_INVALID_SD_TYPE;
+ return -EINVAL;
pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
pd_entry = &pd_table->pd_entry[rel_pd_idx];
@@ -656,7 +645,7 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev,
if (!pd_entry->rsrc_pg) {
mem = &pd_entry->bp.addr;
if (!mem || !mem->va)
- return IRDMA_ERR_PARAM;
+ return -EINVAL;
dma_free_coherent(dev->hw->device, mem->size, mem->va,
mem->pa);
@@ -673,14 +662,13 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev,
* @hmc_info: pointer to the HMC configuration information structure
* @idx: the page index
*/
-enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info,
- u32 idx)
+int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx)
{
struct irdma_hmc_sd_entry *sd_entry;
sd_entry = &hmc_info->sd_table.sd_entry[idx];
if (--sd_entry->u.bp.use_cnt)
- return IRDMA_ERR_NOT_READY;
+ return -EBUSY;
hmc_info->sd_table.use_cnt--;
sd_entry->valid = false;
@@ -693,15 +681,14 @@ enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info,
* @hmc_info: pointer to the HMC configuration information structure
* @idx: segment descriptor index to find the relevant page descriptor
*/
-enum irdma_status_code
-irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx)
+int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx)
{
struct irdma_hmc_sd_entry *sd_entry;
sd_entry = &hmc_info->sd_table.sd_entry[idx];
if (sd_entry->u.pd_table.use_cnt)
- return IRDMA_ERR_NOT_READY;
+ return -EBUSY;
sd_entry->valid = false;
hmc_info->sd_table.use_cnt--;
diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h
index e2139c788b1b..f5c5dacc7021 100644
--- a/drivers/infiniband/hw/irdma/hmc.h
+++ b/drivers/infiniband/hw/irdma/hmc.h
@@ -141,40 +141,29 @@ struct irdma_hmc_del_obj_info {
bool privileged;
};
-enum irdma_status_code irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf,
- struct irdma_dma_mem *src_mem,
- u64 src_offset, u64 size);
-enum irdma_status_code
-irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
- struct irdma_hmc_create_obj_info *info);
-enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
- struct irdma_hmc_del_obj_info *info,
- bool reset);
-enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id,
- u64 pa, u32 sd_idx,
- enum irdma_sd_entry_type type,
- bool setsd);
-enum irdma_status_code
-irdma_update_sds_noccq(struct irdma_sc_dev *dev,
- struct irdma_update_sds_info *info);
+int irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf,
+ struct irdma_dma_mem *src_mem, u64 src_offset, u64 size);
+int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info);
+int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info, bool reset);
+int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx,
+ enum irdma_sd_entry_type type,
+ bool setsd);
+int irdma_update_sds_noccq(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
struct irdma_vfdev *irdma_vfdev_from_fpm(struct irdma_sc_dev *dev,
u8 hmc_fn_id);
struct irdma_hmc_info *irdma_vf_hmcinfo_from_fpm(struct irdma_sc_dev *dev,
u8 hmc_fn_id);
-enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw,
- struct irdma_hmc_info *hmc_info,
- u32 sd_index,
- enum irdma_sd_entry_type type,
- u64 direct_mode_sz);
-enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
- struct irdma_hmc_info *hmc_info,
- u32 pd_index,
- struct irdma_dma_mem *rsrc_pg);
-enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev,
- struct irdma_hmc_info *hmc_info,
- u32 idx);
-enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info,
- u32 idx);
-enum irdma_status_code
-irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx);
+int irdma_add_sd_table_entry(struct irdma_hw *hw,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ enum irdma_sd_entry_type type, u64 direct_mode_sz);
+int irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 pd_index,
+ struct irdma_dma_mem *rsrc_pg);
+int irdma_remove_pd_bp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 idx);
+int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx);
+int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx);
#endif /* IRDMA_HMC_H */
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index b4c657f5f2f9..ab246447520b 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -61,7 +61,7 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
struct irdma_cq *cq = iwcq->back_cq;
if (!cq->user_mode)
- cq->armed = false;
+ atomic_set(&cq->armed, 0);
if (cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
@@ -75,12 +75,12 @@ static void irdma_puda_ce_handler(struct irdma_pci_f *rf,
struct irdma_sc_cq *cq)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
- enum irdma_status_code status;
u32 compl_error;
+ int status;
do {
status = irdma_puda_poll_cmpl(dev, cq, &compl_error);
- if (status == IRDMA_ERR_Q_EMPTY)
+ if (status == -ENOENT)
break;
if (status) {
ibdev_dbg(to_ibdev(dev), "ERR: puda status = %d\n", status);
@@ -138,59 +138,68 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
switch (info->ae_id) {
- case IRDMA_AE_AMP_UNALLOCATED_STAG:
case IRDMA_AE_AMP_BOUNDS_VIOLATION:
case IRDMA_AE_AMP_INVALID_STAG:
- qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
- fallthrough;
+ case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+ case IRDMA_AE_AMP_UNALLOCATED_STAG:
case IRDMA_AE_AMP_BAD_PD:
- case IRDMA_AE_UDA_XMIT_BAD_PD:
+ case IRDMA_AE_AMP_BAD_QP:
+ case IRDMA_AE_AMP_BAD_STAG_KEY:
+ case IRDMA_AE_AMP_BAD_STAG_INDEX:
+ case IRDMA_AE_AMP_TO_WRAP:
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
qp->flush_code = FLUSH_PROT_ERR;
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
break;
- case IRDMA_AE_AMP_BAD_QP:
+ case IRDMA_AE_UDA_XMIT_BAD_PD:
case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
qp->flush_code = FLUSH_LOC_QP_OP_ERR;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+ case IRDMA_AE_UDA_L4LEN_INVALID:
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ qp->flush_code = FLUSH_LOC_LEN_ERR;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
break;
- case IRDMA_AE_AMP_BAD_STAG_KEY:
- case IRDMA_AE_AMP_BAD_STAG_INDEX:
- case IRDMA_AE_AMP_TO_WRAP:
- case IRDMA_AE_AMP_RIGHTS_VIOLATION:
case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case IRDMA_AE_PRIV_OPERATION_DENIED:
- case IRDMA_AE_IB_INVALID_REQUEST:
case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
qp->flush_code = FLUSH_REM_ACCESS_ERR;
qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
break;
case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
- case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
- case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
- case IRDMA_AE_UDA_L4LEN_INVALID:
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
- qp->flush_code = FLUSH_LOC_LEN_ERR;
+ case IRDMA_AE_IB_REMOTE_OP_ERROR:
+ qp->flush_code = FLUSH_REM_OP_ERR;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
break;
case IRDMA_AE_LCE_QP_CATASTROPHIC:
qp->flush_code = FLUSH_FATAL_ERR;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
break;
- case IRDMA_AE_DDP_UBE_INVALID_MO:
case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
- case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
qp->flush_code = FLUSH_GENERAL_ERR;
break;
case IRDMA_AE_LLP_TOO_MANY_RETRIES:
qp->flush_code = FLUSH_RETRY_EXC_ERR;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
break;
case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
qp->flush_code = FLUSH_MW_BIND_ERR;
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
break;
- case IRDMA_AE_IB_REMOTE_OP_ERROR:
- qp->flush_code = FLUSH_REM_OP_ERR;
+ case IRDMA_AE_IB_INVALID_REQUEST:
+ qp->flush_code = FLUSH_REM_INV_REQ_ERR;
+ qp->event_type = IRDMA_QP_EVENT_REQ_ERR;
break;
default:
- qp->flush_code = FLUSH_FATAL_ERR;
+ qp->flush_code = FLUSH_GENERAL_ERR;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
break;
}
}
@@ -257,10 +266,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
iwqp->last_aeq = info->ae_id;
spin_unlock_irqrestore(&iwqp->lock, flags);
ctx_info = &iwqp->ctx_info;
- if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1))
- ctx_info->roce_info->err_rq_idx_valid = true;
- else
- ctx_info->iwarp_info->err_rq_idx_valid = true;
} else {
if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR)
continue;
@@ -370,16 +375,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
case IRDMA_AE_LCE_CQ_CATASTROPHIC:
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
- if (rdma_protocol_roce(&iwdev->ibdev, 1))
- ctx_info->roce_info->err_rq_idx_valid = false;
- else
- ctx_info->iwarp_info->err_rq_idx_valid = false;
- fallthrough;
default:
- ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d\n",
- info->ae_id, info->qp, info->qp_cq_id);
+ ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
+ info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
- if (!info->sq && ctx_info->roce_info->err_rq_idx_valid) {
+ ctx_info->roce_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
ctx_info->roce_info->err_rq_idx = info->wqe_idx;
irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
ctx_info);
@@ -388,7 +389,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
irdma_cm_disconn(iwqp);
break;
}
- if (!info->sq && ctx_info->iwarp_info->err_rq_idx_valid) {
+ ctx_info->iwarp_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
ctx_info->iwarp_info->err_rq_idx = info->wqe_idx;
ctx_info->tcp_info_valid = false;
ctx_info->iwarp_info_valid = true;
@@ -456,7 +458,7 @@ static void irdma_ceq_dpc(struct tasklet_struct *t)
* Allocate iwdev msix table and copy the msix info to the table
* Return 0 if successful, otherwise return error
*/
-static enum irdma_status_code irdma_save_msix_info(struct irdma_pci_f *rf)
+static int irdma_save_msix_info(struct irdma_pci_f *rf)
{
struct irdma_qvlist_info *iw_qvlist;
struct irdma_qv_info *iw_qvinfo;
@@ -466,13 +468,13 @@ static enum irdma_status_code irdma_save_msix_info(struct irdma_pci_f *rf)
size_t size;
if (!rf->msix_count)
- return IRDMA_ERR_NO_INTR;
+ return -EINVAL;
size = sizeof(struct irdma_msix_vector) * rf->msix_count;
size += struct_size(iw_qvlist, qv_info, rf->msix_count);
rf->iw_msixtbl = kzalloc(size, GFP_KERNEL);
if (!rf->iw_msixtbl)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
rf->iw_qvlist = (struct irdma_qvlist_info *)
(&rf->iw_msixtbl[rf->msix_count]);
@@ -550,7 +552,7 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf,
struct irdma_sc_dev *dev = &rf->sc_dev;
dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx);
- irq_set_affinity_hint(msix_vec->irq, NULL);
+ irq_update_affinity_hint(msix_vec->irq, NULL);
free_irq(msix_vec->irq, dev_id);
}
@@ -564,9 +566,9 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf,
*/
static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp)
{
- enum irdma_status_code status = 0;
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_cqp *cqp = &rf->cqp;
+ int status = 0;
if (rf->cqp_cmpl_wq)
destroy_workqueue(rf->cqp_cmpl_wq);
@@ -606,9 +608,9 @@ static void irdma_destroy_virt_aeq(struct irdma_pci_f *rf)
*/
static void irdma_destroy_aeq(struct irdma_pci_f *rf)
{
- enum irdma_status_code status = IRDMA_ERR_NOT_READY;
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_aeq *aeq = &rf->aeq;
+ int status = -EBUSY;
if (!rf->msix_shared) {
rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, rf->iw_msixtbl->idx, false);
@@ -642,8 +644,8 @@ exit:
*/
static void irdma_destroy_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq)
{
- enum irdma_status_code status;
struct irdma_sc_dev *dev = &rf->sc_dev;
+ int status;
if (rf->reset)
goto exit;
@@ -733,7 +735,7 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_ccq *ccq = &rf->ccq;
- enum irdma_status_code status = 0;
+ int status = 0;
if (!rf->reset)
status = irdma_sc_ccq_destroy(dev->ccq, 0, true);
@@ -796,9 +798,8 @@ static void irdma_del_hmc_objects(struct irdma_sc_dev *dev,
* @dev: hardware control device structure
* @info: information for the hmc object to create
*/
-static enum irdma_status_code
-irdma_create_hmc_obj_type(struct irdma_sc_dev *dev,
- struct irdma_hmc_create_obj_info *info)
+static int irdma_create_hmc_obj_type(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
{
return irdma_sc_create_hmc_obj(dev, info);
}
@@ -812,13 +813,12 @@ irdma_create_hmc_obj_type(struct irdma_sc_dev *dev,
* Create the device hmc objects and allocate hmc pages
* Return 0 if successful, otherwise clean up and return error
*/
-static enum irdma_status_code
-irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers vers)
+static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged,
+ enum irdma_vers vers)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_hmc_create_obj_info info = {};
- enum irdma_status_code status = 0;
- int i;
+ int i, status = 0;
info.hmc_info = dev->hmc_info;
info.privileged = privileged;
@@ -868,9 +868,9 @@ irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers v
* update the memptr to point to the new aligned memory
* Return 0 if successful, otherwise return no memory error
*/
-static enum irdma_status_code
-irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr,
- u32 size, u32 mask)
+static int irdma_obj_aligned_mem(struct irdma_pci_f *rf,
+ struct irdma_dma_mem *memptr, u32 size,
+ u32 mask)
{
unsigned long va, newva;
unsigned long extra;
@@ -884,7 +884,7 @@ irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr,
memptr->pa = rf->obj_next.pa + extra;
memptr->size = size;
if (((u8 *)memptr->va + size) > ((u8 *)rf->obj_mem.va + rf->obj_mem.size))
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
rf->obj_next.va = (u8 *)memptr->va + size;
rf->obj_next.pa = memptr->pa + size;
@@ -899,25 +899,24 @@ irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr,
* Return 0, if the cqp and all the resources associated with it
* are successfully created, otherwise return error
*/
-static enum irdma_status_code irdma_create_cqp(struct irdma_pci_f *rf)
+static int irdma_create_cqp(struct irdma_pci_f *rf)
{
- enum irdma_status_code status;
u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048;
struct irdma_dma_mem mem;
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_cqp_init_info cqp_init_info = {};
struct irdma_cqp *cqp = &rf->cqp;
u16 maj_err, min_err;
- int i;
+ int i, status;
cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
if (!cqp->cqp_requests)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
if (!cqp->scratch_array) {
kfree(cqp->cqp_requests);
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
dev->cqp = &cqp->sc_cqp;
@@ -929,7 +928,7 @@ static enum irdma_status_code irdma_create_cqp(struct irdma_pci_f *rf)
if (!cqp->sq.va) {
kfree(cqp->scratch_array);
kfree(cqp->cqp_requests);
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx),
@@ -999,12 +998,12 @@ exit:
* Return 0, if the ccq and the resources associated with it
* are successfully created, otherwise return error
*/
-static enum irdma_status_code irdma_create_ccq(struct irdma_pci_f *rf)
+static int irdma_create_ccq(struct irdma_pci_f *rf)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
- enum irdma_status_code status;
struct irdma_ccq_init_info info = {};
struct irdma_ccq *ccq = &rf->ccq;
+ int status;
dev->ccq = &ccq->sc_cq;
dev->ccq->dev = dev;
@@ -1015,7 +1014,7 @@ static enum irdma_status_code irdma_create_ccq(struct irdma_pci_f *rf)
ccq->mem_cq.va = dma_alloc_coherent(dev->hw->device, ccq->mem_cq.size,
&ccq->mem_cq.pa, GFP_KERNEL);
if (!ccq->mem_cq.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
status = irdma_obj_aligned_mem(rf, &ccq->shadow_area,
ccq->shadow_area.size,
@@ -1054,9 +1053,9 @@ exit:
* Allocate a mac ip entry and add it to the hw table Return 0
* if successful, otherwise return error
*/
-static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev)
+static int irdma_alloc_set_mac(struct irdma_device *iwdev)
{
- enum irdma_status_code status;
+ int status;
status = irdma_alloc_local_mac_entry(iwdev->rf,
&iwdev->mac_ip_table_idx);
@@ -1082,9 +1081,8 @@ static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev)
* Allocate interrupt resources and enable irq handling
* Return 0 if successful, otherwise return error
*/
-static enum irdma_status_code
-irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
- u32 ceq_id, struct irdma_msix_vector *msix_vec)
+static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
+ u32 ceq_id, struct irdma_msix_vector *msix_vec)
{
int status;
@@ -1100,10 +1098,10 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
}
cpumask_clear(&msix_vec->mask);
cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
- irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask);
+ irq_update_affinity_hint(msix_vec->irq, &msix_vec->mask);
if (status) {
ibdev_dbg(&rf->iwdev->ibdev, "ERR: ceq irq config fail\n");
- return IRDMA_ERR_CFG;
+ return status;
}
msix_vec->ceq_id = ceq_id;
@@ -1119,7 +1117,7 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
* Allocate interrupt resources and enable irq handling
* Return 0 if successful, otherwise return error
*/
-static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
+static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
{
struct irdma_msix_vector *msix_vec = rf->iw_msixtbl;
u32 ret = 0;
@@ -1131,7 +1129,7 @@ static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
}
if (ret) {
ibdev_dbg(&rf->iwdev->ibdev, "ERR: aeq irq config fail\n");
- return IRDMA_ERR_CFG;
+ return -EINVAL;
}
rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, true);
@@ -1149,12 +1147,10 @@ static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
* Return 0, if the ceq and the resources associated with it
* are successfully created, otherwise return error
*/
-static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf,
- struct irdma_ceq *iwceq,
- u32 ceq_id,
- struct irdma_sc_vsi *vsi)
+static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
+ u32 ceq_id, struct irdma_sc_vsi *vsi)
{
- enum irdma_status_code status;
+ int status;
struct irdma_ceq_init_info info = {};
struct irdma_sc_dev *dev = &rf->sc_dev;
u64 scratch;
@@ -1169,7 +1165,7 @@ static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf,
iwceq->mem.va = dma_alloc_coherent(dev->hw->device, iwceq->mem.size,
&iwceq->mem.pa, GFP_KERNEL);
if (!iwceq->mem.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
info.ceq_id = ceq_id;
info.ceqe_base = iwceq->mem.va;
@@ -1205,18 +1201,18 @@ static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf,
* Create the ceq 0 and configure it's msix interrupt vector
* Return 0, if successfully set up, otherwise return error
*/
-static enum irdma_status_code irdma_setup_ceq_0(struct irdma_pci_f *rf)
+static int irdma_setup_ceq_0(struct irdma_pci_f *rf)
{
struct irdma_ceq *iwceq;
struct irdma_msix_vector *msix_vec;
u32 i;
- enum irdma_status_code status = 0;
+ int status = 0;
u32 num_ceqs;
num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs);
rf->ceqlist = kcalloc(num_ceqs, sizeof(*rf->ceqlist), GFP_KERNEL);
if (!rf->ceqlist) {
- status = IRDMA_ERR_NO_MEMORY;
+ status = -ENOMEM;
goto exit;
}
@@ -1262,14 +1258,13 @@ exit:
* Create the ceq's and configure their msix interrupt vectors
* Return 0, if ceqs are successfully set up, otherwise return error
*/
-static enum irdma_status_code irdma_setup_ceqs(struct irdma_pci_f *rf,
- struct irdma_sc_vsi *vsi)
+static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi)
{
u32 i;
u32 ceq_id;
struct irdma_ceq *iwceq;
struct irdma_msix_vector *msix_vec;
- enum irdma_status_code status;
+ int status;
u32 num_ceqs;
num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs);
@@ -1303,22 +1298,21 @@ del_ceqs:
return status;
}
-static enum irdma_status_code irdma_create_virt_aeq(struct irdma_pci_f *rf,
- u32 size)
+static int irdma_create_virt_aeq(struct irdma_pci_f *rf, u32 size)
{
- enum irdma_status_code status = IRDMA_ERR_NO_MEMORY;
struct irdma_aeq *aeq = &rf->aeq;
dma_addr_t *pg_arr;
u32 pg_cnt;
+ int status;
if (rf->rdma_ver < IRDMA_GEN_2)
- return IRDMA_NOT_SUPPORTED;
+ return -EOPNOTSUPP;
aeq->mem.size = sizeof(struct irdma_sc_aeqe) * size;
aeq->mem.va = vzalloc(aeq->mem.size);
if (!aeq->mem.va)
- return status;
+ return -ENOMEM;
pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE);
status = irdma_get_pble(rf->pble_rsrc, &aeq->palloc, pg_cnt, true);
@@ -1345,15 +1339,15 @@ static enum irdma_status_code irdma_create_virt_aeq(struct irdma_pci_f *rf,
* Return 0, if the aeq and the resources associated with it
* are successfully created, otherwise return error
*/
-static enum irdma_status_code irdma_create_aeq(struct irdma_pci_f *rf)
+static int irdma_create_aeq(struct irdma_pci_f *rf)
{
- enum irdma_status_code status;
struct irdma_aeq_init_info info = {};
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_aeq *aeq = &rf->aeq;
struct irdma_hmc_info *hmc_info = rf->sc_dev.hmc_info;
u32 aeq_size;
u8 multiplier = (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 2 : 1;
+ int status;
aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt +
hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt;
@@ -1412,10 +1406,10 @@ err:
* Create the aeq and configure its msix interrupt vector
* Return 0 if successful, otherwise return error
*/
-static enum irdma_status_code irdma_setup_aeq(struct irdma_pci_f *rf)
+static int irdma_setup_aeq(struct irdma_pci_f *rf)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
- enum irdma_status_code status;
+ int status;
status = irdma_create_aeq(rf);
if (status)
@@ -1439,10 +1433,10 @@ static enum irdma_status_code irdma_setup_aeq(struct irdma_pci_f *rf)
*
* Return 0 if successful, otherwise return error
*/
-static enum irdma_status_code irdma_initialize_ilq(struct irdma_device *iwdev)
+static int irdma_initialize_ilq(struct irdma_device *iwdev)
{
struct irdma_puda_rsrc_info info = {};
- enum irdma_status_code status;
+ int status;
info.type = IRDMA_PUDA_RSRC_TYPE_ILQ;
info.cq_id = 1;
@@ -1469,10 +1463,10 @@ static enum irdma_status_code irdma_initialize_ilq(struct irdma_device *iwdev)
*
* Return 0 if successful, otherwise return error
*/
-static enum irdma_status_code irdma_initialize_ieq(struct irdma_device *iwdev)
+static int irdma_initialize_ieq(struct irdma_device *iwdev)
{
struct irdma_puda_rsrc_info info = {};
- enum irdma_status_code status;
+ int status;
info.type = IRDMA_PUDA_RSRC_TYPE_IEQ;
info.cq_id = 2;
@@ -1515,15 +1509,12 @@ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi)
* the hmc objects and create the objects
* Return 0 if successful, otherwise return error
*/
-static enum irdma_status_code irdma_hmc_setup(struct irdma_pci_f *rf)
+static int irdma_hmc_setup(struct irdma_pci_f *rf)
{
- enum irdma_status_code status;
+ int status;
u32 qpcnt;
- if (rf->rdma_ver == IRDMA_GEN_1)
- qpcnt = rsrc_limits_table[rf->limits_sel].qplimit * 2;
- else
- qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
+ qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
rf->sd_type = IRDMA_SD_TYPE_DIRECT;
status = irdma_cfg_fpm_val(&rf->sc_dev, qpcnt);
@@ -1551,7 +1542,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf)
rf->obj_mem.pa);
rf->obj_mem.va = NULL;
if (rf->rdma_ver != IRDMA_GEN_1) {
- kfree(rf->allocated_ws_nodes);
+ bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
}
kfree(rf->ceqlist);
@@ -1570,9 +1561,9 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf)
* Return 0 if successful, otherwise clean up the resources
* and return error
*/
-static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf)
+static int irdma_initialize_dev(struct irdma_pci_f *rf)
{
- enum irdma_status_code status;
+ int status;
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_device_init_info info = {};
struct irdma_dma_mem mem;
@@ -1584,7 +1575,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf)
rf->hmc_info_mem = kzalloc(size, GFP_KERNEL);
if (!rf->hmc_info_mem)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
rf->pble_rsrc = (struct irdma_hmc_pble_rsrc *)rf->hmc_info_mem;
dev->hmc_info = &rf->hw.hmc;
@@ -1608,7 +1599,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf)
info.fpm_commit_buf = mem.va;
info.bar0 = rf->hw.hw_addr;
- info.hmc_fn_id = PCI_FUNC(rf->pcidev->devfn);
+ info.hmc_fn_id = rf->pf_id;
info.hw = &rf->hw;
status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info);
if (status)
@@ -1667,9 +1658,9 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev)
destroy_workqueue(iwdev->cleanup_wq);
}
-static enum irdma_status_code irdma_setup_init_state(struct irdma_pci_f *rf)
+static int irdma_setup_init_state(struct irdma_pci_f *rf)
{
- enum irdma_status_code status;
+ int status;
status = irdma_save_msix_info(rf);
if (status)
@@ -1680,7 +1671,7 @@ static enum irdma_status_code irdma_setup_init_state(struct irdma_pci_f *rf)
rf->obj_mem.va = dma_alloc_coherent(rf->hw.device, rf->obj_mem.size,
&rf->obj_mem.pa, GFP_KERNEL);
if (!rf->obj_mem.va) {
- status = IRDMA_ERR_NO_MEMORY;
+ status = -ENOMEM;
goto clean_msixtbl;
}
@@ -1709,14 +1700,14 @@ clean_msixtbl:
*/
static void irdma_get_used_rsrc(struct irdma_device *iwdev)
{
- iwdev->rf->used_pds = find_next_zero_bit(iwdev->rf->allocated_pds,
- iwdev->rf->max_pd, 0);
- iwdev->rf->used_qps = find_next_zero_bit(iwdev->rf->allocated_qps,
- iwdev->rf->max_qp, 0);
- iwdev->rf->used_cqs = find_next_zero_bit(iwdev->rf->allocated_cqs,
- iwdev->rf->max_cq, 0);
- iwdev->rf->used_mrs = find_next_zero_bit(iwdev->rf->allocated_mrs,
- iwdev->rf->max_mr, 0);
+ iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds,
+ iwdev->rf->max_pd);
+ iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps,
+ iwdev->rf->max_qp);
+ iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs,
+ iwdev->rf->max_cq);
+ iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs,
+ iwdev->rf->max_mr);
}
void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
@@ -1763,14 +1754,14 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
* Create device queues ILQ, IEQ, CEQs and PBLEs. Setup irdma
* device resource objects.
*/
-enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev,
- struct irdma_l2params *l2params)
+int irdma_rt_init_hw(struct irdma_device *iwdev,
+ struct irdma_l2params *l2params)
{
struct irdma_pci_f *rf = iwdev->rf;
struct irdma_sc_dev *dev = &rf->sc_dev;
- enum irdma_status_code status;
struct irdma_vsi_init_info vsi_info = {};
struct irdma_vsi_stats_info stats_info = {};
+ int status;
vsi_info.dev = dev;
vsi_info.back_vsi = iwdev;
@@ -1788,7 +1779,7 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev,
stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
if (!stats_info.pestat) {
irdma_cleanup_cm_core(&iwdev->cm_core);
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
stats_info.fcn_id = dev->hmc_fn_id;
status = irdma_vsi_stats_init(&iwdev->vsi, &stats_info);
@@ -1835,10 +1826,6 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev,
rf->rsrc_created = true;
}
- iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
-
if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
irdma_alloc_set_mac(iwdev);
irdma_add_ip(iwdev);
@@ -1850,7 +1837,7 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev,
iwdev->cleanup_wq = alloc_workqueue("irdma-cleanup-wq",
WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
if (!iwdev->cleanup_wq)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
irdma_get_used_rsrc(iwdev);
init_waitqueue_head(&iwdev->suspend_wq);
@@ -1870,10 +1857,10 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev,
*
* Create admin queues, HMC obejcts and RF resource objects
*/
-enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf)
+int irdma_ctrl_init_hw(struct irdma_pci_f *rf)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
- enum irdma_status_code status;
+ int status;
do {
status = irdma_setup_init_state(rf);
if (status)
@@ -1915,7 +1902,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf)
rf->cqp_cmpl_wq = alloc_ordered_workqueue("cqp_cmpl_wq",
WQ_HIGHPRI | WQ_UNBOUND);
if (!rf->cqp_cmpl_wq) {
- status = IRDMA_ERR_NO_MEMORY;
+ status = -ENOMEM;
break;
}
INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker);
@@ -1984,9 +1971,8 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
u32 ret;
if (rf->rdma_ver != IRDMA_GEN_1) {
- rf->allocated_ws_nodes =
- kcalloc(BITS_TO_LONGS(IRDMA_MAX_WS_NODES),
- sizeof(unsigned long), GFP_KERNEL);
+ rf->allocated_ws_nodes = bitmap_zalloc(IRDMA_MAX_WS_NODES,
+ GFP_KERNEL);
if (!rf->allocated_ws_nodes)
return -ENOMEM;
@@ -2035,7 +2021,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
return 0;
mem_rsrc_kzalloc_fail:
- kfree(rf->allocated_ws_nodes);
+ bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
return ret;
@@ -2202,11 +2188,11 @@ int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 id
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_info->post_sq = 1;
@@ -2238,11 +2224,11 @@ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx)
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status = 0;
+ int status = 0;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_info->cqp_cmd = IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY;
@@ -2264,18 +2250,17 @@ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx)
* @accel_local_port: port for apbvt
* @add_port: add ordelete port
*/
-static enum irdma_status_code
-irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, u16 accel_local_port,
- bool add_port)
+static int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev,
+ u16 accel_local_port, bool add_port)
{
struct irdma_apbvt_info *info;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, add_port);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.manage_apbvt_entry.info;
@@ -2429,22 +2414,21 @@ static void irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request)
* @cmnode: cmnode associated with connection
* @wait: wait for completion
*/
-enum irdma_status_code
-irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
- enum irdma_quad_entry_type etype,
- enum irdma_quad_hash_manage_type mtype, void *cmnode,
- bool wait)
+int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
+ enum irdma_quad_entry_type etype,
+ enum irdma_quad_hash_manage_type mtype, void *cmnode,
+ bool wait)
{
struct irdma_qhash_table_info *info;
- enum irdma_status_code status;
struct irdma_cqp *iwcqp = &iwdev->rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_cm_node *cm_node = cmnode;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.manage_qhash_table_entry.info;
@@ -2558,12 +2542,10 @@ static void irdma_hw_flush_wqes_callback(struct irdma_cqp_request *cqp_request)
* @info: info for flush
* @wait: flag wait for completion
*/
-enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf,
- struct irdma_sc_qp *qp,
- struct irdma_qp_flush_info *info,
- bool wait)
+int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, bool wait)
{
- enum irdma_status_code status;
+ int status;
struct irdma_qp_flush_info *hw_info;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
@@ -2571,7 +2553,7 @@ enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf,
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
if (!wait)
@@ -2619,7 +2601,7 @@ enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf,
info->sq = true;
new_req = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
if (!new_req) {
- status = IRDMA_ERR_NO_MEMORY;
+ status = -ENOMEM;
goto put_cqp;
}
cqp_info = &new_req->info;
@@ -2705,24 +2687,29 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
info.sq = flush_mask & IRDMA_FLUSH_SQ;
info.rq = flush_mask & IRDMA_FLUSH_RQ;
- if (flush_mask & IRDMA_REFLUSH) {
- if (info.sq)
- iwqp->sc_qp.flush_sq = false;
- if (info.rq)
- iwqp->sc_qp.flush_rq = false;
- }
-
/* Generate userflush errors in CQE */
info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
info.sq_minor_code = FLUSH_GENERAL_ERR;
info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
info.rq_minor_code = FLUSH_GENERAL_ERR;
info.userflushcode = true;
- if (flush_code) {
- if (info.sq && iwqp->sc_qp.sq_flush_code)
- info.sq_minor_code = flush_code;
- if (info.rq && iwqp->sc_qp.rq_flush_code)
- info.rq_minor_code = flush_code;
+
+ if (flush_mask & IRDMA_REFLUSH) {
+ if (info.sq)
+ iwqp->sc_qp.flush_sq = false;
+ if (info.rq)
+ iwqp->sc_qp.flush_rq = false;
+ } else {
+ if (flush_code) {
+ if (info.sq && iwqp->sc_qp.sq_flush_code)
+ info.sq_minor_code = flush_code;
+ if (info.rq && iwqp->sc_qp.rq_flush_code)
+ info.rq_minor_code = flush_code;
+ }
+ if (!iwqp->user_mode)
+ queue_delayed_work(iwqp->iwdev->cleanup_wq,
+ &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
/* Issue flush */
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
index 64148ad8a604..50299f58b6b3 100644
--- a/drivers/infiniband/hw/irdma/i40iw_hw.c
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
@@ -3,7 +3,6 @@
#include "osdep.h"
#include "type.h"
#include "i40iw_hw.h"
-#include "status.h"
#include "protos.h"
static u32 i40iw_regs[IRDMA_MAX_REGS] = {
@@ -202,6 +201,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev)
dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c
index d219f64b2c3d..4053ead32416 100644
--- a/drivers/infiniband/hw/irdma/i40iw_if.c
+++ b/drivers/infiniband/hw/irdma/i40iw_if.c
@@ -77,6 +77,7 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info
rf->rdma_ver = IRDMA_GEN_1;
rf->gen_ops.request_reset = i40iw_request_reset;
rf->pcidev = cdev_info->pcidev;
+ rf->pf_id = cdev_info->fid;
rf->hw.hw_addr = cdev_info->hw_addr;
rf->cdev = cdev_info;
rf->msix_count = cdev_info->msix_count;
@@ -138,7 +139,7 @@ static int i40iw_open(struct i40e_info *cdev_info, struct i40e_client *client)
if (last_qset == IRDMA_NO_QSET)
last_qset = qset;
else if ((qset != last_qset) && (qset != IRDMA_NO_QSET))
- iwdev->dcb = true;
+ iwdev->dcb_vlan_mode = true;
}
if (irdma_rt_init_hw(iwdev, &l2params)) {
@@ -198,7 +199,7 @@ static void i40iw_remove(struct auxiliary_device *aux_dev)
aux_dev);
struct i40e_info *cdev_info = i40e_adev->ldev;
- return i40e_client_device_unregister(cdev_info);
+ i40e_client_device_unregister(cdev_info);
}
static const struct auxiliary_device_id i40iw_auxiliary_id_table[] = {
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
index cf53b17510cd..5986fd906308 100644
--- a/drivers/infiniband/hw/irdma/icrdma_hw.c
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
@@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev)
dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
dev->irq_ops = &icrdma_irq_ops;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
index 46c12334c735..4789e85d717b 100644
--- a/drivers/infiniband/hw/irdma/irdma.h
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -127,6 +127,7 @@ struct irdma_hw_attrs {
u64 max_hw_outbound_msg_size;
u64 max_hw_inbound_msg_size;
u64 max_mr_size;
+ u64 page_size_cap;
u32 min_hw_qp_id;
u32 min_hw_aeq_size;
u32 max_hw_aeq_size;
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 51a41359e0b4..514453777e07 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -79,6 +79,10 @@ static void irdma_fill_qos_info(struct irdma_l2params *l2params,
}
for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++)
l2params->up2tc[i] = qos_info->up2tc[i];
+ if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) {
+ l2params->dscp_mode = true;
+ memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
+ }
}
static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event)
@@ -108,8 +112,9 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event
l2params.tc_changed = true;
ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n");
ice_get_qos_params(pf, &qos_info);
- iwdev->dcb = qos_info.num_tc > 1;
irdma_fill_qos_info(&l2params, &qos_info);
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode = qos_info.num_tc > 1 && !l2params.dscp_mode;
irdma_change_l2params(&iwdev->vsi, &l2params);
} else if (*event->type & BIT(IIDC_EVENT_CRIT_ERR)) {
ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n",
@@ -157,8 +162,8 @@ static void irdma_request_reset(struct irdma_pci_f *rf)
* @vsi: vsi structure
* @tc_node: Traffic class node
*/
-static enum irdma_status_code irdma_lan_register_qset(struct irdma_sc_vsi *vsi,
- struct irdma_ws_node *tc_node)
+static int irdma_lan_register_qset(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node)
{
struct irdma_device *iwdev = vsi->back_vsi;
struct ice_pf *pf = iwdev->rf->cdev;
@@ -171,7 +176,7 @@ static enum irdma_status_code irdma_lan_register_qset(struct irdma_sc_vsi *vsi,
ret = ice_add_rdma_qset(pf, &qset);
if (ret) {
ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n");
- return IRDMA_ERR_REG_QSET;
+ return ret;
}
tc_node->l2_sched_node_id = qset.teid;
@@ -207,7 +212,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
struct iidc_auxiliary_dev,
adev);
struct ice_pf *pf = iidc_adev->pf;
- struct irdma_device *iwdev = dev_get_drvdata(&aux_dev->dev);
+ struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
irdma_ib_unregister_device(iwdev);
ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false);
@@ -226,16 +231,18 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf
rf->hw.hw_addr = pf->hw.hw_addr;
rf->pcidev = pf->pdev;
rf->msix_count = pf->num_rdma_msix;
+ rf->pf_id = pf->hw.pf_id;
rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector];
rf->default_vsi.vsi_idx = vsi->vsi_num;
- rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
+ rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ?
+ IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
rf->rdma_ver = IRDMA_GEN_2;
rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
rf->gen_ops.request_reset = irdma_request_reset;
rf->limits_sel = 7;
rf->iwdev = iwdev;
-
+ mutex_init(&iwdev->ah_tbl_lock);
iwdev->netdev = vsi->netdev;
iwdev->vsi_num = vsi->vsi_num;
iwdev->init_state = INITIAL_STATE;
@@ -274,18 +281,19 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
irdma_fill_device_info(iwdev, pf, vsi);
rf = iwdev->rf;
- if (irdma_ctrl_init_hw(rf)) {
- err = -EIO;
+ err = irdma_ctrl_init_hw(rf);
+ if (err)
goto err_ctrl_init;
- }
l2params.mtu = iwdev->netdev->mtu;
ice_get_qos_params(pf, &qos_info);
irdma_fill_qos_info(&l2params, &qos_info);
- if (irdma_rt_init_hw(iwdev, &l2params)) {
- err = -EIO;
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
+
+ err = irdma_rt_init_hw(iwdev, &l2params);
+ if (err)
goto err_rt_init;
- }
err = irdma_ib_register_device(iwdev);
if (err)
@@ -294,7 +302,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, true);
ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn));
- dev_set_drvdata(&aux_dev->dev, iwdev);
+ auxiliary_set_drvdata(aux_dev, iwdev);
return 0;
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index cb218cab79ac..65e966ad3453 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -40,7 +40,6 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/uverbs_ioctl.h>
-#include "status.h"
#include "osdep.h"
#include "defs.h"
#include "hmc.h"
@@ -86,7 +85,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_NO_QSET 0xffff
#define IW_CFG_FPM_QP_COUNT 32768
-#define IRDMA_MAX_PAGES_PER_FMR 512
+#define IRDMA_MAX_PAGES_PER_FMR 262144
#define IRDMA_MIN_PAGES_PER_FMR 1
#define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2
#define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3
@@ -242,8 +241,8 @@ struct irdma_qvlist_info {
struct irdma_gen_ops {
void (*request_reset)(struct irdma_pci_f *rf);
- enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi,
- struct irdma_ws_node *tc_node);
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
void (*unregister_qset)(struct irdma_sc_vsi *vsi,
struct irdma_ws_node *tc_node);
};
@@ -257,6 +256,7 @@ struct irdma_pci_f {
u8 *mem_rsrc;
u8 rdma_ver;
u8 rst_to;
+ u8 pf_id;
enum irdma_protocol_used protocol_used;
u32 sd_type;
u32 msix_count;
@@ -332,11 +332,12 @@ struct irdma_device {
struct workqueue_struct *cleanup_wq;
struct irdma_sc_vsi vsi;
struct irdma_cm_core cm_core;
+ DECLARE_HASHTABLE(ah_hash_tbl, 8);
+ struct mutex ah_tbl_lock; /* protect AH hash table access */
u32 roce_cwnd;
u32 roce_ackcreds;
u32 vendor_id;
u32 vendor_part_id;
- u32 device_cap_flags;
u32 push_mode;
u32 rcv_wnd;
u16 mac_ip_table_idx;
@@ -345,7 +346,7 @@ struct irdma_device {
u8 iw_status;
bool roce_mode:1;
bool roce_dcqcn_en:1;
- bool dcb:1;
+ bool dcb_vlan_mode:1;
bool iw_ooo:1;
enum init_completion_state init_state;
@@ -457,10 +458,10 @@ static inline void irdma_free_rsrc(struct irdma_pci_f *rf,
spin_unlock_irqrestore(&rf->rsrc_lock, flags);
}
-enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf);
+int irdma_ctrl_init_hw(struct irdma_pci_f *rf);
void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf);
-enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev,
- struct irdma_l2params *l2params);
+int irdma_rt_init_hw(struct irdma_device *iwdev,
+ struct irdma_l2params *l2params);
void irdma_rt_deinit_hw(struct irdma_device *iwdev);
void irdma_qp_add_ref(struct ib_qp *ibqp);
void irdma_qp_rem_ref(struct ib_qp *ibqp);
@@ -489,9 +490,8 @@ void irdma_cm_disconn(struct irdma_qp *qp);
bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd,
u16 maj_err_code, u16 min_err_code);
-enum irdma_status_code
-irdma_handle_cqp_op(struct irdma_pci_f *rf,
- struct irdma_cqp_request *cqp_request);
+int irdma_handle_cqp_op(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request);
int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata);
@@ -500,21 +500,17 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf);
-enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev,
- struct irdma_qp *iwqp,
- struct irdma_modify_qp_info *info,
- bool wait);
-enum irdma_status_code irdma_qp_suspend_resume(struct irdma_sc_qp *qp,
- bool suspend);
-enum irdma_status_code
-irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
- enum irdma_quad_entry_type etype,
- enum irdma_quad_hash_manage_type mtype, void *cmnode,
- bool wait);
+int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
+ struct irdma_modify_qp_info *info, bool wait);
+int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend);
+int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
+ enum irdma_quad_entry_type etype,
+ enum irdma_quad_hash_manage_type mtype, void *cmnode,
+ bool wait);
void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf);
void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp);
void irdma_free_qp_rsrc(struct irdma_qp *iwqp);
-enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver);
+int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver);
void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core);
void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term,
u8 term_len);
@@ -523,10 +519,8 @@ int irdma_send_reset(struct irdma_cm_node *cm_node);
struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core,
u16 rem_port, u32 *rem_addr, u16 loc_port,
u32 *loc_addr, u16 vlan_id);
-enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf,
- struct irdma_sc_qp *qp,
- struct irdma_qp_flush_info *info,
- bool wait);
+int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, bool wait);
void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
struct irdma_gen_ae_info *info, bool wait);
void irdma_copy_ip_ntohl(u32 *dst, __be32 *src);
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
index 63d8bb3a6903..fc1ba2a3e6fb 100644
--- a/drivers/infiniband/hw/irdma/osdep.h
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/bitfield.h>
+#include <linux/net/intel/iidc.h>
#include <crypto/hash.h>
#include <rdma/ib_verbs.h>
@@ -42,32 +43,28 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
void irdma_add_dev_ref(struct irdma_sc_dev *dev);
void irdma_put_dev_ref(struct irdma_sc_dev *dev);
-enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc,
- void *addr, u32 len, u32 val);
+int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
+ u32 val);
struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
u32 seqnum);
void irdma_free_hash_desc(struct shash_desc *hash_desc);
-enum irdma_status_code irdma_init_hash_desc(struct shash_desc **hash_desc);
-enum irdma_status_code
-irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
- struct irdma_puda_buf *buf);
-enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
- struct irdma_update_sds_info *info);
-enum irdma_status_code
-irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
- struct irdma_hmc_fcn_info *hmcfcninfo,
- u16 *pmf_idx);
-enum irdma_status_code
-irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-enum irdma_status_code
-irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-enum irdma_status_code irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *mem);
+int irdma_init_hash_desc(struct shash_desc **hash_desc);
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf);
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_fcn_info *hmcfcninfo,
+ u16 *pmf_idx);
+int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
+int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
+int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *mem);
void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term,
u8 term_len);
@@ -79,7 +76,7 @@ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi);
void wr32(struct irdma_hw *hw, u32 reg, u32 val);
u32 rd32(struct irdma_hw *hw, u32 reg);
u64 rd64(struct irdma_hw *hw, u32 reg);
-enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va,
- dma_addr_t *pg_dma, u32 pg_cnt);
+int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma,
+ u32 pg_cnt);
void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt);
#endif /* IRDMA_OSDEP_H */
diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
index fed49da770f3..cdc0b8a6ed48 100644
--- a/drivers/infiniband/hw/irdma/pble.c
+++ b/drivers/infiniband/hw/irdma/pble.c
@@ -1,15 +1,13 @@
// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
-#include "status.h"
#include "hmc.h"
#include "defs.h"
#include "type.h"
#include "protos.h"
#include "pble.h"
-static enum irdma_status_code
-add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
+static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
/**
* irdma_destroy_pble_prm - destroy prm during module unload
@@ -35,13 +33,12 @@ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
* @dev: irdma_sc_dev struct
* @pble_rsrc: pble resources
*/
-enum irdma_status_code
-irdma_hmc_init_pble(struct irdma_sc_dev *dev,
- struct irdma_hmc_pble_rsrc *pble_rsrc)
+int irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+ struct irdma_hmc_pble_rsrc *pble_rsrc)
{
struct irdma_hmc_info *hmc_info;
u32 fpm_idx = 0;
- enum irdma_status_code status = 0;
+ int status = 0;
hmc_info = dev->hmc_info;
pble_rsrc->dev = dev;
@@ -60,7 +57,7 @@ irdma_hmc_init_pble(struct irdma_sc_dev *dev,
INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
if (add_pble_prm(pble_rsrc)) {
irdma_destroy_pble_prm(pble_rsrc);
- status = IRDMA_ERR_NO_MEMORY;
+ status = -ENOMEM;
}
return status;
@@ -84,12 +81,11 @@ static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc,
* @pble_rsrc: pble resource ptr
* @info: page info for sd
*/
-static enum irdma_status_code
-add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc,
- struct irdma_add_page_info *info)
+static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_add_page_info *info)
{
struct irdma_sc_dev *dev = pble_rsrc->dev;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
struct sd_pd_idx *idx = &info->idx;
struct irdma_chunk *chunk = info->chunk;
struct irdma_hmc_info *hmc_info = info->hmc_info;
@@ -137,9 +133,8 @@ static u32 fpm_to_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, u64 addr)
* @pble_rsrc: pble resource management
* @info: page info for sd
*/
-static enum irdma_status_code
-add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc,
- struct irdma_add_page_info *info)
+static int add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_add_page_info *info)
{
struct irdma_sc_dev *dev = pble_rsrc->dev;
u8 *addr;
@@ -148,13 +143,13 @@ add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc,
struct irdma_hmc_sd_entry *sd_entry = info->sd_entry;
struct irdma_hmc_info *hmc_info = info->hmc_info;
struct irdma_chunk *chunk = info->chunk;
- enum irdma_status_code status = 0;
+ int status = 0;
u32 rel_pd_idx = info->idx.rel_pd_idx;
u32 pd_idx = info->idx.pd_idx;
u32 i;
if (irdma_pble_get_paged_mem(chunk, info->pages))
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
status = irdma_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx,
IRDMA_SD_TYPE_PAGED,
@@ -207,8 +202,7 @@ static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev,
* add_pble_prm - add a sd entry for pble resoure
* @pble_rsrc: pble resource management
*/
-static enum irdma_status_code
-add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
{
struct irdma_sc_dev *dev = pble_rsrc->dev;
struct irdma_hmc_sd_entry *sd_entry;
@@ -216,22 +210,22 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
struct irdma_chunk *chunk;
struct irdma_add_page_info info;
struct sd_pd_idx *idx = &info.idx;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
enum irdma_sd_entry_type sd_entry_type;
u64 sd_reg_val = 0;
struct irdma_virt_mem chunkmem;
u32 pages;
if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
if (pble_rsrc->next_fpm_addr & 0xfff)
- return IRDMA_ERR_INVALID_PAGE_DESC_INDEX;
+ return -EINVAL;
chunkmem.size = sizeof(*chunk);
chunkmem.va = kzalloc(chunkmem.size, GFP_KERNEL);
if (!chunkmem.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
chunk = chunkmem.va;
chunk->chunkmem = chunkmem;
@@ -337,9 +331,8 @@ static void free_lvl2(struct irdma_hmc_pble_rsrc *pble_rsrc,
* @pble_rsrc: pble resource management
* @palloc: level 2 pble allocation
*/
-static enum irdma_status_code
-get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
- struct irdma_pble_alloc *palloc)
+static int get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
{
u32 lf4k, lflast, total, i;
u32 pblcnt = PBLE_PER_PAGE;
@@ -347,7 +340,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
struct irdma_pble_level2 *lvl2 = &palloc->level2;
struct irdma_pble_info *root = &lvl2->root;
struct irdma_pble_info *leaf;
- enum irdma_status_code ret_code;
+ int ret_code;
u64 fpm_addr;
/* number of full 512 (4K) leafs) */
@@ -359,7 +352,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
lvl2->leafmem.size = (sizeof(*leaf) * total);
lvl2->leafmem.va = kzalloc(lvl2->leafmem.size, GFP_KERNEL);
if (!lvl2->leafmem.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
lvl2->leaf = lvl2->leafmem.va;
leaf = lvl2->leaf;
@@ -368,7 +361,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
if (ret_code) {
kfree(lvl2->leafmem.va);
lvl2->leaf = NULL;
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
root->idx = fpm_to_idx(pble_rsrc, fpm_addr);
@@ -397,7 +390,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
error:
free_lvl2(pble_rsrc, palloc);
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
/**
@@ -405,11 +398,10 @@ error:
* @pble_rsrc: pble resource management
* @palloc: level 1 pble allocation
*/
-static enum irdma_status_code
-get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
- struct irdma_pble_alloc *palloc)
+static int get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
{
- enum irdma_status_code ret_code;
+ int ret_code;
u64 fpm_addr;
struct irdma_pble_info *lvl1 = &palloc->level1;
@@ -417,7 +409,7 @@ get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
palloc->total_cnt << 3, &lvl1->addr,
&fpm_addr);
if (ret_code)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
palloc->level = PBLE_LEVEL_1;
lvl1->idx = fpm_to_idx(pble_rsrc, fpm_addr);
@@ -433,11 +425,10 @@ get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
* @palloc: contains all inforamtion regarding pble (idx + pble addr)
* @level1_only: flag for a level 1 PBLE
*/
-static enum irdma_status_code
-get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
- struct irdma_pble_alloc *palloc, bool level1_only)
+static int get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, bool level1_only)
{
- enum irdma_status_code status = 0;
+ int status = 0;
status = get_lvl1_pble(pble_rsrc, palloc);
if (!status || level1_only || palloc->total_cnt <= PBLE_PER_PAGE)
@@ -455,11 +446,11 @@ get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
* @pble_cnt: #of pbles requested
* @level1_only: true if only pble level 1 to acquire
*/
-enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
- struct irdma_pble_alloc *palloc,
- u32 pble_cnt, bool level1_only)
+int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u32 pble_cnt,
+ bool level1_only)
{
- enum irdma_status_code status = 0;
+ int status = 0;
int max_sds = 0;
int i;
diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h
index aa20827dcc9d..29d295463559 100644
--- a/drivers/infiniband/hw/irdma/pble.h
+++ b/drivers/infiniband/hw/irdma/pble.h
@@ -69,7 +69,7 @@ struct irdma_add_page_info {
struct irdma_chunk {
struct list_head list;
struct irdma_dma_info dmainfo;
- void *bitmapbuf;
+ unsigned long *bitmapbuf;
u32 sizeofbitmap;
u64 size;
@@ -108,20 +108,18 @@ struct irdma_hmc_pble_rsrc {
};
void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
-enum irdma_status_code
-irdma_hmc_init_pble(struct irdma_sc_dev *dev,
- struct irdma_hmc_pble_rsrc *pble_rsrc);
+int irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+ struct irdma_hmc_pble_rsrc *pble_rsrc);
void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
struct irdma_pble_alloc *palloc);
-enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
- struct irdma_pble_alloc *palloc,
- u32 pble_cnt, bool level1_only);
-enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
- struct irdma_chunk *pchunk);
-enum irdma_status_code
-irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
- struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
- u64 **vaddr, u64 *fpm_addr);
+int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u32 pble_cnt,
+ bool level1_only);
+int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+ struct irdma_chunk *pchunk);
+int irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
+ u64 **vaddr, u64 *fpm_addr);
void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
struct irdma_pble_chunkinfo *chunkinfo);
void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc,
@@ -129,7 +127,6 @@ void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc,
void irdma_pble_release_lock(struct irdma_hmc_pble_rsrc *pble_rsrc,
unsigned long *flags);
void irdma_pble_free_paged_mem(struct irdma_chunk *chunk);
-enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk,
- u32 pg_cnt);
+int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt);
void irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk);
#endif /* IRDMA_PBLE_H */
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
index a17c0ffb0cc8..9b6e919ae2a9 100644
--- a/drivers/infiniband/hw/irdma/protos.h
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -12,58 +12,51 @@
#define CQP_TIMEOUT_THRESHOLD 500
/* init operations */
-enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver,
- struct irdma_sc_dev *dev,
- struct irdma_device_init_info *info);
+int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
+ struct irdma_device_init_info *info);
void irdma_sc_rt_init(struct irdma_sc_dev *dev);
void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp);
__le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch);
-enum irdma_status_code
-irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
- struct irdma_fast_reg_stag_info *info, bool post_sq);
+int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info,
+ bool post_sq);
/* HMC/FPM functions */
-enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev,
- u8 hmc_fn_id);
+int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id);
/* stats misc */
-enum irdma_status_code
-irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
- struct irdma_vsi_pestat *pestat, bool wait);
+int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat, bool wait);
void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev,
struct irdma_vsi_pestat *pestat);
void irdma_hw_stats_read_all(struct irdma_vsi_pestat *stats,
struct irdma_dev_hw_stats *stats_values,
u64 *hw_stats_regs_32, u64 *hw_stats_regs_64,
u8 hw_rev);
-enum irdma_status_code
-irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
- struct irdma_ws_node_info *node_info);
-enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_ceq *sc_ceq, u8 op);
-enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_aeq *sc_aeq, u8 op);
-enum irdma_status_code
-irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
- struct irdma_stats_inst_info *stats_info);
+int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
+ struct irdma_ws_node_info *node_info);
+int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq,
+ u8 op);
+int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq,
+ u8 op);
+int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
+ struct irdma_stats_inst_info *stats_info);
u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev);
void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id);
void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats,
struct irdma_gather_stats *gather_stats,
struct irdma_gather_stats *last_gather_stats);
/* vsi functions */
-enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
- struct irdma_vsi_stats_info *info);
+int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_stats_info *info);
void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi);
void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
struct irdma_vsi_init_info *info);
-enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq,
- struct irdma_sc_cq *cq);
+int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
/* misc L2 param change functions */
void irdma_change_l2params(struct irdma_sc_vsi *vsi,
struct irdma_l2params *l2params);
void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 suspend);
-enum irdma_status_code irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp,
- u8 cmd);
+int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 cmd);
void irdma_qp_add_qos(struct irdma_sc_qp *qp);
void irdma_qp_rem_qos(struct irdma_sc_qp *qp);
struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head,
@@ -81,31 +74,26 @@ void irdma_terminate_received(struct irdma_sc_qp *qp,
/* misc */
u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type);
void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp);
-enum irdma_status_code
-irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
- u8 hmc_fn_id, bool post_sq,
- bool poll_registers);
-enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev,
- u32 qp_count);
-enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev);
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers);
+int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count);
+int irdma_get_rdma_features(struct irdma_sc_dev *dev);
void free_sd_mem(struct irdma_sc_dev *dev);
-enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
- struct cqp_cmds_info *pcmdinfo);
-enum irdma_status_code irdma_process_bh(struct irdma_sc_dev *dev);
-enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
- struct irdma_update_sds_info *info);
-enum irdma_status_code
-irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-enum irdma_status_code
-irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-enum irdma_status_code irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *mem);
-enum irdma_status_code
-irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
- struct irdma_hmc_fcn_info *hmcfcninfo,
- u16 *pmf_idx);
+int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo);
+int irdma_process_bh(struct irdma_sc_dev *dev);
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
+int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
+int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *mem);
+int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_fcn_info *hmcfcninfo,
+ u16 *pmf_idx);
void irdma_add_dev_ref(struct irdma_sc_dev *dev);
void irdma_put_dev_ref(struct irdma_sc_dev *dev);
void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
index 58e7d875643b..4ec9639f1bdb 100644
--- a/drivers/infiniband/hw/irdma/puda.c
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
-#include "status.h"
#include "hmc.h"
#include "defs.h"
#include "type.h"
@@ -114,8 +113,7 @@ static void irdma_puda_post_recvbuf(struct irdma_puda_rsrc *rsrc, u32 wqe_idx,
* @rsrc: resource to use for buffer
* @initial: flag if during init time
*/
-static enum irdma_status_code
-irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial)
+static int irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial)
{
u32 i;
u32 invalid_cnt = rsrc->rxq_invalid_cnt;
@@ -124,7 +122,7 @@ irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial)
for (i = 0; i < invalid_cnt; i++) {
buf = irdma_puda_get_bufpool(rsrc);
if (!buf)
- return IRDMA_ERR_list_empty;
+ return -ENOBUFS;
irdma_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf, initial);
rsrc->rx_wqe_idx = ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
rsrc->rxq_invalid_cnt--;
@@ -193,19 +191,16 @@ static void irdma_puda_dele_buf(struct irdma_sc_dev *dev,
static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
u32 *wqe_idx)
{
- __le64 *wqe = NULL;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
*wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
if (!*wqe_idx)
qp->swqe_polarity = !qp->swqe_polarity;
IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code);
if (ret_code)
- return wqe;
-
- wqe = qp->sq_base[*wqe_idx].elem;
+ return NULL;
- return wqe;
+ return qp->sq_base[*wqe_idx].elem;
}
/**
@@ -213,8 +208,8 @@ static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
* @cq: cq for poll
* @info: info return for successful completion
*/
-static enum irdma_status_code
-irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info)
+static int irdma_puda_poll_info(struct irdma_sc_cq *cq,
+ struct irdma_puda_cmpl_info *info)
{
struct irdma_cq_uk *cq_uk = &cq->cq_uk;
u64 qword0, qword2, qword3, qword6;
@@ -233,7 +228,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info)
get_64bit_val(cqe, 24, &qword3);
valid_bit = (bool)FIELD_GET(IRDMA_CQ_VALID, qword3);
if (valid_bit != cq_uk->polarity)
- return IRDMA_ERR_Q_EMPTY;
+ return -ENOENT;
if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
@@ -246,7 +241,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info)
if (!peek_head)
polarity ^= 1;
if (polarity != cq_uk->polarity)
- return IRDMA_ERR_Q_EMPTY;
+ return -ENOENT;
IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
@@ -267,7 +262,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info)
major_err = (u32)(FIELD_GET(IRDMA_CQ_MAJERR, qword3));
minor_err = (u32)(FIELD_GET(IRDMA_CQ_MINERR, qword3));
info->compl_error = major_err << 16 | minor_err;
- return IRDMA_ERR_CQ_COMPL_ERROR;
+ return -EIO;
}
get_64bit_val(cqe, 0, &qword0);
@@ -319,14 +314,13 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info)
* @cq: cq getting interrupt
* @compl_err: return any completion err
*/
-enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev,
- struct irdma_sc_cq *cq,
- u32 *compl_err)
+int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq,
+ u32 *compl_err)
{
struct irdma_qp_uk *qp;
struct irdma_cq_uk *cq_uk = &cq->cq_uk;
struct irdma_puda_cmpl_info info = {};
- enum irdma_status_code ret = 0;
+ int ret = 0;
struct irdma_puda_buf *buf;
struct irdma_puda_rsrc *rsrc;
u8 cq_type = cq->cq_type;
@@ -337,24 +331,24 @@ enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev,
cq->vsi->ieq;
} else {
ibdev_dbg(to_ibdev(dev), "PUDA: qp_type error\n");
- return IRDMA_ERR_BAD_PTR;
+ return -EINVAL;
}
ret = irdma_puda_poll_info(cq, &info);
*compl_err = info.compl_error;
- if (ret == IRDMA_ERR_Q_EMPTY)
+ if (ret == -ENOENT)
return ret;
if (ret)
goto done;
qp = info.qp;
if (!qp || !rsrc) {
- ret = IRDMA_ERR_BAD_PTR;
+ ret = -EFAULT;
goto done;
}
if (qp->qp_id != rsrc->qp_id) {
- ret = IRDMA_ERR_BAD_PTR;
+ ret = -EFAULT;
goto done;
}
@@ -422,8 +416,7 @@ done:
* @qp: puda qp for send
* @info: buffer information for transmit
*/
-enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp,
- struct irdma_puda_send_info *info)
+int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info)
{
__le64 *wqe;
u32 iplen, l4len;
@@ -443,7 +436,7 @@ enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp,
wqe = irdma_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
/* Third line of WQE descriptor */
@@ -503,7 +496,7 @@ void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc,
struct irdma_puda_buf *buf)
{
struct irdma_puda_send_info info;
- enum irdma_status_code ret = 0;
+ int ret = 0;
unsigned long flags;
spin_lock_irqsave(&rsrc->bufpool_lock, flags);
@@ -603,19 +596,18 @@ static void irdma_puda_qp_setctx(struct irdma_puda_rsrc *rsrc)
* @dev: Device
* @qp: Resource qp
*/
-static enum irdma_status_code irdma_puda_qp_wqe(struct irdma_sc_dev *dev,
- struct irdma_sc_qp *qp)
+static int irdma_puda_qp_wqe(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
{
struct irdma_sc_cqp *cqp;
__le64 *wqe;
u64 hdr;
struct irdma_ccq_cqe_info compl_info;
- enum irdma_status_code status = 0;
+ int status = 0;
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
set_64bit_val(wqe, 40, qp->shadow_area_pa);
@@ -643,11 +635,11 @@ static enum irdma_status_code irdma_puda_qp_wqe(struct irdma_sc_dev *dev,
* irdma_puda_qp_create - create qp for resource
* @rsrc: resource to use for buffer
*/
-static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc)
+static int irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc)
{
struct irdma_sc_qp *qp = &rsrc->qp;
struct irdma_qp_uk *ukqp = &qp->qp_uk;
- enum irdma_status_code ret = 0;
+ int ret = 0;
u32 sq_size, rq_size;
struct irdma_dma_mem *mem;
@@ -659,7 +651,7 @@ static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc)
rsrc->qpmem.size, &rsrc->qpmem.pa,
GFP_KERNEL);
if (!rsrc->qpmem.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
mem = &rsrc->qpmem;
memset(mem->va, 0, rsrc->qpmem.size);
@@ -722,19 +714,18 @@ static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc)
* @dev: Device
* @cq: resource for cq
*/
-static enum irdma_status_code irdma_puda_cq_wqe(struct irdma_sc_dev *dev,
- struct irdma_sc_cq *cq)
+static int irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
{
__le64 *wqe;
struct irdma_sc_cqp *cqp;
u64 hdr;
struct irdma_ccq_cqe_info compl_info;
- enum irdma_status_code status = 0;
+ int status = 0;
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
@@ -775,11 +766,11 @@ static enum irdma_status_code irdma_puda_cq_wqe(struct irdma_sc_dev *dev,
* irdma_puda_cq_create - create cq for resource
* @rsrc: resource for which cq to create
*/
-static enum irdma_status_code irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc)
+static int irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc)
{
struct irdma_sc_dev *dev = rsrc->dev;
struct irdma_sc_cq *cq = &rsrc->cq;
- enum irdma_status_code ret = 0;
+ int ret = 0;
u32 cqsize;
struct irdma_dma_mem *mem;
struct irdma_cq_init_info info = {};
@@ -792,7 +783,7 @@ static enum irdma_status_code irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc)
rsrc->cqmem.va = dma_alloc_coherent(dev->hw->device, rsrc->cqmem.size,
&rsrc->cqmem.pa, GFP_KERNEL);
if (!rsrc->cqmem.va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
mem = &rsrc->cqmem;
info.dev = dev;
@@ -833,7 +824,7 @@ error:
*/
static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc)
{
- enum irdma_status_code ret;
+ int ret;
struct irdma_ccq_cqe_info compl_info;
struct irdma_sc_dev *dev = rsrc->dev;
@@ -865,7 +856,7 @@ static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc)
*/
static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc)
{
- enum irdma_status_code ret;
+ int ret;
struct irdma_ccq_cqe_info compl_info;
struct irdma_sc_dev *dev = rsrc->dev;
@@ -967,8 +958,7 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
* @rsrc: resource for buffer allocation
* @count: number of buffers to create
*/
-static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc,
- u32 count)
+static int irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, u32 count)
{
u32 i;
struct irdma_puda_buf *buf;
@@ -978,7 +968,7 @@ static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc,
buf = irdma_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
if (!buf) {
rsrc->stats_buf_alloc_fail++;
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
irdma_puda_ret_bufpool(rsrc, buf);
rsrc->alloc_buf_count++;
@@ -1001,11 +991,11 @@ static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc,
* @vsi: sc VSI struct
* @info: resource information
*/
-enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
- struct irdma_puda_rsrc_info *info)
+int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_rsrc_info *info)
{
struct irdma_sc_dev *dev = vsi->dev;
- enum irdma_status_code ret = 0;
+ int ret = 0;
struct irdma_puda_rsrc *rsrc;
u32 pudasize;
u32 sqwridsize, rqwridsize;
@@ -1023,12 +1013,12 @@ enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
vmem = &vsi->ieq_mem;
break;
default:
- return IRDMA_NOT_SUPPORTED;
+ return -EOPNOTSUPP;
}
vmem->size = pudasize + sqwridsize + rqwridsize;
vmem->va = kzalloc(vmem->size, GFP_KERNEL);
if (!vmem->va)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
rsrc = vmem->va;
spin_lock_init(&rsrc->bufpool_lock);
@@ -1046,7 +1036,7 @@ enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
rsrc->xmit_complete = irdma_ieq_tx_compl;
break;
default:
- return IRDMA_NOT_SUPPORTED;
+ return -EOPNOTSUPP;
}
rsrc->type = info->type;
@@ -1323,12 +1313,12 @@ static void irdma_ieq_compl_pfpdu(struct irdma_puda_rsrc *ieq,
* @buf: first receive buffer
* @fpdu_len: total length of fpdu
*/
-static enum irdma_status_code
-irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist,
- struct list_head *pbufl, struct irdma_puda_buf *buf,
- u16 fpdu_len)
+static int irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu,
+ struct list_head *rxlist,
+ struct list_head *pbufl,
+ struct irdma_puda_buf *buf, u16 fpdu_len)
{
- enum irdma_status_code status = 0;
+ int status = 0;
struct irdma_puda_buf *nextbuf;
u32 nextseqnum;
u16 plen = fpdu_len - buf->datalen;
@@ -1338,13 +1328,13 @@ irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist,
do {
nextbuf = irdma_puda_get_listbuf(rxlist);
if (!nextbuf) {
- status = IRDMA_ERR_list_empty;
+ status = -ENOBUFS;
break;
}
list_add_tail(&nextbuf->list, pbufl);
if (nextbuf->seqnum != nextseqnum) {
pfpdu->bad_seq_num++;
- status = IRDMA_ERR_SEQ_NUM;
+ status = -ERANGE;
break;
}
if (nextbuf->datalen >= plen) {
@@ -1366,11 +1356,11 @@ irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist,
* @buf: receive buffer
* @fpdu_len: fpdu len in the buffer
*/
-static enum irdma_status_code
-irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu,
- struct irdma_puda_buf *buf, u16 fpdu_len)
+static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq,
+ struct irdma_pfpdu *pfpdu,
+ struct irdma_puda_buf *buf, u16 fpdu_len)
{
- enum irdma_status_code status = 0;
+ int status = 0;
u8 *crcptr;
u32 mpacrc;
u32 seqnum = buf->seqnum;
@@ -1390,7 +1380,7 @@ irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu,
txbuf = irdma_puda_get_bufpool(ieq);
if (!txbuf) {
pfpdu->no_tx_bufs++;
- status = IRDMA_ERR_NO_TXBUFS;
+ status = -ENOBUFS;
goto error;
}
@@ -1434,9 +1424,9 @@ error:
* @pfpdu: partial management per user qp
* @buf: receive buffer
*/
-static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
- struct irdma_pfpdu *pfpdu,
- struct irdma_puda_buf *buf)
+static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
+ struct irdma_pfpdu *pfpdu,
+ struct irdma_puda_buf *buf)
{
u16 fpdu_len = 0;
u16 datalen = buf->datalen;
@@ -1450,7 +1440,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
bool partial = false;
struct irdma_puda_buf *txbuf;
struct list_head *rxlist = &pfpdu->rxlist;
- enum irdma_status_code ret = 0;
+ int ret = 0;
ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
while (datalen) {
@@ -1459,7 +1449,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
ibdev_dbg(to_ibdev(ieq->dev),
"IEQ: error bad fpdu len\n");
list_add(&buf->list, rxlist);
- return IRDMA_ERR_MPA_CRC;
+ return -EINVAL;
}
if (datalen < fpdu_len) {
@@ -1475,7 +1465,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
list_add(&buf->list, rxlist);
ibdev_dbg(to_ibdev(ieq->dev),
"ERR: IRDMA_ERR_MPA_CRC\n");
- return IRDMA_ERR_MPA_CRC;
+ return -EINVAL;
}
full++;
pfpdu->fpdu_processed++;
@@ -1490,7 +1480,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
if (!txbuf) {
pfpdu->no_tx_bufs++;
list_add(&buf->list, rxlist);
- return IRDMA_ERR_NO_TXBUFS;
+ return -ENOBUFS;
}
/* modify txbuf's buffer header */
irdma_ieq_setup_tx_buf(buf, txbuf);
@@ -1539,7 +1529,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
struct irdma_pfpdu *pfpdu = &qp->pfpdu;
struct list_head *rxlist = &pfpdu->rxlist;
struct irdma_puda_buf *buf;
- enum irdma_status_code status;
+ int status;
do {
if (list_empty(rxlist))
@@ -1557,7 +1547,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
}
/* keep processing buffers from the head of the list */
status = irdma_ieq_process_buf(ieq, pfpdu, buf);
- if (status == IRDMA_ERR_MPA_CRC) {
+ if (status == -EINVAL) {
pfpdu->mpa_crc_err = true;
while (!list_empty(rxlist)) {
buf = irdma_puda_get_listbuf(rxlist);
@@ -1576,8 +1566,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
* @qp: qp pointer
* @buf: buf received on IEQ used to create AH
*/
-static enum irdma_status_code irdma_ieq_create_ah(struct irdma_sc_qp *qp,
- struct irdma_puda_buf *buf)
+static int irdma_ieq_create_ah(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf)
{
struct irdma_ah_info ah_info = {};
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
index db3a51170020..5f5124db6ddf 100644
--- a/drivers/infiniband/hw/irdma/puda.h
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -151,42 +151,33 @@ void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc,
struct irdma_puda_buf *buf);
void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc,
struct irdma_puda_buf *buf);
-enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp,
- struct irdma_puda_send_info *info);
-enum irdma_status_code
-irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
- struct irdma_puda_rsrc_info *info);
+int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info);
+int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_rsrc_info *info);
void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
bool reset);
-enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev,
- struct irdma_sc_cq *cq,
- u32 *compl_err);
+int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq,
+ u32 *compl_err);
struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
-enum irdma_status_code
-irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
- struct irdma_puda_buf *buf);
-enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc,
- void *addr, u32 len, u32 val);
-enum irdma_status_code irdma_init_hash_desc(struct shash_desc **desc);
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf);
+int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val);
+int irdma_init_hash_desc(struct shash_desc **desc);
void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
void irdma_free_hash_desc(struct shash_desc *desc);
-void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
- u32 seqnum);
-enum irdma_status_code irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_qp *qp);
-enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_cq *cq);
-enum irdma_status_code irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum);
+int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
+int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp,
struct irdma_ah_info *ah_info);
-enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev,
- struct irdma_ah_info *ah_info,
- bool wait, enum puda_rsrc_type type,
- void *cb_param,
- struct irdma_sc_ah **ah);
+int irdma_puda_create_ah(struct irdma_sc_dev *dev,
+ struct irdma_ah_info *ah_info, bool wait,
+ enum puda_rsrc_type type, void *cb_param,
+ struct irdma_sc_ah **ah);
void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah);
void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
struct irdma_puda_rsrc *ieq);
diff --git a/drivers/infiniband/hw/irdma/status.h b/drivers/infiniband/hw/irdma/status.h
deleted file mode 100644
index 22ea3888253a..000000000000
--- a/drivers/infiniband/hw/irdma/status.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-/* Copyright (c) 2015 - 2020 Intel Corporation */
-#ifndef IRDMA_STATUS_H
-#define IRDMA_STATUS_H
-
-/* Error Codes */
-enum irdma_status_code {
- IRDMA_SUCCESS = 0,
- IRDMA_ERR_NVM = -1,
- IRDMA_ERR_NVM_CHECKSUM = -2,
- IRDMA_ERR_CFG = -4,
- IRDMA_ERR_PARAM = -5,
- IRDMA_ERR_DEVICE_NOT_SUPPORTED = -6,
- IRDMA_ERR_RESET_FAILED = -7,
- IRDMA_ERR_SWFW_SYNC = -8,
- IRDMA_ERR_NO_MEMORY = -9,
- IRDMA_ERR_BAD_PTR = -10,
- IRDMA_ERR_INVALID_PD_ID = -11,
- IRDMA_ERR_INVALID_QP_ID = -12,
- IRDMA_ERR_INVALID_CQ_ID = -13,
- IRDMA_ERR_INVALID_CEQ_ID = -14,
- IRDMA_ERR_INVALID_AEQ_ID = -15,
- IRDMA_ERR_INVALID_SIZE = -16,
- IRDMA_ERR_INVALID_ARP_INDEX = -17,
- IRDMA_ERR_INVALID_FPM_FUNC_ID = -18,
- IRDMA_ERR_QP_INVALID_MSG_SIZE = -19,
- IRDMA_ERR_QP_TOOMANY_WRS_POSTED = -20,
- IRDMA_ERR_INVALID_FRAG_COUNT = -21,
- IRDMA_ERR_Q_EMPTY = -22,
- IRDMA_ERR_INVALID_ALIGNMENT = -23,
- IRDMA_ERR_FLUSHED_Q = -24,
- IRDMA_ERR_INVALID_PUSH_PAGE_INDEX = -25,
- IRDMA_ERR_INVALID_INLINE_DATA_SIZE = -26,
- IRDMA_ERR_TIMEOUT = -27,
- IRDMA_ERR_OPCODE_MISMATCH = -28,
- IRDMA_ERR_CQP_COMPL_ERROR = -29,
- IRDMA_ERR_INVALID_VF_ID = -30,
- IRDMA_ERR_INVALID_HMCFN_ID = -31,
- IRDMA_ERR_BACKING_PAGE_ERROR = -32,
- IRDMA_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
- IRDMA_ERR_INVALID_PBLE_INDEX = -34,
- IRDMA_ERR_INVALID_SD_INDEX = -35,
- IRDMA_ERR_INVALID_PAGE_DESC_INDEX = -36,
- IRDMA_ERR_INVALID_SD_TYPE = -37,
- IRDMA_ERR_MEMCPY_FAILED = -38,
- IRDMA_ERR_INVALID_HMC_OBJ_INDEX = -39,
- IRDMA_ERR_INVALID_HMC_OBJ_COUNT = -40,
- IRDMA_ERR_BUF_TOO_SHORT = -43,
- IRDMA_ERR_BAD_IWARP_CQE = -44,
- IRDMA_ERR_NVM_BLANK_MODE = -45,
- IRDMA_ERR_NOT_IMPL = -46,
- IRDMA_ERR_PE_DOORBELL_NOT_ENA = -47,
- IRDMA_ERR_NOT_READY = -48,
- IRDMA_NOT_SUPPORTED = -49,
- IRDMA_ERR_FIRMWARE_API_VER = -50,
- IRDMA_ERR_RING_FULL = -51,
- IRDMA_ERR_MPA_CRC = -61,
- IRDMA_ERR_NO_TXBUFS = -62,
- IRDMA_ERR_SEQ_NUM = -63,
- IRDMA_ERR_list_empty = -64,
- IRDMA_ERR_INVALID_MAC_ADDR = -65,
- IRDMA_ERR_BAD_STAG = -66,
- IRDMA_ERR_CQ_COMPL_ERROR = -67,
- IRDMA_ERR_Q_DESTROYED = -68,
- IRDMA_ERR_INVALID_FEAT_CNT = -69,
- IRDMA_ERR_REG_CQ_FULL = -70,
- IRDMA_ERR_VF_MSG_ERROR = -71,
- IRDMA_ERR_NO_INTR = -72,
- IRDMA_ERR_REG_QSET = -73,
-};
-#endif /* IRDMA_STATUS_H */
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
index 9483bb3e10ea..517d41a1c289 100644
--- a/drivers/infiniband/hw/irdma/type.h
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -2,7 +2,6 @@
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef IRDMA_TYPE_H
#define IRDMA_TYPE_H
-#include "status.h"
#include "osdep.h"
#include "irdma.h"
#include "user.h"
@@ -99,6 +98,7 @@ enum irdma_term_mpa_errors {
enum irdma_qp_event_type {
IRDMA_QP_EVENT_CATASTROPHIC,
IRDMA_QP_EVENT_ACCESS_ERR,
+ IRDMA_QP_EVENT_REQ_ERR,
};
enum irdma_hw_stats_index_32b {
@@ -402,8 +402,8 @@ struct irdma_sc_cqp {
u64 host_ctx_pa;
void *back_cqp;
struct irdma_sc_dev *dev;
- enum irdma_status_code (*process_cqp_sds)(struct irdma_sc_dev *dev,
- struct irdma_update_sds_info *info);
+ int (*process_cqp_sds)(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
struct irdma_dma_mem sdbuf;
struct irdma_ring sq_ring;
struct irdma_cqp_quanta *sq_base;
@@ -605,12 +605,14 @@ struct irdma_sc_vsi {
struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
struct irdma_vsi_pestat *pestat;
atomic_t qp_suspend_reqs;
- enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi,
- struct irdma_ws_node *tc_node);
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
void (*unregister_qset)(struct irdma_sc_vsi *vsi,
struct irdma_ws_node *tc_node);
u8 qos_rel_bw;
u8 qos_prio_type;
+ u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
+ bool dscp_mode:1;
};
struct irdma_sc_dev {
@@ -655,7 +657,7 @@ struct irdma_sc_dev {
bool vchnl_up:1;
bool ceq_valid:1;
u8 pci_rev;
- enum irdma_status_code (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri);
+ int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri);
void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri);
void (*ws_reset)(struct irdma_sc_vsi *vsi);
};
@@ -735,11 +737,13 @@ struct irdma_l2params {
u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY];
u16 mtu;
u8 up2tc[IRDMA_MAX_USER_PRIORITY];
+ u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
u8 num_tc;
u8 vsi_rel_bw;
u8 vsi_prio_type;
bool mtu_changed:1;
bool tc_changed:1;
+ bool dscp_mode:1;
};
struct irdma_vsi_init_info {
@@ -750,8 +754,8 @@ struct irdma_vsi_init_info {
u16 pf_data_vsi_num;
enum irdma_vm_vf_type vm_vf_type;
u16 vm_id;
- enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi,
- struct irdma_ws_node *tc_node);
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
void (*unregister_qset)(struct irdma_sc_vsi *vsi,
struct irdma_ws_node *tc_node);
};
@@ -1198,29 +1202,27 @@ struct irdma_irq_ops {
};
void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq);
-enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
- bool check_overflow, bool post_sq);
-enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch,
- bool post_sq);
-enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
- struct irdma_ccq_cqe_info *info);
-enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *ccq,
- struct irdma_ccq_init_info *info);
-
-enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch);
-enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq);
-
-enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch,
- bool post_sq);
-enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
- struct irdma_ceq_init_info *info);
+int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
+ bool check_overflow, bool post_sq);
+int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq);
+int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_cqe_info *info);
+int irdma_sc_ccq_init(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_init_info *info);
+
+int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch);
+int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq);
+
+int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq);
+int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
+ struct irdma_ceq_init_info *info);
void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq);
void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq);
-enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
- struct irdma_aeq_init_info *info);
-enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
- struct irdma_aeqe_info *info);
+int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
+ struct irdma_aeq_init_info *info);
+int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
+ struct irdma_aeqe_info *info);
void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count);
void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
@@ -1228,31 +1230,27 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i
void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable);
void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout,
struct irdma_sc_dev *dev);
-enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err,
- u16 *min_err);
-enum irdma_status_code irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp);
-enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
- struct irdma_cqp_init_info *info);
+int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err);
+int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp);
+int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_init_info *info);
void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp);
-enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode,
- struct irdma_ccq_cqe_info *cmpl_info);
-enum irdma_status_code irdma_sc_fast_register(struct irdma_sc_qp *qp,
- struct irdma_fast_reg_stag_info *info,
- bool post_sq);
-enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp,
- struct irdma_create_qp_info *info,
- u64 scratch, bool post_sq);
-enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp,
- u64 scratch, bool remove_hash_idx,
- bool ignore_mw_bnd, bool post_sq);
-enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
- struct irdma_qp_flush_info *info,
- u64 scratch, bool post_sq);
-enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp,
- struct irdma_qp_init_info *info);
-enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp,
- struct irdma_modify_qp_info *info,
- u64 scratch, bool post_sq);
+int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode,
+ struct irdma_ccq_cqe_info *cmpl_info);
+int irdma_sc_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info, bool post_sq);
+int irdma_sc_qp_create(struct irdma_sc_qp *qp,
+ struct irdma_create_qp_info *info, u64 scratch,
+ bool post_sq);
+int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
+ bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq);
+int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, u64 scratch,
+ bool post_sq);
+int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info);
+int irdma_sc_qp_modify(struct irdma_sc_qp *qp,
+ struct irdma_modify_qp_info *info, u64 scratch,
+ bool post_sq);
void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size,
irdma_stag stag);
@@ -1261,14 +1259,12 @@ void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx,
struct irdma_qp_host_ctx_info *info);
void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
struct irdma_qp_host_ctx_info *info);
-enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch,
- bool post_sq);
-enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq,
- struct irdma_cq_init_info *info);
+int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq);
+int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info);
void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info);
-enum irdma_status_code irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp,
- u64 scratch, u8 hmc_fn_id,
- bool post_sq, bool poll_registers);
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers);
void sc_vsi_update_stats(struct irdma_sc_vsi *vsi);
struct cqp_info {
diff --git a/drivers/infiniband/hw/irdma/uda.c b/drivers/infiniband/hw/irdma/uda.c
index f5b1b6150cdc..284cec2a74de 100644
--- a/drivers/infiniband/hw/irdma/uda.c
+++ b/drivers/infiniband/hw/irdma/uda.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
/* Copyright (c) 2016 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
#include "osdep.h"
-#include "status.h"
#include "hmc.h"
#include "defs.h"
#include "type.h"
@@ -16,16 +17,15 @@
* @op: Operation
* @scratch: u64 saved to be used during cqp completion
*/
-enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp,
- struct irdma_ah_info *info,
- u32 op, u64 scratch)
+int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
+ u32 op, u64 scratch)
{
__le64 *wqe;
u64 qw1, qw2;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr) << 16);
qw1 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXLO, info->pd_idx) |
@@ -84,8 +84,7 @@ enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp,
* irdma_create_mg_ctx() - create a mcg context
* @info: multicast group context info
*/
-static enum irdma_status_code
-irdma_create_mg_ctx(struct irdma_mcast_grp_info *info)
+static void irdma_create_mg_ctx(struct irdma_mcast_grp_info *info)
{
struct irdma_mcast_grp_ctx_entry_info *entry_info = NULL;
u8 idx = 0; /* index in the array */
@@ -104,8 +103,6 @@ irdma_create_mg_ctx(struct irdma_mcast_grp_info *info)
ctx_idx++;
}
}
-
- return 0;
}
/**
@@ -115,27 +112,24 @@ irdma_create_mg_ctx(struct irdma_mcast_grp_info *info)
* @op: operation to perform
* @scratch: u64 saved to be used during cqp completion
*/
-enum irdma_status_code irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
- struct irdma_mcast_grp_info *info,
- u32 op, u64 scratch)
+int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info, u32 op,
+ u64 scratch)
{
__le64 *wqe;
- enum irdma_status_code ret_code = 0;
if (info->mg_id >= IRDMA_UDA_MAX_FSI_MGS) {
ibdev_dbg(to_ibdev(cqp->dev), "WQE: mg_id out of range\n");
- return IRDMA_ERR_PARAM;
+ return -EINVAL;
}
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe) {
ibdev_dbg(to_ibdev(cqp->dev), "WQE: ring full\n");
- return IRDMA_ERR_RING_FULL;
+ return -ENOMEM;
}
- ret_code = irdma_create_mg_ctx(info);
- if (ret_code)
- return ret_code;
+ irdma_create_mg_ctx(info);
set_64bit_val(wqe, 32, info->dma_mem_mc.pa);
set_64bit_val(wqe, 16,
@@ -196,8 +190,8 @@ static bool irdma_compare_mgs(struct irdma_mcast_grp_ctx_entry_info *entry1,
* @ctx: Multcast group context
* @mg: Multcast group info
*/
-enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
- struct irdma_mcast_grp_ctx_entry_info *mg)
+int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg)
{
u32 idx;
bool free_entry_found = false;
@@ -226,7 +220,7 @@ enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
return 0;
}
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
/**
@@ -237,8 +231,8 @@ enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
* Finds and removes a specific mulicast group from context, all
* parameters must match to remove a multicast group.
*/
-enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
- struct irdma_mcast_grp_ctx_entry_info *mg)
+int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg)
{
u32 idx;
@@ -267,5 +261,5 @@ enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
}
}
- return IRDMA_ERR_PARAM;
+ return -EINVAL;
}
diff --git a/drivers/infiniband/hw/irdma/uda.h b/drivers/infiniband/hw/irdma/uda.h
index a4ad0367dc96..fe4820ff0cca 100644
--- a/drivers/infiniband/hw/irdma/uda.h
+++ b/drivers/infiniband/hw/irdma/uda.h
@@ -32,56 +32,54 @@ struct irdma_sc_ah {
struct irdma_ah_info ah_info;
};
-enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
- struct irdma_mcast_grp_ctx_entry_info *mg);
-enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
- struct irdma_mcast_grp_ctx_entry_info *mg);
-enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
- u32 op, u64 scratch);
-enum irdma_status_code irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
- struct irdma_mcast_grp_info *info,
- u32 op, u64 scratch);
+int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg);
+int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg);
+int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
+ u32 op, u64 scratch);
+int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info, u32 op,
+ u64 scratch);
static inline void irdma_sc_init_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah)
{
ah->dev = dev;
}
-static inline enum irdma_status_code irdma_sc_create_ah(struct irdma_sc_cqp *cqp,
- struct irdma_ah_info *info,
- u64 scratch)
+static inline int irdma_sc_create_ah(struct irdma_sc_cqp *cqp,
+ struct irdma_ah_info *info, u64 scratch)
{
return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_CREATE_ADDR_HANDLE,
scratch);
}
-static inline enum irdma_status_code irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp,
- struct irdma_ah_info *info,
- u64 scratch)
+static inline int irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp,
+ struct irdma_ah_info *info, u64 scratch)
{
return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_DESTROY_ADDR_HANDLE,
scratch);
}
-static inline enum irdma_status_code irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp,
- struct irdma_mcast_grp_info *info,
- u64 scratch)
+static inline int irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
{
return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_CREATE_MCAST_GRP,
scratch);
}
-static inline enum irdma_status_code irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp,
- struct irdma_mcast_grp_info *info,
- u64 scratch)
+static inline int irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
{
return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_MODIFY_MCAST_GRP,
scratch);
}
-static inline enum irdma_status_code irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp,
- struct irdma_mcast_grp_info *info,
- u64 scratch)
+static inline int irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
{
return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_DESTROY_MCAST_GRP,
scratch);
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index 57a9444e9ea7..a6e5d350a94c 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
-#include "status.h"
#include "defs.h"
#include "user.h"
#include "irdma.h"
@@ -56,7 +55,7 @@ static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset,
* irdma_nop_1 - insert a NOP wqe
* @qp: hw qp ptr
*/
-static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp)
+static int irdma_nop_1(struct irdma_qp_uk *qp)
{
u64 hdr;
__le64 *wqe;
@@ -64,7 +63,7 @@ static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp)
bool signaled = false;
if (!qp->sq_ring.head)
- return IRDMA_ERR_PARAM;
+ return -EINVAL;
wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
wqe = qp->sq_base[wqe_idx].elem;
@@ -245,7 +244,7 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx)
{
__le64 *wqe;
- enum irdma_status_code ret_code;
+ int ret_code;
if (IRDMA_RING_FULL_ERR(qp->rq_ring))
return NULL;
@@ -268,16 +267,15 @@ __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx)
* @info: post sq information
* @post_sq: flag to post sq
*/
-enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool post_sq)
+int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq)
{
u64 hdr;
__le64 *wqe;
struct irdma_rdma_write *op_info;
u32 i, wqe_idx;
u32 total_size = 0, byte_off;
- enum irdma_status_code ret_code;
+ int ret_code;
u32 frag_cnt, addl_frag_cnt;
bool read_fence = false;
u16 quanta;
@@ -286,7 +284,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp,
op_info = &info->op.rdma_write;
if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
for (i = 0; i < op_info->num_lo_sges; i++)
total_size += op_info->lo_sg_list[i].length;
@@ -305,7 +303,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp,
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(qp, wqe_idx);
@@ -370,12 +368,11 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp,
* @inv_stag: flag for inv_stag
* @post_sq: flag to post sq
*/
-enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool inv_stag, bool post_sq)
+int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool inv_stag, bool post_sq)
{
struct irdma_rdma_read *op_info;
- enum irdma_status_code ret_code;
+ int ret_code;
u32 i, byte_off, total_size = 0;
bool local_fence = false;
u32 addl_frag_cnt;
@@ -388,7 +385,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp,
op_info = &info->op.rdma_read;
if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
for (i = 0; i < op_info->num_lo_sges; i++)
total_size += op_info->lo_sg_list[i].length;
@@ -400,7 +397,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp,
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(qp, wqe_idx);
@@ -457,15 +454,14 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp,
* @info: post sq information
* @post_sq: flag to post sq
*/
-enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool post_sq)
+int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq)
{
__le64 *wqe;
struct irdma_post_send *op_info;
u64 hdr;
u32 i, wqe_idx, total_size = 0, byte_off;
- enum irdma_status_code ret_code;
+ int ret_code;
u32 frag_cnt, addl_frag_cnt;
bool read_fence = false;
u16 quanta;
@@ -474,7 +470,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp,
op_info = &info->op.send;
if (qp->max_sq_frag_cnt < op_info->num_sges)
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
for (i = 0; i < op_info->num_sges; i++)
total_size += op_info->sg_list[i].length;
@@ -490,7 +486,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp,
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(qp, wqe_idx);
@@ -501,7 +497,8 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp,
FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
i = 0;
} else {
- qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->sg_list,
+ qp->wqe_ops.iw_set_fragment(wqe, 0,
+ frag_cnt ? op_info->sg_list : NULL,
qp->swqe_polarity);
i = 1;
}
@@ -678,9 +675,8 @@ static u16 irdma_inline_data_size_to_quanta(u32 data_size)
* @info: post sq information
* @post_sq: flag to post sq
*/
-enum irdma_status_code
-irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
- bool post_sq)
+int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
{
__le64 *wqe;
struct irdma_inline_rdma_write *op_info;
@@ -693,13 +689,13 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in
op_info = &info->op.inline_rdma_write;
if (op_info->len > qp->max_inline_data)
- return IRDMA_ERR_INVALID_INLINE_DATA_SIZE;
+ return -EINVAL;
quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(qp, wqe_idx);
@@ -745,9 +741,8 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in
* @info: post sq information
* @post_sq: flag to post sq
*/
-enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool post_sq)
+int irdma_uk_inline_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
{
__le64 *wqe;
struct irdma_post_inline_send *op_info;
@@ -760,13 +755,13 @@ enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp,
op_info = &info->op.inline_send;
if (op_info->len > qp->max_inline_data)
- return IRDMA_ERR_INVALID_INLINE_DATA_SIZE;
+ return -EINVAL;
quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(qp, wqe_idx);
@@ -817,9 +812,9 @@ enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp,
* @info: post sq information
* @post_sq: flag to post sq
*/
-enum irdma_status_code
-irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info, bool post_sq)
+int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq)
{
__le64 *wqe;
struct irdma_inv_local_stag *op_info;
@@ -835,7 +830,7 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
0, info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(qp, wqe_idx);
@@ -871,8 +866,8 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
* @qp: hw qp ptr
* @info: post rq information
*/
-enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
- struct irdma_post_rq_info *info)
+int irdma_uk_post_receive(struct irdma_qp_uk *qp,
+ struct irdma_post_rq_info *info)
{
u32 wqe_idx, i, byte_off;
u32 addl_frag_cnt;
@@ -880,11 +875,11 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
u64 hdr;
if (qp->max_rq_frag_cnt < info->num_sges)
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
qp->rq_wrid_array[wqe_idx] = info->wr_id;
addl_frag_cnt = info->num_sges > 1 ? (info->num_sges - 1) : 0;
@@ -1000,17 +995,18 @@ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
* @cq: hw cq
* @info: cq poll information returned
*/
-enum irdma_status_code
-irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
+int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
+ struct irdma_cq_poll_info *info)
{
u64 comp_ctx, qword0, qword2, qword3;
__le64 *cqe;
struct irdma_qp_uk *qp;
struct irdma_ring *pring = NULL;
u32 wqe_idx, q_type;
- enum irdma_status_code ret_code;
+ int ret_code;
bool move_cq_head = true;
u8 polarity;
+ u8 op_type;
bool ext_valid;
__le64 *ext_cqe;
@@ -1022,7 +1018,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
get_64bit_val(cqe, 24, &qword3);
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
if (polarity != cq->polarity)
- return IRDMA_ERR_Q_EMPTY;
+ return -ENOENT;
/* Ensure CQE contents are read after valid bit is checked */
dma_rmb();
@@ -1045,7 +1041,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
polarity ^= 1;
}
if (polarity != cq->polarity)
- return IRDMA_ERR_Q_EMPTY;
+ return -ENOENT;
/* Ensure ext CQE contents are read after ext valid bit is checked */
dma_rmb();
@@ -1112,7 +1108,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3);
qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
if (!qp || qp->destroy_pending) {
- ret_code = IRDMA_ERR_Q_DESTROYED;
+ ret_code = -EFAULT;
goto exit;
}
wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
@@ -1126,7 +1122,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED ||
info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) {
if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) {
- ret_code = IRDMA_ERR_Q_EMPTY;
+ ret_code = -ENOENT;
goto exit;
}
@@ -1186,14 +1182,13 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
} else {
if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) {
- ret_code = IRDMA_ERR_Q_EMPTY;
+ ret_code = -ENOENT;
goto exit;
}
do {
__le64 *sw_wqe;
u64 wqe_qword;
- u8 op_type;
u32 tail;
tail = qp->sq_ring.tail;
@@ -1210,6 +1205,8 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
break;
}
} while (1);
+ if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR)
+ info->minor_err = FLUSH_MW_BIND_ERR;
qp->sq_flush_seen = true;
if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
qp->sq_flush_complete = true;
@@ -1303,15 +1300,15 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
* @sqdepth: depth of SQ
*
*/
-enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs,
- u32 sq_size, u8 shift, u32 *sqdepth)
+int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
+ u32 *sqdepth)
{
*sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD);
if (*sqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift))
*sqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift;
else if (*sqdepth > uk_attrs->max_hw_wq_quanta)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
return 0;
}
@@ -1323,15 +1320,15 @@ enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs,
* @shift: shift which determines size of WQE
* @rqdepth: depth of RQ
*/
-enum irdma_status_code irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs,
- u32 rq_size, u8 shift, u32 *rqdepth)
+int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
+ u32 *rqdepth)
{
*rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD);
if (*rqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift))
*rqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift;
else if (*rqdepth > uk_attrs->max_hw_rq_quanta)
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
return 0;
}
@@ -1381,17 +1378,16 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp,
* allowed. Then size of wqe * the number of wqes should be the
* amount of memory allocated for sq and rq.
*/
-enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp,
- struct irdma_qp_uk_init_info *info)
+int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
{
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
u32 sq_ring_size;
u8 sqshift, rqshift;
qp->uk_attrs = info->uk_attrs;
if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags ||
info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags)
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift);
if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) {
@@ -1502,8 +1498,7 @@ void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq)
* @signaled: signaled for completion
* @post_sq: ring doorbell
*/
-enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id,
- bool signaled, bool post_sq)
+int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq)
{
__le64 *wqe;
u64 hdr;
@@ -1515,7 +1510,7 @@ enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id,
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
0, &info);
if (!wqe)
- return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+ return -ENOMEM;
irdma_clr_wqes(qp, wqe_idx);
@@ -1541,7 +1536,7 @@ enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id,
* @frag_cnt: number of fragments
* @quanta: quanta for frag_cnt
*/
-enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta)
+int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta)
{
switch (frag_cnt) {
case 0:
@@ -1577,7 +1572,7 @@ enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta)
*quanta = 8;
break;
default:
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
}
return 0;
@@ -1588,7 +1583,7 @@ enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta)
* @frag_cnt: number of fragments
* @wqe_size: size in bytes given frag_cnt
*/
-enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size)
+int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size)
{
switch (frag_cnt) {
case 0:
@@ -1615,7 +1610,7 @@ enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size)
*wqe_size = 256;
break;
default:
- return IRDMA_ERR_INVALID_FRAG_COUNT;
+ return -EINVAL;
}
return 0;
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
index 3c811fb88404..2ef61923c926 100644
--- a/drivers/infiniband/hw/irdma/user.h
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -103,6 +103,7 @@ enum irdma_flush_opcode {
FLUSH_FATAL_ERR,
FLUSH_RETRY_EXC_ERR,
FLUSH_MW_BIND_ERR,
+ FLUSH_REM_INV_REQ_ERR,
};
enum irdma_cmpl_status {
@@ -270,29 +271,24 @@ struct irdma_cq_poll_info {
bool imm_valid:1;
};
-enum irdma_status_code irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool post_sq);
-enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool post_sq);
-
-enum irdma_status_code irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id,
- bool signaled, bool post_sq);
-enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
- struct irdma_post_rq_info *info);
+int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_inline_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled,
+ bool post_sq);
+int irdma_uk_post_receive(struct irdma_qp_uk *qp,
+ struct irdma_post_rq_info *info);
void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp);
-enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool inv_stag, bool post_sq);
-enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool post_sq);
-enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info, bool post_sq);
-enum irdma_status_code irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
- struct irdma_post_sq_info *info,
- bool post_sq);
+int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool inv_stag, bool post_sq);
+int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq);
+int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq);
+int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
struct irdma_wqe_uk_ops {
void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity);
@@ -303,16 +299,16 @@ struct irdma_wqe_uk_ops {
struct irdma_bind_window *op_info);
};
-enum irdma_status_code irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
- struct irdma_cq_poll_info *info);
+int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
+ struct irdma_cq_poll_info *info);
void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
enum irdma_cmpl_notify cq_notify);
void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size);
void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt);
void irdma_uk_cq_init(struct irdma_cq_uk *cq,
struct irdma_cq_uk_init_info *info);
-enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp,
- struct irdma_qp_uk_init_info *info);
+int irdma_uk_qp_init(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info);
struct irdma_sq_uk_wr_trk_info {
u64 wrid;
u32 wr_len;
@@ -413,16 +409,15 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
struct irdma_post_sq_info *info);
__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx);
void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq);
-enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id,
- bool signaled, bool post_sq);
-enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta);
-enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size);
+int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq);
+int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta);
+int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size);
void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
u32 inline_data, u8 *shift);
-enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs,
- u32 sq_size, u8 shift, u32 *wqdepth);
-enum irdma_status_code irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs,
- u32 rq_size, u8 shift, u32 *wqdepth);
+int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
+ u32 *wqdepth);
+int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
+ u32 *wqdepth);
void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta,
u32 wqe_idx, bool post_sq);
void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx);
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 398736d8c78a..8dfc9e154d73 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -150,31 +150,35 @@ int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
void *ptr)
{
struct in_ifaddr *ifa = ptr;
- struct net_device *netdev = ifa->ifa_dev->dev;
+ struct net_device *real_dev, *netdev = ifa->ifa_dev->dev;
struct irdma_device *iwdev;
struct ib_device *ibdev;
u32 local_ipaddr;
- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA);
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
if (!ibdev)
return NOTIFY_DONE;
iwdev = to_iwdev(ibdev);
local_ipaddr = ntohl(ifa->ifa_address);
ibdev_dbg(&iwdev->ibdev,
- "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", netdev,
- event, &local_ipaddr, netdev->dev_addr);
+ "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev,
+ event, &local_ipaddr, real_dev->dev_addr);
switch (event) {
case NETDEV_DOWN:
- irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr,
+ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
&local_ipaddr, true, IRDMA_ARP_DELETE);
- irdma_if_notify(iwdev, netdev, &local_ipaddr, true, false);
+ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false);
irdma_gid_change_event(&iwdev->ibdev);
break;
case NETDEV_UP:
case NETDEV_CHANGEADDR:
- irdma_add_arp(iwdev->rf, &local_ipaddr, true, netdev->dev_addr);
- irdma_if_notify(iwdev, netdev, &local_ipaddr, true, true);
+ irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr);
+ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true);
irdma_gid_change_event(&iwdev->ibdev);
break;
default:
@@ -196,32 +200,36 @@ int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
- struct net_device *netdev = ifa->idev->dev;
+ struct net_device *real_dev, *netdev = ifa->idev->dev;
struct irdma_device *iwdev;
struct ib_device *ibdev;
u32 local_ipaddr6[4];
- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA);
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
if (!ibdev)
return NOTIFY_DONE;
iwdev = to_iwdev(ibdev);
irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
ibdev_dbg(&iwdev->ibdev,
- "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", netdev,
- event, local_ipaddr6, netdev->dev_addr);
+ "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev,
+ event, local_ipaddr6, real_dev->dev_addr);
switch (event) {
case NETDEV_DOWN:
- irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr,
+ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
local_ipaddr6, false, IRDMA_ARP_DELETE);
- irdma_if_notify(iwdev, netdev, local_ipaddr6, false, false);
+ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, false);
irdma_gid_change_event(&iwdev->ibdev);
break;
case NETDEV_UP:
case NETDEV_CHANGEADDR:
irdma_add_arp(iwdev->rf, local_ipaddr6, false,
- netdev->dev_addr);
- irdma_if_notify(iwdev, netdev, local_ipaddr6, false, true);
+ real_dev->dev_addr);
+ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true);
irdma_gid_change_event(&iwdev->ibdev);
break;
default:
@@ -243,21 +251,23 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
void *ptr)
{
struct neighbour *neigh = ptr;
+ struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev;
struct irdma_device *iwdev;
struct ib_device *ibdev;
__be32 *p;
u32 local_ipaddr[4] = {};
bool ipv4 = true;
- ibdev = ib_device_get_by_netdev((struct net_device *)neigh->dev,
- RDMA_DRIVER_IRDMA);
- if (!ibdev)
- return NOTIFY_DONE;
-
- iwdev = to_iwdev(ibdev);
-
switch (event) {
case NETEVENT_NEIGH_UPDATE:
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
p = (__be32 *)neigh->primary_key;
if (neigh->tbl->family == AF_INET6) {
ipv4 = false;
@@ -278,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
irdma_manage_arp_cache(iwdev->rf, neigh->ha,
local_ipaddr, ipv4,
IRDMA_ARP_DELETE);
+ ib_device_put(ibdev);
break;
default:
break;
}
- ib_device_put(ibdev);
-
return NOTIFY_DONE;
}
@@ -551,12 +560,12 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf)
* @rf: RDMA PCI function
* @cqp_request: cqp request to wait
*/
-static enum irdma_status_code irdma_wait_event(struct irdma_pci_f *rf,
- struct irdma_cqp_request *cqp_request)
+static int irdma_wait_event(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request)
{
struct irdma_cqp_timeout cqp_timeout = {};
bool cqp_error = false;
- enum irdma_status_code err_code = 0;
+ int err_code = 0;
cqp_timeout.compl_cqp_cmds = rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
do {
@@ -575,17 +584,20 @@ static enum irdma_status_code irdma_wait_event(struct irdma_pci_f *rf,
rf->reset = true;
rf->gen_ops.request_reset(rf);
}
- return IRDMA_ERR_TIMEOUT;
+ return -ETIMEDOUT;
} while (1);
cqp_error = cqp_request->compl_info.error;
if (cqp_error) {
- err_code = IRDMA_ERR_CQP_COMPL_ERROR;
- if (cqp_request->compl_info.maj_err_code == 0xFFFF &&
- cqp_request->compl_info.min_err_code == 0x8029) {
- if (!rf->reset) {
- rf->reset = true;
- rf->gen_ops.request_reset(rf);
+ err_code = -EIO;
+ if (cqp_request->compl_info.maj_err_code == 0xFFFF) {
+ if (cqp_request->compl_info.min_err_code == 0x8002)
+ err_code = -EBUSY;
+ else if (cqp_request->compl_info.min_err_code == 0x8029) {
+ if (!rf->reset) {
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ }
}
}
}
@@ -643,6 +655,7 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = {
};
static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = {
+ {0xffff, 0x8002, "Invalid State"},
{0xffff, 0x8006, "Flush No Wqe Pending"},
{0xffff, 0x8007, "Modify QP Bad Close"},
{0xffff, 0x8009, "LLP Closed"},
@@ -680,16 +693,16 @@ bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd,
* @rf: RDMA PCI function
* @cqp_request: cqp request to process
*/
-enum irdma_status_code irdma_handle_cqp_op(struct irdma_pci_f *rf,
- struct irdma_cqp_request *cqp_request)
+int irdma_handle_cqp_op(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
struct cqp_cmds_info *info = &cqp_request->info;
- enum irdma_status_code status;
+ int status;
bool put_cqp_request = true;
if (rf->reset)
- return IRDMA_ERR_NOT_READY;
+ return -EBUSY;
irdma_get_cqp_request(cqp_request);
status = irdma_process_cqp_cmd(dev, info);
@@ -791,17 +804,17 @@ void *irdma_remove_cqp_head(struct irdma_sc_dev *dev)
* @sdinfo: information for sd cqp
*
*/
-enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
- struct irdma_update_sds_info *sdinfo)
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *sdinfo)
{
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_pci_f *rf = dev_to_rf(dev);
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
@@ -822,19 +835,18 @@ enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
* @qp: hardware control qp
* @op: suspend or resume
*/
-enum irdma_status_code irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp,
- u8 op)
+int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 op)
{
struct irdma_sc_dev *dev = qp->dev;
struct irdma_cqp_request *cqp_request;
struct irdma_sc_cqp *cqp = dev->cqp;
struct cqp_cmds_info *cqp_info;
struct irdma_pci_f *rf = dev_to_rf(dev);
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_info->cqp_cmd = op;
@@ -940,18 +952,17 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp)
* @val_mem: buffer for fpm
* @hmc_fn_id: function id for fpm
*/
-enum irdma_status_code
-irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
+int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
{
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_pci_f *rf = dev_to_rf(dev);
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_request->param = NULL;
@@ -975,18 +986,17 @@ irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
* @val_mem: buffer with fpm values
* @hmc_fn_id: function id for fpm
*/
-enum irdma_status_code
-irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
+int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
{
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_pci_f *rf = dev_to_rf(dev);
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_request->param = NULL;
@@ -1009,18 +1019,17 @@ irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
* @dev: device pointer
* @cq: pointer to created cq
*/
-enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_cq *cq)
+int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
{
struct irdma_pci_f *rf = dev_to_rf(dev);
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
@@ -1039,19 +1048,18 @@ enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev,
* @dev: device pointer
* @qp: pointer to created qp
*/
-enum irdma_status_code irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_qp *qp)
+int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
{
struct irdma_pci_f *rf = dev_to_rf(dev);
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_create_qp_info *qp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
qp_info = &cqp_request->info.in.u.qp_create.info;
@@ -1079,7 +1087,7 @@ static void irdma_dealloc_push_page(struct irdma_pci_f *rf,
{
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX)
return;
@@ -1179,12 +1187,10 @@ static void irdma_hw_modify_qp_callback(struct irdma_cqp_request *cqp_request)
* @info: info for modify qp
* @wait: flag to wait or not for modify qp completion
*/
-enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev,
- struct irdma_qp *iwqp,
- struct irdma_modify_qp_info *info,
- bool wait)
+int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
+ struct irdma_modify_qp_info *info, bool wait)
{
- enum irdma_status_code status;
+ int status;
struct irdma_pci_f *rf = iwdev->rf;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
@@ -1192,7 +1198,7 @@ enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev,
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
if (!wait) {
cqp_request->callback_fcn = irdma_hw_modify_qp_callback;
@@ -1230,7 +1236,7 @@ enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev,
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp,
wait);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
m_info = &cqp_info->in.u.qp_modify.info;
@@ -1271,17 +1277,17 @@ void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
* @dev: device pointer
* @qp: pointer to qp
*/
-enum irdma_status_code irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
{
struct irdma_pci_f *rf = dev_to_rf(dev);
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
@@ -1317,20 +1323,20 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
* irdma_init_hash_desc - initialize hash for crc calculation
* @desc: cryption type
*/
-enum irdma_status_code irdma_init_hash_desc(struct shash_desc **desc)
+int irdma_init_hash_desc(struct shash_desc **desc)
{
struct crypto_shash *tfm;
struct shash_desc *tdesc;
tfm = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(tfm))
- return IRDMA_ERR_MPA_CRC;
+ return -EINVAL;
tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
GFP_KERNEL);
if (!tdesc) {
crypto_free_shash(tfm);
- return IRDMA_ERR_MPA_CRC;
+ return -EINVAL;
}
tdesc->tfm = tfm;
@@ -1358,19 +1364,19 @@ void irdma_free_hash_desc(struct shash_desc *desc)
* @len: length of buffer
* @val: value to be compared
*/
-enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc,
- void *addr, u32 len, u32 val)
+int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
+ u32 val)
{
u32 crc = 0;
int ret;
- enum irdma_status_code ret_code = 0;
+ int ret_code = 0;
crypto_shash_init(desc);
ret = crypto_shash_update(desc, addr, len);
if (!ret)
crypto_shash_final(desc, (u8 *)&crc);
if (crc != val)
- ret_code = IRDMA_ERR_MPA_CRC;
+ ret_code = -EINVAL;
return ret_code;
}
@@ -1524,9 +1530,8 @@ void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
* @info: to get information
* @buf: puda buffer
*/
-static enum irdma_status_code
-irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
- struct irdma_puda_buf *buf)
+static int irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
{
struct iphdr *iph;
struct ipv6hdr *ip6h;
@@ -1563,7 +1568,7 @@ irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
ibdev_dbg(to_ibdev(buf->vsi->dev),
"ERR: payload_len = 0x%x totallen expected0x%x\n",
info->payload_len, buf->totallen);
- return IRDMA_ERR_INVALID_SIZE;
+ return -EINVAL;
}
buf->tcphlen = tcph->doff << 2;
@@ -1580,9 +1585,8 @@ irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
* @info: to get information
* @buf: puda buffer
*/
-enum irdma_status_code
-irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
- struct irdma_puda_buf *buf)
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
{
struct tcphdr *tcph;
u32 pkt_len;
@@ -1861,20 +1865,19 @@ static void irdma_process_cqp_stats(struct irdma_cqp_request *cqp_request)
* @pestat: pointer to stats info
* @wait: flag to wait or not wait for stats
*/
-enum irdma_status_code
-irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
- struct irdma_vsi_pestat *pestat, bool wait)
+int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat, bool wait)
{
struct irdma_pci_f *rf = dev_to_rf(dev);
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
@@ -1900,22 +1903,21 @@ irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
* @cmd: command to allocate or free
* @stats_info: pointer to allocate stats info
*/
-enum irdma_status_code
-irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
- struct irdma_stats_inst_info *stats_info)
+int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
+ struct irdma_stats_inst_info *stats_info)
{
struct irdma_pci_f *rf = dev_to_rf(vsi->dev);
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
bool wait = false;
if (cmd == IRDMA_OP_STATS_ALLOCATE)
wait = true;
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
@@ -1938,17 +1940,17 @@ irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
* @sc_ceq: pointer to ceq structure
* @op: Create or Destroy
*/
-enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_ceq *sc_ceq, u8 op)
+int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq,
+ u8 op)
{
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_pci_f *rf = dev_to_rf(dev);
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_info->post_sq = 1;
@@ -1968,17 +1970,17 @@ enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev,
* @sc_aeq: pointer to aeq structure
* @op: Create or Destroy
*/
-enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev,
- struct irdma_sc_aeq *sc_aeq, u8 op)
+int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq,
+ u8 op)
{
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_pci_f *rf = dev_to_rf(dev);
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
cqp_info->post_sq = 1;
@@ -1998,16 +2000,15 @@ enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev,
* @cmd: Add, modify or delete
* @node_info: pointer to ws node info
*/
-enum irdma_status_code
-irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
- struct irdma_ws_node_info *node_info)
+int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
+ struct irdma_ws_node_info *node_info)
{
struct irdma_pci_f *rf = dev_to_rf(dev);
struct irdma_cqp *iwcqp = &rf->cqp;
struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
bool poll;
if (!rf->sc_dev.ceq_valid)
@@ -2017,7 +2018,7 @@ irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, !poll);
if (!cqp_request)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
@@ -2066,7 +2067,7 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
{
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
if (cmd != IRDMA_OP_AH_CREATE && cmd != IRDMA_OP_AH_DESTROY)
return -EINVAL;
@@ -2148,11 +2149,10 @@ static void irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request)
* @ah_ret: Returned pointer to address handle if created
*
*/
-enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev,
- struct irdma_ah_info *ah_info,
- bool wait, enum puda_rsrc_type type,
- void *cb_param,
- struct irdma_sc_ah **ah_ret)
+int irdma_puda_create_ah(struct irdma_sc_dev *dev,
+ struct irdma_ah_info *ah_info, bool wait,
+ enum puda_rsrc_type type, void *cb_param,
+ struct irdma_sc_ah **ah_ret)
{
struct irdma_sc_ah *ah;
struct irdma_pci_f *rf = dev_to_rf(dev);
@@ -2161,7 +2161,7 @@ enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev,
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
*ah_ret = ah;
if (!ah)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah,
&ah_info->ah_idx, &rf->next_ah);
@@ -2187,7 +2187,7 @@ error:
err_free:
kfree(ah);
*ah_ret = NULL;
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
/**
@@ -2229,19 +2229,19 @@ void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request)
* @pprm: pble resource manager
* @pchunk: chunk of memory to add
*/
-enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
- struct irdma_chunk *pchunk)
+int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+ struct irdma_chunk *pchunk)
{
u64 sizeofbitmap;
if (pchunk->size & 0xfff)
- return IRDMA_ERR_PARAM;
+ return -EINVAL;
sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift;
pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL);
if (!pchunk->bitmapbuf)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
pchunk->sizeofbitmap = sizeofbitmap;
/* each pble is 8 bytes hence shift by 3 */
@@ -2259,10 +2259,9 @@ enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
* @vaddr: returns virtual address of pble memory
* @fpm_addr: returns fpm address of pble memory
*/
-enum irdma_status_code
-irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
- struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
- u64 **vaddr, u64 *fpm_addr)
+int irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
+ u64 **vaddr, u64 *fpm_addr)
{
u64 bits_needed;
u64 bit_idx = PBLE_INVALID_IDX;
@@ -2290,7 +2289,7 @@ irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
if (!pchunk || bit_idx >= pchunk->sizeofbitmap) {
spin_unlock_irqrestore(&pprm->prm_lock, flags);
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
bitmap_set(pchunk->bitmapbuf, bit_idx, bits_needed);
@@ -2325,8 +2324,8 @@ void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
spin_unlock_irqrestore(&pprm->prm_lock, flags);
}
-enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va,
- dma_addr_t *pg_dma, u32 pg_cnt)
+int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma,
+ u32 pg_cnt)
{
struct page *vm_page;
int i;
@@ -2350,7 +2349,7 @@ enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va,
err:
irdma_unmap_vm_page_list(hw, pg_dma, i);
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt)
@@ -2386,15 +2385,14 @@ done:
* @chunk: chunk to add for paged memory
* @pg_cnt: number of pages needed
*/
-enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk,
- u32 pg_cnt)
+int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt)
{
u32 size;
void *va;
chunk->dmainfo.dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
if (!chunk->dmainfo.dmaaddrs)
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
size = PAGE_SIZE * pg_cnt;
va = vmalloc(size);
@@ -2416,7 +2414,7 @@ err:
kfree(chunk->dmainfo.dmaaddrs);
chunk->dmainfo.dmaaddrs = NULL;
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
/**
@@ -2481,6 +2479,9 @@ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event)
case IRDMA_QP_EVENT_ACCESS_ERR:
ibevent.event = IB_EVENT_QP_ACCESS_ERR;
break;
+ case IRDMA_QP_EVENT_REQ_ERR:
+ ibevent.event = IB_EVENT_QP_REQ_ERR;
+ break;
}
ibevent.device = iwqp->ibqp.device;
ibevent.element.qp = &iwqp->ibqp;
@@ -2501,3 +2502,150 @@ bool irdma_cq_empty(struct irdma_cq *iwcq)
return polarity != ukcq->polarity;
}
+
+void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
+{
+ struct irdma_cmpl_gen *cmpl_node;
+ struct list_head *tmp_node, *list_node;
+
+ list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
+ cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
+ list_del(&cmpl_node->list);
+ kfree(cmpl_node);
+ }
+}
+
+int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
+{
+ struct irdma_cmpl_gen *cmpl;
+
+ if (list_empty(&iwcq->cmpl_generated))
+ return -ENOENT;
+ cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
+ list_del(&cmpl->list);
+ memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
+ kfree(cmpl);
+
+ ibdev_dbg(iwcq->ibcq.device,
+ "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
+ __func__, cq_poll_info->qp_id, cq_poll_info->op_type,
+ cq_poll_info->wr_id);
+
+ return 0;
+}
+
+/**
+ * irdma_set_cpi_common_values - fill in values for polling info struct
+ * @cpi: resulting structure of cq_poll_info type
+ * @qp: QPair
+ * @qp_num: id of the QP
+ */
+static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
+ struct irdma_qp_uk *qp, u32 qp_num)
+{
+ cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ cpi->error = true;
+ cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
+ cpi->minor_err = FLUSH_GENERAL_ERR;
+ cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
+ cpi->qp_id = qp_num;
+}
+
+static inline void irdma_comp_handler(struct irdma_cq *cq)
+{
+ if (!cq->ibcq.comp_handler)
+ return;
+ if (atomic_cmpxchg(&cq->armed, 1, 0))
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+void irdma_generate_flush_completions(struct irdma_qp *iwqp)
+{
+ struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
+ struct irdma_ring *sq_ring = &qp->sq_ring;
+ struct irdma_ring *rq_ring = &qp->rq_ring;
+ struct irdma_cmpl_gen *cmpl;
+ __le64 *sw_wqe;
+ u64 wqe_qword;
+ u32 wqe_idx;
+ bool compl_generated = false;
+ unsigned long flags1;
+
+ spin_lock_irqsave(&iwqp->iwscq->lock, flags1);
+ if (irdma_cq_empty(iwqp->iwscq)) {
+ unsigned long flags2;
+
+ spin_lock_irqsave(&iwqp->lock, flags2);
+ while (IRDMA_RING_MORE_WORK(*sq_ring)) {
+ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+ if (!cmpl) {
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
+ return;
+ }
+
+ wqe_idx = sq_ring->tail;
+ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+ cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ sw_wqe = qp->sq_base[wqe_idx].elem;
+ get_64bit_val(sw_wqe, 24, &wqe_qword);
+ cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE);
+ /* remove the SQ WR by moving SQ tail*/
+ IRDMA_RING_SET_TAIL(*sq_ring,
+ sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
+
+ ibdev_dbg(iwqp->iwscq->ibcq.device,
+ "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
+ __func__, cmpl->cpi.wr_id, qp->qp_id);
+ list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated);
+ compl_generated = true;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
+ if (compl_generated)
+ irdma_comp_handler(iwqp->iwscq);
+ } else {
+ spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ spin_lock_irqsave(&iwqp->iwrcq->lock, flags1);
+ if (irdma_cq_empty(iwqp->iwrcq)) {
+ unsigned long flags2;
+
+ spin_lock_irqsave(&iwqp->lock, flags2);
+ while (IRDMA_RING_MORE_WORK(*rq_ring)) {
+ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+ if (!cmpl) {
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
+ return;
+ }
+
+ wqe_idx = rq_ring->tail;
+ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+ cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
+ cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
+ /* remove the RQ WR by moving RQ tail */
+ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
+ ibdev_dbg(iwqp->iwrcq->ibcq.device,
+ "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
+ __func__, cmpl->cpi.wr_id, qp->qp_id,
+ wqe_idx);
+ list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated);
+
+ compl_generated = true;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
+ if (compl_generated)
+ irdma_comp_handler(iwqp->iwrcq);
+ } else {
+ spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+}
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 8cd5f9261692..a22afbb25bc5 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -21,30 +21,36 @@ static int irdma_query_device(struct ib_device *ibdev,
return -EINVAL;
memset(props, 0, sizeof(*props));
- ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
+ addrconf_addr_eui48((u8 *)&props->sys_image_guid,
+ iwdev->netdev->dev_addr);
props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 |
irdma_fw_minor_ver(&rf->sc_dev);
- props->device_cap_flags = iwdev->device_cap_flags;
+ props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
props->vendor_id = pcidev->vendor;
props->vendor_part_id = pcidev->device;
props->hw_ver = rf->pcidev->revision;
- props->page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ props->page_size_cap = hw_attrs->page_size_cap;
props->max_mr_size = hw_attrs->max_mr_size;
props->max_qp = rf->max_qp - rf->used_qps;
props->max_qp_wr = hw_attrs->max_qp_wr;
props->max_send_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
props->max_recv_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
props->max_cq = rf->max_cq - rf->used_cqs;
- props->max_cqe = rf->max_cqe;
+ props->max_cqe = rf->max_cqe - 1;
props->max_mr = rf->max_mr - rf->used_mrs;
props->max_mw = props->max_mr;
props->max_pd = rf->max_pd - rf->used_pds;
props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
props->max_qp_rd_atom = hw_attrs->max_hw_ird;
props->max_qp_init_rd_atom = hw_attrs->max_hw_ord;
- if (rdma_protocol_roce(ibdev, 1))
+ if (rdma_protocol_roce(ibdev, 1)) {
+ props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN;
props->max_pkeys = IRDMA_PKEY_TBL_SZ;
+ }
+
props->max_ah = rf->max_ah;
props->max_mcast_grp = rf->max_mcg;
props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
@@ -255,7 +261,7 @@ static void irdma_alloc_push_page(struct irdma_qp *iwqp)
struct cqp_cmds_info *cqp_info;
struct irdma_device *iwdev = iwqp->iwdev;
struct irdma_sc_qp *qp = &iwqp->sc_qp;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
if (!cqp_request)
@@ -293,13 +299,19 @@ static void irdma_alloc_push_page(struct irdma_qp *iwqp)
static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
+#define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8)
+#define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd)
struct ib_device *ibdev = uctx->device;
struct irdma_device *iwdev = to_iwdev(ibdev);
- struct irdma_alloc_ucontext_req req;
+ struct irdma_alloc_ucontext_req req = {};
struct irdma_alloc_ucontext_resp uresp = {};
struct irdma_ucontext *ucontext = to_ucontext(uctx);
struct irdma_uk_attrs *uk_attrs;
+ if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN)
+ return -EINVAL;
+
if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)))
return -EINVAL;
@@ -311,7 +323,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
/* GEN_1 legacy support with libi40iw */
- if (udata->outlen < sizeof(uresp)) {
+ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) {
if (uk_attrs->hw_rev != IRDMA_GEN_1)
return -EOPNOTSUPP;
@@ -383,6 +395,7 @@ static void irdma_dealloc_ucontext(struct ib_ucontext *context)
*/
static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
+#define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd)
struct irdma_pd *iwpd = to_iwpd(pd);
struct irdma_device *iwdev = to_iwdev(pd->device);
struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
@@ -392,6 +405,9 @@ static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
u32 pd_id = 0;
int err;
+ if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN)
+ return -EINVAL;
+
err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id,
&rf->next_pd);
if (err)
@@ -532,6 +548,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
irdma_modify_qp_to_err(&iwqp->sc_qp);
+ if (!iwqp->user_mode)
+ cancel_delayed_work_sync(&iwqp->dwork_flush);
+
irdma_qp_rem_ref(&iwqp->ibqp);
wait_for_completion(&iwqp->free_qp);
irdma_free_lsmm_rsrc(iwqp);
@@ -591,7 +610,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
u32 sqdepth, rqdepth;
u8 sqshift, rqshift;
u32 size;
- enum irdma_status_code status;
+ int status;
struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
@@ -602,7 +621,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift,
&sqdepth);
if (status)
- return -ENOMEM;
+ return status;
if (uk_attrs->hw_rev == IRDMA_GEN_1)
rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
@@ -613,7 +632,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift,
&rqdepth);
if (status)
- return -ENOMEM;
+ return status;
iwqp->kqp.sq_wrid_mem =
kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL);
@@ -667,7 +686,7 @@ static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp)
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_create_qp_info *qp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
if (!cqp_request)
@@ -687,7 +706,7 @@ static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp)
status = irdma_handle_cqp_op(rf, cqp_request);
irdma_put_cqp_request(&rf->cqp, cqp_request);
- return status ? -ENOMEM : 0;
+ return status;
}
static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
@@ -787,6 +806,14 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
return 0;
}
+static void irdma_flush_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
+
+ irdma_generate_flush_completions(iwqp);
+}
+
/**
* irdma_create_qp - create qp
* @ibqp: ptr of qp
@@ -797,15 +824,16 @@ static int irdma_create_qp(struct ib_qp *ibqp,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
+#define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx)
+#define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd)
struct ib_pd *ibpd = ibqp->pd;
struct irdma_pd *iwpd = to_iwpd(ibpd);
struct irdma_device *iwdev = to_iwdev(ibpd->device);
struct irdma_pci_f *rf = iwdev->rf;
struct irdma_qp *iwqp = to_iwqp(ibqp);
- struct irdma_create_qp_req req;
+ struct irdma_create_qp_req req = {};
struct irdma_create_qp_resp uresp = {};
u32 qp_num = 0;
- enum irdma_status_code ret;
int err_code;
int sq_size;
int rq_size;
@@ -820,6 +848,10 @@ static int irdma_create_qp(struct ib_qp *ibqp,
if (err_code)
return err_code;
+ if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN))
+ return -EINVAL;
+
sq_size = init_attr->cap.max_send_wr;
rq_size = init_attr->cap.max_recv_wr;
@@ -907,6 +939,7 @@ static int irdma_create_qp(struct ib_qp *ibqp,
init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
irdma_setup_virt_qp(iwdev, iwqp, &init_info);
} else {
+ INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
}
@@ -935,9 +968,8 @@ static int irdma_create_qp(struct ib_qp *ibqp,
if (dev->hw_attrs.uk_attrs.hw_rev > IRDMA_GEN_1)
init_info.qp_uk_init_info.qp_caps |= IRDMA_PUSH_MODE;
- ret = irdma_sc_qp_init(qp, &init_info);
- if (ret) {
- err_code = -EPROTO;
+ err_code = irdma_sc_qp_init(qp, &init_info);
+ if (err_code) {
ibdev_dbg(&iwdev->ibdev, "VERBS: qp_init fail\n");
goto error;
}
@@ -1104,6 +1136,8 @@ static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
+#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
+#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
struct irdma_pd *iwpd = to_iwpd(ibqp->pd);
struct irdma_qp *iwqp = to_iwqp(ibqp);
struct irdma_device *iwdev = iwqp->iwdev;
@@ -1122,6 +1156,13 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
roce_info = &iwqp->roce_info;
udp_info = &iwqp->udp_info;
+ if (udata) {
+ /* udata inlen/outlen can be 0 when supporting legacy libi40iw */
+ if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
+ (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
+ return -EINVAL;
+ }
+
if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
return -EOPNOTSUPP;
@@ -1170,6 +1211,10 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
udp_info->ttl = attr->ah_attr.grh.hop_limit;
udp_info->flow_label = attr->ah_attr.grh.flow_label;
udp_info->tos = attr->ah_attr.grh.traffic_class;
+ udp_info->src_port =
+ rdma_get_udp_sport(udp_info->flow_label,
+ ibqp->qp_num,
+ roce_info->dest_qp);
irdma_qp_rem_qos(&iwqp->sc_qp);
dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri);
ctx_info->user_pri = rt_tos2priority(udp_info->tos);
@@ -1184,7 +1229,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (ret)
return ret;
- if (vlan_id >= VLAN_N_VID && iwdev->dcb)
+ if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
vlan_id = 0;
if (vlan_id < VLAN_N_VID) {
udp_info->insert_vlan_tag = true;
@@ -1197,7 +1242,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
av->attrs = attr->ah_attr;
rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid);
- if (av->sgid_addr.saddr.sa_family == AF_INET6) {
+ if (av->net_type == RDMA_NETWORK_IPV6) {
__be32 *daddr =
av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
__be32 *saddr =
@@ -1213,7 +1258,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
&local_ip[0],
false, NULL,
IRDMA_ARP_RESOLVE);
- } else {
+ } else if (av->net_type == RDMA_NETWORK_IPV4) {
__be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr;
__be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr;
@@ -1354,7 +1399,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
spin_unlock_irqrestore(&iwqp->lock, flags);
- if (udata) {
+ if (udata && udata->inlen) {
if (ib_copy_from_udata(&ureq, udata,
min(sizeof(ureq), udata->inlen)))
return -EINVAL;
@@ -1395,18 +1440,18 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (iwqp->ibqp_state > IB_QPS_RTS &&
!iwqp->flush_issued) {
- iwqp->flush_issued = 1;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
IRDMA_FLUSH_RQ |
IRDMA_FLUSH_WAIT);
+ iwqp->flush_issued = 1;
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
}
} else {
iwqp->ibqp_state = attr->qp_state;
}
- if (udata && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
struct irdma_ucontext *ucontext;
ucontext = rdma_udata_to_drv_context(udata,
@@ -1446,6 +1491,8 @@ exit:
int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata)
{
+#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
+#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
struct irdma_qp *iwqp = to_iwqp(ibqp);
struct irdma_device *iwdev = iwqp->iwdev;
struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
@@ -1460,6 +1507,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
int err;
unsigned long flags;
+ if (udata) {
+ /* udata inlen/outlen can be 0 when supporting legacy libi40iw */
+ if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
+ (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
+ return -EINVAL;
+ }
+
if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
return -EOPNOTSUPP;
@@ -1545,7 +1599,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
case IB_QPS_RESET:
if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
spin_unlock_irqrestore(&iwqp->lock, flags);
- if (udata) {
+ if (udata && udata->inlen) {
if (ib_copy_from_udata(&ureq, udata,
min(sizeof(ureq), udata->inlen)))
return -EINVAL;
@@ -1615,13 +1669,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
if (dont_wait) {
- if (iwqp->cm_id && iwqp->hw_tcp_state) {
+ if (iwqp->hw_tcp_state) {
spin_lock_irqsave(&iwqp->lock, flags);
iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
iwqp->last_aeq = IRDMA_AE_RESET_SENT;
spin_unlock_irqrestore(&iwqp->lock, flags);
- irdma_cm_disconn(iwqp);
}
+ irdma_cm_disconn(iwqp);
} else {
int close_timer_started;
@@ -1642,7 +1696,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
}
}
}
- if (attr_mask & IB_QP_STATE && udata &&
+ if (attr_mask & IB_QP_STATE && udata && udata->outlen &&
dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
struct irdma_ucontext *ucontext;
@@ -1752,16 +1806,18 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
unsigned long flags;
spin_lock_irqsave(&iwcq->lock, flags);
+ if (!list_empty(&iwcq->cmpl_generated))
+ irdma_remove_cmpls_list(iwcq);
if (!list_empty(&iwcq->resize_list))
irdma_process_resize_list(iwcq, iwdev, NULL);
spin_unlock_irqrestore(&iwcq->lock, flags);
irdma_cq_wq_destroy(iwdev->rf, cq);
- irdma_cq_free_rsrc(iwdev->rf, iwcq);
spin_lock_irqsave(&iwceq->ce_lock, flags);
irdma_sc_cleanup_ceqes(cq, ceq);
spin_unlock_irqrestore(&iwceq->ce_lock, flags);
+ irdma_cq_free_rsrc(iwdev->rf, iwcq);
return 0;
}
@@ -1775,6 +1831,7 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
struct ib_udata *udata)
{
+#define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer)
struct irdma_cq *iwcq = to_iwcq(ibcq);
struct irdma_sc_dev *dev = iwcq->sc_cq.dev;
struct irdma_cqp_request *cqp_request;
@@ -1787,7 +1844,6 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
struct irdma_device *iwdev;
struct irdma_pci_f *rf;
struct irdma_cq_buf *cq_buf = NULL;
- enum irdma_status_code status = 0;
unsigned long flags;
int ret;
@@ -1798,6 +1854,9 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
IRDMA_FEATURE_CQ_RESIZE))
return -EOPNOTSUPP;
+ if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN)
+ return -EINVAL;
+
if (entries > rf->max_cqe)
return -EINVAL;
@@ -1880,12 +1939,10 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq;
cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request;
cqp_info->post_sq = 1;
- status = irdma_handle_cqp_op(rf, cqp_request);
+ ret = irdma_handle_cqp_op(rf, cqp_request);
irdma_put_cqp_request(&rf->cqp, cqp_request);
- if (status) {
- ret = -EPROTO;
+ if (ret)
goto error;
- }
spin_lock_irqsave(&iwcq->lock, flags);
if (cq_buf) {
@@ -1932,6 +1989,8 @@ static int irdma_create_cq(struct ib_cq *ibcq,
const struct ib_cq_init_attr *attr,
struct ib_udata *udata)
{
+#define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf)
+#define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size)
struct ib_device *ibdev = ibcq->device;
struct irdma_device *iwdev = to_iwdev(ibdev);
struct irdma_pci_f *rf = iwdev->rf;
@@ -1940,7 +1999,6 @@ static int irdma_create_cq(struct ib_cq *ibcq,
struct irdma_sc_cq *cq;
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_cq_init_info info = {};
- enum irdma_status_code status;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
@@ -1951,6 +2009,11 @@ static int irdma_create_cq(struct ib_cq *ibcq,
err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
if (err_code)
return err_code;
+
+ if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN))
+ return -EINVAL;
+
err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num,
&rf->next_cq);
if (err_code)
@@ -1960,6 +2023,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
cq->back_cq = iwcq;
spin_lock_init(&iwcq->lock);
INIT_LIST_HEAD(&iwcq->resize_list);
+ INIT_LIST_HEAD(&iwcq->cmpl_generated);
info.dev = dev;
ukinfo->cq_size = max(entries, 4);
ukinfo->cq_id = cq_num;
@@ -2090,12 +2154,10 @@ static int irdma_create_cq(struct ib_cq *ibcq,
cqp_info->in.u.cq_create.cq = cq;
cqp_info->in.u.cq_create.check_overflow = true;
cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
- status = irdma_handle_cqp_op(rf, cqp_request);
+ err_code = irdma_handle_cqp_op(rf, cqp_request);
irdma_put_cqp_request(&rf->cqp, cqp_request);
- if (status) {
- err_code = -ENOMEM;
+ if (err_code)
goto cq_free_rsrc;
- }
if (udata) {
struct irdma_create_cq_resp resp = {};
@@ -2304,14 +2366,14 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
struct irdma_pble_info *pinfo;
u64 *pbl;
- enum irdma_status_code status;
+ int status;
enum irdma_pble_level level = PBLE_LEVEL_1;
if (use_pbles) {
status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
false);
if (status)
- return -ENOMEM;
+ return status;
iwpbl->pbl_allocated = true;
level = palloc->level;
@@ -2429,7 +2491,7 @@ static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr)
struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
if (!cqp_request)
@@ -2452,7 +2514,7 @@ static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr)
status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
- return status ? -ENOMEM : 0;
+ return status;
}
/**
@@ -2504,7 +2566,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw)
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.dealloc_stag.info;
memset(info, 0, sizeof(*info));
- info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff;
+ info->pd_id = iwpd->sc_pd.pd_id;
info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
info->mr = false;
cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
@@ -2528,8 +2590,7 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
{
struct irdma_allocate_stag_info *info;
struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
- enum irdma_status_code status;
- int err = 0;
+ int status;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
@@ -2551,10 +2612,8 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
- if (status)
- err = -ENOMEM;
- return err;
+ return status;
}
/**
@@ -2570,9 +2629,8 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
struct irdma_pble_alloc *palloc;
struct irdma_pbl *iwpbl;
struct irdma_mr *iwmr;
- enum irdma_status_code status;
u32 stag;
- int err_code = -ENOMEM;
+ int err_code;
iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
if (!iwmr)
@@ -2594,9 +2652,9 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
iwmr->type = IRDMA_MEMREG_TYPE_MEM;
palloc = &iwpbl->pble_alloc;
iwmr->page_cnt = max_num_sg;
- status = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
- true);
- if (status)
+ err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
+ false);
+ if (err_code)
goto err_get_pble;
err_code = irdma_hw_alloc_stag(iwdev, iwmr);
@@ -2631,8 +2689,16 @@ static int irdma_set_page(struct ib_mr *ibmr, u64 addr)
if (unlikely(iwmr->npages == iwmr->page_cnt))
return -ENOMEM;
- pbl = palloc->level1.addr;
- pbl[iwmr->npages++] = addr;
+ if (palloc->level == PBLE_LEVEL_2) {
+ struct irdma_pble_info *palloc_info =
+ palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT);
+
+ palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr;
+ } else {
+ pbl = palloc->level1.addr;
+ pbl[iwmr->npages] = addr;
+ }
+ iwmr->npages++;
return 0;
}
@@ -2667,10 +2733,9 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
struct irdma_reg_ns_stag_info *stag_info;
struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
- enum irdma_status_code status;
- int err = 0;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
+ int ret;
cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
if (!cqp_request)
@@ -2707,12 +2772,10 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
cqp_info->post_sq = 1;
cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev;
cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
- status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ ret = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
- if (status)
- err = -ENOMEM;
- return err;
+ return ret;
}
/**
@@ -2728,6 +2791,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
u64 virt, int access,
struct ib_udata *udata)
{
+#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages)
struct irdma_device *iwdev = to_iwdev(pd->device);
struct irdma_ucontext *ucontext;
struct irdma_pble_alloc *palloc;
@@ -2745,6 +2809,9 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
return ERR_PTR(-EINVAL);
+ if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
+ return ERR_PTR(-EINVAL);
+
region = ib_umem_get(pd->device, start, len, access);
if (IS_ERR(region)) {
@@ -2774,7 +2841,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
iwmr->page_size = ib_umem_find_best_pgsz(region,
- SZ_4K | SZ_2M | SZ_1G,
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
virt);
if (unlikely(!iwmr->page_size)) {
kfree(iwmr);
@@ -2892,7 +2959,6 @@ struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access
struct irdma_device *iwdev = to_iwdev(pd->device);
struct irdma_pbl *iwpbl;
struct irdma_mr *iwmr;
- enum irdma_status_code status;
u32 stag;
int ret;
@@ -2920,10 +2986,9 @@ struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access
iwmr->pgaddrmem[0] = addr;
iwmr->len = size;
iwmr->page_size = SZ_4K;
- status = irdma_hwreg_mr(iwdev, iwmr, access);
- if (status) {
+ ret = irdma_hwreg_mr(iwdev, iwmr, access);
+ if (ret) {
irdma_free_stag(iwdev, stag);
- ret = -ENOMEM;
goto err;
}
@@ -2996,6 +3061,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
+ int status;
if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) {
if (iwmr->region) {
@@ -3016,7 +3082,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.dealloc_stag.info;
memset(info, 0, sizeof(*info));
- info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff;
+ info->pd_id = iwpd->sc_pd.pd_id;
info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
info->mr = true;
if (iwpbl->pbl_allocated)
@@ -3026,8 +3092,11 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
cqp_info->post_sq = 1;
cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
- irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (status)
+ return status;
+
irdma_free_stag(iwdev, iwmr->stag);
done:
if (iwpbl->pbl_allocated)
@@ -3052,20 +3121,16 @@ static int irdma_post_send(struct ib_qp *ibqp,
struct irdma_qp_uk *ukqp;
struct irdma_sc_dev *dev;
struct irdma_post_sq_info info;
- enum irdma_status_code ret;
int err = 0;
unsigned long flags;
bool inv_stag;
struct irdma_ah *ah;
- bool reflush = false;
iwqp = to_iwqp(ibqp);
ukqp = &iwqp->sc_qp.qp_uk;
dev = &iwqp->iwdev->rf->sc_dev;
spin_lock_irqsave(&iwqp->lock, flags);
- if (iwqp->flush_issued && ukqp->sq_flush_complete)
- reflush = true;
while (ib_wr) {
memset(&info, 0, sizeof(info));
inv_stag = false;
@@ -3111,7 +3176,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey;
info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn;
}
- ret = irdma_uk_inline_send(ukqp, &info, false);
+ err = irdma_uk_inline_send(ukqp, &info, false);
} else {
info.op.send.num_sges = ib_wr->num_sge;
info.op.send.sg_list = ib_wr->sg_list;
@@ -3122,14 +3187,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
}
- ret = irdma_uk_send(ukqp, &info, false);
- }
-
- if (ret) {
- if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
+ err = irdma_uk_send(ukqp, &info, false);
}
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -3155,20 +3213,13 @@ static int irdma_post_send(struct ib_qp *ibqp,
rdma_wr(ib_wr)->remote_addr;
info.op.inline_rdma_write.rem_addr.lkey =
rdma_wr(ib_wr)->rkey;
- ret = irdma_uk_inline_rdma_write(ukqp, &info, false);
+ err = irdma_uk_inline_rdma_write(ukqp, &info, false);
} else {
info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
- ret = irdma_uk_rdma_write(ukqp, &info, false);
- }
-
- if (ret) {
- if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
+ err = irdma_uk_rdma_write(ukqp, &info, false);
}
break;
case IB_WR_RDMA_READ_WITH_INV:
@@ -3185,21 +3236,12 @@ static int irdma_post_send(struct ib_qp *ibqp,
info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
-
- ret = irdma_uk_rdma_read(ukqp, &info, inv_stag, false);
- if (ret) {
- if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- }
+ err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false);
break;
case IB_WR_LOCAL_INV:
info.op_type = IRDMA_OP_TYPE_INV_STAG;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
- ret = irdma_uk_stag_local_invalidate(ukqp, &info, true);
- if (ret)
- err = -ENOMEM;
+ err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
break;
case IB_WR_REG_MR: {
struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
@@ -3221,10 +3263,8 @@ static int irdma_post_send(struct ib_qp *ibqp,
stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
if (iwmr->npages > IRDMA_MIN_PAGES_PER_FMR)
stag_info.chunk_size = 1;
- ret = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info,
+ err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info,
true);
- if (ret)
- err = -ENOMEM;
break;
}
default:
@@ -3240,15 +3280,14 @@ static int irdma_post_send(struct ib_qp *ibqp,
ib_wr = ib_wr->next;
}
- if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) {
- irdma_uk_qp_post_wr(ukqp);
- spin_unlock_irqrestore(&iwqp->lock, flags);
- } else if (reflush) {
- ukqp->sq_flush_complete = false;
+ if (!iwqp->flush_issued) {
+ if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
+ irdma_uk_qp_post_wr(ukqp);
spin_unlock_irqrestore(&iwqp->lock, flags);
- irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH);
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
if (err)
*bad_wr = ib_wr;
@@ -3269,29 +3308,21 @@ static int irdma_post_recv(struct ib_qp *ibqp,
struct irdma_qp *iwqp;
struct irdma_qp_uk *ukqp;
struct irdma_post_rq_info post_recv = {};
- enum irdma_status_code ret = 0;
unsigned long flags;
int err = 0;
- bool reflush = false;
iwqp = to_iwqp(ibqp);
ukqp = &iwqp->sc_qp.qp_uk;
spin_lock_irqsave(&iwqp->lock, flags);
- if (iwqp->flush_issued && ukqp->rq_flush_complete)
- reflush = true;
while (ib_wr) {
post_recv.num_sges = ib_wr->num_sge;
post_recv.wr_id = ib_wr->wr_id;
post_recv.sg_list = ib_wr->sg_list;
- ret = irdma_uk_post_receive(ukqp, &post_recv);
- if (ret) {
+ err = irdma_uk_post_receive(ukqp, &post_recv);
+ if (err) {
ibdev_dbg(&iwqp->iwdev->ibdev,
- "VERBS: post_recv err %d\n", ret);
- if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
+ "VERBS: post_recv err %d\n", err);
goto out;
}
@@ -3299,13 +3330,10 @@ static int irdma_post_recv(struct ib_qp *ibqp,
}
out:
- if (reflush) {
- ukqp->rq_flush_complete = false;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH);
- } else {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- }
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (iwqp->flush_issued)
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
if (err)
*bad_wr = ib_wr;
@@ -3336,6 +3364,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
return IB_WC_RETRY_EXC_ERR;
case FLUSH_MW_BIND_ERR:
return IB_WC_MW_BIND_ERR;
+ case FLUSH_REM_INV_REQ_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
case FLUSH_FATAL_ERR:
default:
return IB_WC_FATAL_ERR;
@@ -3478,7 +3508,7 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc
struct irdma_cq_buf *last_buf = NULL;
struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe;
struct irdma_cq_buf *cq_buf;
- enum irdma_status_code ret;
+ int ret;
struct irdma_device *iwdev;
struct irdma_cq_uk *ukcq;
bool cq_new_cqe = false;
@@ -3498,10 +3528,10 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc
cq_new_cqe = true;
continue;
}
- if (ret == IRDMA_ERR_Q_EMPTY)
+ if (ret == -ENOENT)
break;
/* QP using the CQ is destroyed. Skip reporting this CQE */
- if (ret == IRDMA_ERR_Q_DESTROYED) {
+ if (ret == -EFAULT) {
cq_new_cqe = true;
continue;
}
@@ -3517,16 +3547,21 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc
/* check the current CQ for new cqes */
while (npolled < num_entries) {
ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
+ if (ret == -ENOENT) {
+ ret = irdma_generated_cmpls(iwcq, cur_cqe);
+ if (!ret)
+ irdma_process_cqe(entry + npolled, cur_cqe);
+ }
if (!ret) {
++npolled;
cq_new_cqe = true;
continue;
}
- if (ret == IRDMA_ERR_Q_EMPTY)
+ if (ret == -ENOENT)
break;
/* QP using the CQ is destroyed. Skip reporting this CQE */
- if (ret == IRDMA_ERR_Q_DESTROYED) {
+ if (ret == -EFAULT) {
cq_new_cqe = true;
continue;
}
@@ -3548,7 +3583,7 @@ error:
ibdev_dbg(&iwdev->ibdev, "%s: Error polling CQ, irdma_err: %d\n",
__func__, ret);
- return -EINVAL;
+ return ret;
}
/**
@@ -3598,13 +3633,13 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
promo_event = true;
- if (!iwcq->armed || promo_event) {
- iwcq->armed = true;
+ if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
iwcq->last_notify = cq_notify;
irdma_uk_cq_request_notification(ukcq, cq_notify);
}
- if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq))
+ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
ret = 1;
spin_unlock_irqrestore(&iwcq->lock, flags);
@@ -3854,7 +3889,7 @@ static int irdma_mcast_cqp_op(struct irdma_device *iwdev,
{
struct cqp_cmds_info *cqp_info;
struct irdma_cqp_request *cqp_request;
- enum irdma_status_code status;
+ int status;
cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
if (!cqp_request)
@@ -3868,10 +3903,8 @@ static int irdma_mcast_cqp_op(struct irdma_device *iwdev,
cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp;
status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
- if (status)
- return -ENOMEM;
- return 0;
+ return status;
}
/**
@@ -3927,11 +3960,7 @@ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
int ret = 0;
bool ipv4;
u16 vlan_id;
- union {
- struct sockaddr saddr;
- struct sockaddr_in saddr_in;
- struct sockaddr_in6 saddr_in6;
- } sgid_addr;
+ union irdma_sockaddr sgid_addr;
unsigned char dmac[ETH_ALEN];
rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
@@ -4067,11 +4096,7 @@ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
struct irdma_mcast_grp_ctx_entry_info mcg_info = {};
int ret;
unsigned long flags;
- union {
- struct sockaddr saddr;
- struct sockaddr_in saddr_in;
- struct sockaddr_in6 saddr_in6;
- } sgid_addr;
+ union irdma_sockaddr sgid_addr;
rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid))
@@ -4127,17 +4152,47 @@ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
return 0;
}
-/**
- * irdma_create_ah - create address handle
- * @ibah: address handle
- * @attr: address handle attributes
- * @udata: User data
- *
- * returns 0 on success, error otherwise
- */
-static int irdma_create_ah(struct ib_ah *ibah,
- struct rdma_ah_init_attr *attr,
- struct ib_udata *udata)
+static int irdma_create_hw_ah(struct irdma_device *iwdev, struct irdma_ah *ah, bool sleep)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+ int err;
+
+ err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah->sc_ah.ah_info.ah_idx,
+ &rf->next_ah);
+ if (err)
+ return err;
+
+ err = irdma_ah_cqp_op(rf, &ah->sc_ah, IRDMA_OP_AH_CREATE, sleep,
+ irdma_gsi_ud_qp_ah_cb, &ah->sc_ah);
+
+ if (err) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: CQP-OP Create AH fail");
+ goto err_ah_create;
+ }
+
+ if (!sleep) {
+ int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD;
+
+ do {
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+ mdelay(1);
+ } while (!ah->sc_ah.ah_info.ah_valid && --cnt);
+
+ if (!cnt) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: CQP create AH timed out");
+ err = -ETIMEDOUT;
+ goto err_ah_create;
+ }
+ }
+ return 0;
+
+err_ah_create:
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx);
+
+ return err;
+}
+
+static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr)
{
struct irdma_pd *pd = to_iwpd(ibah->pd);
struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
@@ -4146,25 +4201,13 @@ static int irdma_create_ah(struct ib_ah *ibah,
struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
struct irdma_pci_f *rf = iwdev->rf;
struct irdma_sc_ah *sc_ah;
- u32 ah_id = 0;
struct irdma_ah_info *ah_info;
- struct irdma_create_ah_resp uresp;
- union {
- struct sockaddr saddr;
- struct sockaddr_in saddr_in;
- struct sockaddr_in6 saddr_in6;
- } sgid_addr, dgid_addr;
+ union irdma_sockaddr sgid_addr, dgid_addr;
int err;
u8 dmac[ETH_ALEN];
- err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_id,
- &rf->next_ah);
- if (err)
- return err;
-
ah->pd = pd;
sc_ah = &ah->sc_ah;
- sc_ah->ah_info.ah_idx = ah_id;
sc_ah->ah_info.vsi = &iwdev->vsi;
irdma_sc_init_ah(&rf->sc_dev, sc_ah);
ah->sgid_index = ah_attr->grh.sgid_index;
@@ -4174,10 +4217,7 @@ static int irdma_create_ah(struct ib_ah *ibah,
rdma_gid2ip((struct sockaddr *)&dgid_addr, &ah_attr->grh.dgid);
ah->av.attrs = *ah_attr;
ah->av.net_type = rdma_gid_attr_network_type(sgid_attr);
- ah->av.sgid_addr.saddr = sgid_addr.saddr;
- ah->av.dgid_addr.saddr = dgid_addr.saddr;
ah_info = &sc_ah->ah_info;
- ah_info->ah_idx = ah_id;
ah_info->pd_idx = pd->sc_pd.pd_id;
if (ah_attr->ah_flags & IB_AH_GRH) {
ah_info->flow_label = ah_attr->grh.flow_label;
@@ -4186,7 +4226,7 @@ static int irdma_create_ah(struct ib_ah *ibah,
}
ether_addr_copy(dmac, ah_attr->roce.dmac);
- if (rdma_gid_attr_network_type(sgid_attr) == RDMA_NETWORK_IPV4) {
+ if (ah->av.net_type == RDMA_NETWORK_IPV4) {
ah_info->ipv4_valid = true;
ah_info->dest_ip_addr[0] =
ntohl(dgid_addr.saddr_in.sin_addr.s_addr);
@@ -4214,17 +4254,15 @@ static int irdma_create_ah(struct ib_ah *ibah,
err = rdma_read_gid_l2_fields(sgid_attr, &ah_info->vlan_tag,
ah_info->mac_addr);
if (err)
- goto error;
+ return err;
ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr,
ah_info->ipv4_valid, dmac);
- if (ah_info->dst_arpindex == -1) {
- err = -EINVAL;
- goto error;
- }
+ if (ah_info->dst_arpindex == -1)
+ return -EINVAL;
- if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb)
+ if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode)
ah_info->vlan_tag = 0;
if (ah_info->vlan_tag < VLAN_N_VID) {
@@ -4233,43 +4271,38 @@ static int irdma_create_ah(struct ib_ah *ibah,
rt_tos2priority(ah_info->tc_tos) << VLAN_PRIO_SHIFT;
}
- err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE,
- attr->flags & RDMA_CREATE_AH_SLEEPABLE,
- irdma_gsi_ud_qp_ah_cb, sc_ah);
-
- if (err) {
- ibdev_dbg(&iwdev->ibdev,
- "VERBS: CQP-OP Create AH fail");
- goto error;
- }
-
- if (!(attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
- int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD;
-
- do {
- irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
- mdelay(1);
- } while (!sc_ah->ah_info.ah_valid && --cnt);
+ return 0;
+}
- if (!cnt) {
- ibdev_dbg(&iwdev->ibdev,
- "VERBS: CQP create AH timed out");
- err = -ETIMEDOUT;
- goto error;
+/**
+ * irdma_ah_exists - Check for existing identical AH
+ * @iwdev: irdma device
+ * @new_ah: AH to check for
+ *
+ * returns true if AH is found, false if not found.
+ */
+static bool irdma_ah_exists(struct irdma_device *iwdev,
+ struct irdma_ah *new_ah)
+{
+ struct irdma_ah *ah;
+ u32 key = new_ah->sc_ah.ah_info.dest_ip_addr[0] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[1] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[2] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[3];
+
+ hash_for_each_possible(iwdev->ah_hash_tbl, ah, list, key) {
+ /* Set ah_valid and ah_id the same so memcmp can work */
+ new_ah->sc_ah.ah_info.ah_idx = ah->sc_ah.ah_info.ah_idx;
+ new_ah->sc_ah.ah_info.ah_valid = ah->sc_ah.ah_info.ah_valid;
+ if (!memcmp(&ah->sc_ah.ah_info, &new_ah->sc_ah.ah_info,
+ sizeof(ah->sc_ah.ah_info))) {
+ refcount_inc(&ah->refcnt);
+ new_ah->parent_ah = ah;
+ return true;
}
}
- if (udata) {
- uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
- err = ib_copy_to_udata(udata, &uresp,
- min(sizeof(uresp), udata->outlen));
- }
- return 0;
-
-error:
- irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id);
-
- return err;
+ return false;
}
/**
@@ -4282,6 +4315,17 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags)
struct irdma_device *iwdev = to_iwdev(ibah->device);
struct irdma_ah *ah = to_iwah(ibah);
+ if ((ah_flags & RDMA_DESTROY_AH_SLEEPABLE) && ah->parent_ah) {
+ mutex_lock(&iwdev->ah_tbl_lock);
+ if (!refcount_dec_and_test(&ah->parent_ah->refcnt)) {
+ mutex_unlock(&iwdev->ah_tbl_lock);
+ return 0;
+ }
+ hash_del(&ah->parent_ah->list);
+ kfree(ah->parent_ah);
+ mutex_unlock(&iwdev->ah_tbl_lock);
+ }
+
irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY,
false, NULL, ah);
@@ -4292,6 +4336,84 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags)
}
/**
+ * irdma_create_user_ah - create user address handle
+ * @ibah: address handle
+ * @attr: address handle attributes
+ * @udata: User data
+ *
+ * returns 0 on success, error otherwise
+ */
+static int irdma_create_user_ah(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+#define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd)
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ struct irdma_create_ah_resp uresp;
+ struct irdma_ah *parent_ah;
+ int err;
+
+ if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN)
+ return -EINVAL;
+
+ err = irdma_setup_ah(ibah, attr);
+ if (err)
+ return err;
+ mutex_lock(&iwdev->ah_tbl_lock);
+ if (!irdma_ah_exists(iwdev, ah)) {
+ err = irdma_create_hw_ah(iwdev, ah, true);
+ if (err) {
+ mutex_unlock(&iwdev->ah_tbl_lock);
+ return err;
+ }
+ /* Add new AH to list */
+ parent_ah = kmemdup(ah, sizeof(*ah), GFP_KERNEL);
+ if (parent_ah) {
+ u32 key = parent_ah->sc_ah.ah_info.dest_ip_addr[0] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[1] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[2] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[3];
+
+ ah->parent_ah = parent_ah;
+ hash_add(iwdev->ah_hash_tbl, &parent_ah->list, key);
+ refcount_set(&parent_ah->refcnt, 1);
+ }
+ }
+ mutex_unlock(&iwdev->ah_tbl_lock);
+
+ uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
+ err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen));
+ if (err)
+ irdma_destroy_ah(ibah, attr->flags);
+
+ return err;
+}
+
+/**
+ * irdma_create_ah - create address handle
+ * @ibah: address handle
+ * @attr: address handle attributes
+ * @udata: NULL
+ *
+ * returns 0 on success, error otherwise
+ */
+static int irdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ int err;
+
+ err = irdma_setup_ah(ibah, attr);
+ if (err)
+ return err;
+ err = irdma_create_hw_ah(iwdev, ah, attr->flags & RDMA_CREATE_AH_SLEEPABLE);
+
+ return err;
+}
+
+/**
* irdma_query_ah - Query address handle
* @ibah: pointer to address handle
* @ah_attr: address handle attributes
@@ -4321,28 +4443,10 @@ static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev,
return IB_LINK_LAYER_ETHERNET;
}
-static __be64 irdma_mac_to_guid(struct net_device *ndev)
-{
- const unsigned char *mac = ndev->dev_addr;
- __be64 guid;
- unsigned char *dst = (unsigned char *)&guid;
-
- dst[0] = mac[0] ^ 2;
- dst[1] = mac[1];
- dst[2] = mac[2];
- dst[3] = 0xff;
- dst[4] = 0xfe;
- dst[5] = mac[3];
- dst[6] = mac[4];
- dst[7] = mac[5];
-
- return guid;
-}
-
static const struct ib_device_ops irdma_roce_dev_ops = {
.attach_mcast = irdma_attach_mcast,
.create_ah = irdma_create_ah,
- .create_user_ah = irdma_create_ah,
+ .create_user_ah = irdma_create_user_ah,
.destroy_ah = irdma_destroy_ah,
.detach_mcast = irdma_detach_mcast,
.get_link_layer = irdma_get_link_layer,
@@ -4408,7 +4512,8 @@ static const struct ib_device_ops irdma_dev_ops = {
static void irdma_init_roce_device(struct irdma_device *iwdev)
{
iwdev->ibdev.node_type = RDMA_NODE_IB_CA;
- iwdev->ibdev.node_guid = irdma_mac_to_guid(iwdev->netdev);
+ addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
+ iwdev->netdev->dev_addr);
ib_set_device_ops(&iwdev->ibdev, &irdma_roce_dev_ops);
}
@@ -4421,7 +4526,8 @@ static int irdma_init_iw_device(struct irdma_device *iwdev)
struct net_device *netdev = iwdev->netdev;
iwdev->ibdev.node_type = RDMA_NODE_RNIC;
- ether_addr_copy((u8 *)&iwdev->ibdev.node_guid, netdev->dev_addr);
+ addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
+ netdev->dev_addr);
iwdev->ibdev.ops.iw_add_ref = irdma_qp_add_ref;
iwdev->ibdev.ops.iw_rem_ref = irdma_qp_rem_ref;
iwdev->ibdev.ops.iw_get_qp = irdma_get_qp;
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index d0fdef8d09ea..4309b7159f42 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -4,6 +4,7 @@
#define IRDMA_VERBS_H
#define IRDMA_MAX_SAVED_PHY_PGADDR 4
+#define IRDMA_FLUSH_DELAY_MS 20
#define IRDMA_PKEY_TBL_SZ 1
#define IRDMA_DEFAULT_PKEY 0xFFFF
@@ -25,14 +26,16 @@ struct irdma_pd {
struct irdma_sc_pd sc_pd;
};
+union irdma_sockaddr {
+ struct sockaddr_in saddr_in;
+ struct sockaddr_in6 saddr_in6;
+};
+
struct irdma_av {
u8 macaddr[16];
struct rdma_ah_attr attrs;
- union {
- struct sockaddr saddr;
- struct sockaddr_in saddr_in;
- struct sockaddr_in6 saddr_in6;
- } sgid_addr, dgid_addr;
+ union irdma_sockaddr sgid_addr;
+ union irdma_sockaddr dgid_addr;
u8 net_type;
};
@@ -43,6 +46,9 @@ struct irdma_ah {
struct irdma_av av;
u8 sgid_index;
union ib_gid dgid;
+ struct hlist_node list;
+ refcount_t refcnt;
+ struct irdma_ah *parent_ah; /* AH from cached list */
};
struct irdma_hmc_pble {
@@ -110,7 +116,7 @@ struct irdma_cq {
u16 cq_size;
u16 cq_num;
bool user_mode;
- bool armed;
+ atomic_t armed;
enum irdma_cmpl_notify last_notify;
u32 polled_cmpls;
u32 cq_mem_size;
@@ -121,6 +127,12 @@ struct irdma_cq {
struct irdma_pbl *iwpbl_shadow;
struct list_head resize_list;
struct irdma_cq_poll_info cur_cqe;
+ struct list_head cmpl_generated;
+};
+
+struct irdma_cmpl_gen {
+ struct list_head list;
+ struct irdma_cq_poll_info cpi;
};
struct disconn_work {
@@ -161,6 +173,7 @@ struct irdma_qp {
refcount_t refcnt;
struct iw_cm_id *cm_id;
struct irdma_cm_node *cm_node;
+ struct delayed_work dwork_flush;
struct ib_mr *lsmm_mr;
atomic_t hw_mod_qp_pend;
enum ib_qp_state ibqp_state;
@@ -224,4 +237,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev);
void irdma_ib_unregister_device(struct irdma_device *iwdev);
void irdma_ib_dealloc_device(struct ib_device *ibdev);
void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event);
+void irdma_generate_flush_completions(struct irdma_qp *iwqp);
+void irdma_remove_cmpls_list(struct irdma_cq *iwcq);
+int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info);
#endif /* IRDMA_VERBS_H */
diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c
index b0d6ee0739f5..20bc8d0d7f1f 100644
--- a/drivers/infiniband/hw/irdma/ws.c
+++ b/drivers/infiniband/hw/irdma/ws.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
/* Copyright (c) 2017 - 2021 Intel Corporation */
#include "osdep.h"
-#include "status.h"
#include "hmc.h"
#include "defs.h"
#include "type.h"
@@ -87,8 +86,8 @@ static void irdma_free_node(struct irdma_sc_vsi *vsi,
* @node: pointer to node
* @cmd: add, remove or modify
*/
-static enum irdma_status_code
-irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node, u8 cmd)
+static int irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *node, u8 cmd)
{
struct irdma_ws_node_info node_info = {};
@@ -106,7 +105,7 @@ irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node, u8 cmd)
node_info.enable = node->enable;
if (irdma_cqp_ws_node_cmd(vsi->dev, cmd, &node_info)) {
ibdev_dbg(to_ibdev(vsi->dev), "WS: CQP WS CMD failed\n");
- return IRDMA_ERR_NO_MEMORY;
+ return -ENOMEM;
}
if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) {
@@ -234,18 +233,18 @@ static void irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri)
* @vsi: vsi pointer
* @user_pri: user priority
*/
-enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
+int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
{
struct irdma_ws_node *ws_tree_root;
struct irdma_ws_node *vsi_node;
struct irdma_ws_node *tc_node;
u16 traffic_class;
- enum irdma_status_code ret = 0;
+ int ret = 0;
int i;
mutex_lock(&vsi->dev->ws_mutex);
if (vsi->tc_change_pending) {
- ret = IRDMA_ERR_NOT_READY;
+ ret = -EBUSY;
goto exit;
}
@@ -258,7 +257,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
ws_tree_root = irdma_alloc_node(vsi, user_pri,
WS_NODE_TYPE_PARENT, NULL);
if (!ws_tree_root) {
- ret = IRDMA_ERR_NO_MEMORY;
+ ret = -ENOMEM;
goto exit;
}
@@ -283,7 +282,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
vsi_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT,
ws_tree_root);
if (!vsi_node) {
- ret = IRDMA_ERR_NO_MEMORY;
+ ret = -ENOMEM;
goto vsi_add_err;
}
@@ -310,7 +309,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF,
vsi_node);
if (!tc_node) {
- ret = IRDMA_ERR_NO_MEMORY;
+ ret = -ENOMEM;
goto leaf_add_err;
}
diff --git a/drivers/infiniband/hw/irdma/ws.h b/drivers/infiniband/hw/irdma/ws.h
index f0e16f630701..d431e3327d26 100644
--- a/drivers/infiniband/hw/irdma/ws.h
+++ b/drivers/infiniband/hw/irdma/ws.h
@@ -34,7 +34,7 @@ struct irdma_ws_node {
};
struct irdma_sc_vsi;
-enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri);
+int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri);
void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri);
void irdma_ws_reset(struct irdma_sc_vsi *vsi);
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index e2e1f5daddc4..111fa88a3be4 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -38,7 +38,6 @@
#include <rdma/ib_sa.h>
#include <rdma/ib_pack.h>
#include <linux/mlx4/cmd.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 4aff1c8298b1..12b481d138cf 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -80,6 +80,7 @@ struct cm_req_msg {
union ib_gid primary_path_sgid;
};
+static struct workqueue_struct *cm_wq;
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
@@ -288,10 +289,10 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
/*make sure that there is no schedule inside the scheduled work.*/
if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
- schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
} else if (id->scheduled_delete) {
/* Adjust timeout if already scheduled */
- mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
@@ -370,7 +371,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl
ret = xa_err(item);
else
/* If a retry, adjust delayed work */
- mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
goto err_or_exists;
}
xa_unlock(&sriov->xa_rej_tmout);
@@ -393,7 +394,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl
return xa_err(old);
}
- schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
return 0;
@@ -500,7 +501,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
xa_lock(&sriov->xa_rej_tmout);
xa_for_each(&sriov->xa_rej_tmout, id, item) {
if (slave < 0 || slave == item->slave) {
- mod_delayed_work(system_wq, &item->timeout, 0);
+ mod_delayed_work(cm_wq, &item->timeout, 0);
flush_needed = true;
++cnt;
}
@@ -508,7 +509,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
xa_unlock(&sriov->xa_rej_tmout);
if (flush_needed) {
- flush_scheduled_work();
+ flush_workqueue(cm_wq);
pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
cnt, slave);
}
@@ -540,7 +541,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
spin_unlock(&sriov->id_map_lock);
if (need_flush)
- flush_scheduled_work(); /* make sure all timers were flushed */
+ flush_workqueue(cm_wq); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
spin_lock(&sriov->id_map_lock);
@@ -587,3 +588,17 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
rej_tmout_xa_cleanup(sriov, slave);
}
+
+int mlx4_ib_cm_init(void)
+{
+ cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0);
+ if (!cm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_cm_destroy(void)
+{
+ destroy_workqueue(cm_wq);
+}
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index d13ecbdd4391..a37cfac5e23f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -96,7 +96,7 @@ static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
__be64 mlx4_ib_gen_node_guid(void)
{
#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
- return cpu_to_be64(NODE_GUID_HI | prandom_u32());
+ return cpu_to_be64(NODE_GUID_HI | get_random_u32());
}
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0d2fa3338784..ba47874f90d3 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -85,14 +85,6 @@ static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
static struct workqueue_struct *wq;
-static void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
@@ -471,7 +463,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
@@ -487,8 +479,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_RC_RNR_NAK_GEN;
+ props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -502,9 +494,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (dev->dev->caps.max_gso_sz &&
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
- props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ props->kernel_cap_flags |= IBK_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
- props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+ props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
@@ -669,7 +661,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u32 port,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -721,7 +713,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u32 port,
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == IB_SPEED_QDR) {
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -848,7 +840,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -870,7 +862,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
}
}
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -917,7 +909,7 @@ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE;
in_mad->attr_mod = 0;
@@ -971,7 +963,7 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -1990,7 +1982,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
if (mlx4_is_master(dev->dev))
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
@@ -2784,10 +2776,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (err)
goto err_counter;
- ibdev->ib_uc_qpns_bitmap =
- kmalloc_array(BITS_TO_LONGS(ibdev->steer_qpn_count),
- sizeof(long),
- GFP_KERNEL);
+ ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
+ GFP_KERNEL);
if (!ibdev->ib_uc_qpns_bitmap)
goto err_steer_qp_release;
@@ -2875,7 +2865,7 @@ err_diag_counters:
mlx4_ib_diag_cleanup(ibdev);
err_steer_free_bitmap:
- kfree(ibdev->ib_uc_qpns_bitmap);
+ bitmap_free(ibdev->ib_uc_qpns_bitmap);
err_steer_qp_release:
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
@@ -2988,7 +2978,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
- kfree(ibdev->ib_uc_qpns_bitmap);
+ bitmap_free(ibdev->ib_uc_qpns_bitmap);
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
@@ -3247,7 +3237,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew)
- break;
+ return;
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
@@ -3317,10 +3307,14 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
- err = mlx4_ib_mcg_init();
+ err = mlx4_ib_cm_init();
if (err)
goto clean_wq;
+ err = mlx4_ib_mcg_init();
+ if (err)
+ goto clean_cm;
+
err = mlx4_register_interface(&mlx4_ib_interface);
if (err)
goto clean_mcg;
@@ -3330,6 +3324,9 @@ static int __init mlx4_ib_init(void)
clean_mcg:
mlx4_ib_mcg_destroy();
+clean_cm:
+ mlx4_ib_cm_destroy();
+
clean_wq:
destroy_workqueue(wq);
return err;
@@ -3339,6 +3336,7 @@ static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
+ mlx4_ib_cm_destroy();
destroy_workqueue(wq);
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index d84023b4b1b8..6a3b0f121045 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -937,4 +937,7 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts);
+int mlx4_ib_cm_init(void);
+void mlx4_ib_cm_destroy(void);
+
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 04a67b481608..a40bf58bcdd3 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
- mr->ibmr.length = length;
mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 6a381751c0d8..c4cf91235eee 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -320,7 +320,6 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
- nreq = 0;
goto out;
}
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index f43380106bd0..612ee8190a2d 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -19,6 +19,7 @@ mlx5_ib-y := ah.o \
restrack.o \
srq.o \
srq_cmd.o \
+ umr.o \
wr.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 0b61df52332a..290ea8ac3838 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -433,8 +433,7 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
dev->port[port_num].dbg_cc_params = dbg_cc_params;
- dbg_cc_params->root = debugfs_create_dir("cc_params",
- mdev->priv.dbg_root);
+ dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
dbg_cc_params->params[i].offset = i;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index a190fb581591..be189e0525de 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -328,8 +328,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
}
wc->vendor_err = cqe->vendor_err_synd;
- if (dump)
+ if (dump) {
+ mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
+ ib_wc_status_msg(wc->status));
dump_cqe(dev, cqe);
+ }
}
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
@@ -520,6 +523,10 @@ repoll:
"Requestor" : "Responder", cq->mcq.cqn);
mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
err_cqe->syndrome, err_cqe->vendor_err_synd);
+ if (wc->status != IB_WC_WR_FLUSH_ERR &&
+ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
+ dev->umrc.state = MLX5_UMR_STATE_RECOVER;
+
if (opcode == MLX5_CQE_REQ_ERR) {
wq = &(*cur_qp)->sq;
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 08b7f6bc56c3..2211a0be16f3 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -907,6 +907,7 @@ static bool devx_is_whitelist_cmd(void *in)
case MLX5_CMD_OP_QUERY_HCA_CAP:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
return true;
default:
return false;
@@ -962,6 +963,7 @@ static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
case MLX5_CMD_OP_QUERY_CONG_PARAMS:
case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
return true;
default:
return false;
@@ -1055,7 +1057,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
int cmd_out_len = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
void *cmd_out;
- int err;
+ int err, err2;
int uid;
c = devx_ufile2uctx(attrs);
@@ -1076,14 +1078,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
return PTR_ERR(cmd_out);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- err = mlx5_cmd_exec(dev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
- cmd_out, cmd_out_len);
- if (err)
+ err = mlx5_cmd_do(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
return err;
- return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
cmd_out_len);
+
+ return err2 ?: err;
}
static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
@@ -1457,7 +1461,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
struct devx_obj *obj;
u16 obj_type = 0;
- int err;
+ int err, err2 = 0;
int uid;
u32 obj_id;
u16 opcode;
@@ -1497,15 +1501,18 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
!is_apu_cq(dev, cmd_in)) {
obj->flags |= DEVX_OBJ_FLAGS_CQ;
obj->core_cq.comp = devx_cq_comp;
- err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
- cmd_in, cmd_in_len, cmd_out,
- cmd_out_len);
+ err = mlx5_create_cq(dev->mdev, &obj->core_cq,
+ cmd_in, cmd_in_len, cmd_out,
+ cmd_out_len);
} else {
- err = mlx5_cmd_exec(dev->mdev, cmd_in,
- cmd_in_len,
- cmd_out, cmd_out_len);
+ err = mlx5_cmd_do(dev->mdev, cmd_in, cmd_in_len,
+ cmd_out, cmd_out_len);
}
+ if (err == -EREMOTEIO)
+ err2 = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+ cmd_out, cmd_out_len);
if (err)
goto obj_free;
@@ -1548,7 +1555,7 @@ obj_destroy:
sizeof(out));
obj_free:
kfree(obj);
- return err;
+ return err2 ?: err;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
@@ -1563,7 +1570,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
void *cmd_out;
- int err;
+ int err, err2;
int uid;
if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
@@ -1586,14 +1593,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
devx_set_umem_valid(cmd_in);
- err = mlx5_cmd_exec(mdev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
- cmd_out, cmd_out_len);
- if (err)
+ err = mlx5_cmd_do(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
return err;
- return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
cmd_out, cmd_out_len);
+
+ return err2 ?: err;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
@@ -1607,7 +1616,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
void *cmd_out;
- int err;
+ int err, err2;
int uid;
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
@@ -1629,14 +1638,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
return PTR_ERR(cmd_out);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- err = mlx5_cmd_exec(mdev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
- cmd_out, cmd_out_len);
- if (err)
+ err = mlx5_cmd_do(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
return err;
- return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
cmd_out, cmd_out_len);
+
+ return err2 ?: err;
}
struct devx_async_event_queue {
@@ -1886,8 +1897,10 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
key_level2,
obj_event,
GFP_KERNEL);
- if (err)
+ if (err) {
+ kfree(obj_event);
return err;
+ }
INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
@@ -2147,32 +2160,39 @@ err:
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
struct uverbs_attr_bundle *attrs,
- struct devx_umem *obj)
+ struct devx_umem *obj, u32 access_flags)
{
u64 addr;
size_t size;
- u32 access;
int err;
if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
return -EFAULT;
- err = uverbs_get_flags32(&access, attrs,
- MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
- IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
+ err = ib_check_mr_access(&dev->ib_dev, access_flags);
if (err)
return err;
- err = ib_check_mr_access(&dev->ib_dev, access);
- if (err)
- return err;
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD)) {
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int dmabuf_fd;
- obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
- if (IS_ERR(obj->umem))
- return PTR_ERR(obj->umem);
+ err = uverbs_get_raw_fd(&dmabuf_fd, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD);
+ if (err)
+ return -EFAULT;
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(
+ &dev->ib_dev, addr, size, dmabuf_fd, access_flags);
+ if (IS_ERR(umem_dmabuf))
+ return PTR_ERR(umem_dmabuf);
+ obj->umem = &umem_dmabuf->umem;
+ } else {
+ obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access_flags);
+ if (IS_ERR(obj->umem))
+ return PTR_ERR(obj->umem);
+ }
return 0;
}
@@ -2211,7 +2231,8 @@ static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem,
static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
struct uverbs_attr_bundle *attrs,
struct devx_umem *obj,
- struct devx_umem_reg_cmd *cmd)
+ struct devx_umem_reg_cmd *cmd,
+ int access)
{
unsigned long pgsz_bitmap;
unsigned int page_size;
@@ -2260,6 +2281,9 @@ static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
MLX5_SET(umem, umem, page_offset,
ib_umem_dma_offset(obj->umem, page_size));
+ if (mlx5_umem_needs_ats(dev, obj->umem, access))
+ MLX5_SET(umem, umem, ats, 1);
+
mlx5_ib_populate_pas(obj->umem, page_size, mtt,
(obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
MLX5_IB_MTT_READ);
@@ -2277,20 +2301,30 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ int access_flags;
int err;
if (!c->devx_uid)
return -EINVAL;
+ err = uverbs_get_flags32(&access_flags, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_RELAXED_ORDERING);
+ if (err)
+ return err;
+
obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
if (!obj)
return -ENOMEM;
- err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
+ err = devx_umem_get(dev, &c->ibucontext, attrs, obj, access_flags);
if (err)
goto err_obj_free;
- err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd);
+ err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd, access_flags);
if (err)
goto err_umem_release;
@@ -2822,6 +2856,8 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY),
+ UVERBS_ATTR_RAW_FD(MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD,
+ UA_OPTIONAL),
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
enum ib_access_flags),
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index 001d766cf291..3669c90b2dad 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -336,9 +336,15 @@ err_copy:
static enum mlx5_sw_icm_type get_icm_type(int uapi_type)
{
- return uapi_type == MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM ?
- MLX5_SW_ICM_TYPE_STEERING :
- MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ switch (uapi_type) {
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ default:
+ return MLX5_SW_ICM_TYPE_STEERING;
+ }
}
static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
@@ -347,11 +353,32 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
int type)
{
struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
- enum mlx5_sw_icm_type icm_type = get_icm_type(type);
+ enum mlx5_sw_icm_type icm_type;
struct mlx5_ib_dm_icm *dm;
u64 act_size;
int err;
+ if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW))
+ return ERR_PTR(-EPERM);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2)))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
return ERR_PTR(-ENOMEM);
@@ -359,19 +386,6 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
dm->base.type = type;
dm->base.ibdm.device = ctx->device;
- if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) {
- err = -EPERM;
- goto free;
- }
-
- if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))) {
- err = -EOPNOTSUPP;
- goto free;
- }
-
/* Allocation size must a multiple of the basic block size
* and a power of 2.
*/
@@ -379,6 +393,8 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
act_size = roundup_pow_of_two(act_size);
dm->base.size = act_size;
+ icm_type = get_icm_type(type);
+
err = mlx5_dm_sw_icm_alloc(dev, icm_type, act_size, attr->alignment,
to_mucontext(ctx)->devx_uid,
&dm->base.dev_addr, &dm->obj_id);
@@ -420,8 +436,8 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
case MLX5_IB_UAPI_DM_TYPE_MEMIC:
return handle_alloc_dm_memic(context, attr, attrs);
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- return handle_alloc_dm_sw_icm(context, attr, attrs, type);
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
return handle_alloc_dm_sw_icm(context, attr, attrs, type);
default:
return ERR_PTR(-EOPNOTSUPP);
@@ -474,6 +490,7 @@ static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm,
return 0;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm));
default:
return -EOPNOTSUPP;
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 6398e2f48579..e32111117a5e 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -32,6 +32,7 @@
#include <linux/kref.h>
#include <linux/slab.h>
+#include <linux/sched/mm.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index b780185d9dc6..490ec308e309 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -15,7 +15,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
-#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
#include <net/inet_ecn.h>
#include "mlx5_ib.h"
@@ -148,16 +147,6 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
{
switch (maction->ib_action.type) {
- case IB_FLOW_ACTION_ESP:
- if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
- return -EINVAL;
- /* Currently only AES_GCM keymat is supported by the driver */
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
- return 0;
case IB_FLOW_ACTION_UNSPECIFIED:
if (maction->flow_action_raw.sub_type ==
MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
@@ -368,14 +357,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev,
ib_spec->type & IB_FLOW_SPEC_INNER);
break;
case IB_FLOW_SPEC_ESP:
- if (ib_spec->esp.mask.seq)
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
- ntohl(ib_spec->esp.mask.spi));
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- ntohl(ib_spec->esp.val.spi));
- break;
+ return -EOPNOTSUPP;
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
@@ -587,47 +569,6 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
return false;
}
-enum valid_spec {
- VALID_SPEC_INVALID,
- VALID_SPEC_VALID,
- VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- const u32 *match_c = spec->match_criteria;
- bool is_crypto =
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /*
- * Currently only crypto is supported in egress, when regular egress
- * rules would be supported, always return VALID_SPEC_NA.
- */
- if (!is_crypto)
- return VALID_SPEC_NA;
-
- return is_crypto && is_ipsec &&
- (!egress || (!is_drop &&
- !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- /* We curretly only support ipsec egress flow */
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
const struct ib_flow_attr *flow_attr,
bool check_inner)
@@ -738,7 +679,15 @@ enum flow_table_type {
#define MLX5_FS_MAX_TYPES 6
#define MLX5_FS_MAX_ENTRIES BIT(16)
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
+}
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_namespace *ns,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
@@ -747,6 +696,8 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
+ if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
+ ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
@@ -843,8 +794,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
- return _get_prio(ns, prio, priority, max_table_size, num_groups,
- flags);
+ return _get_prio(dev, ns, prio, priority, max_table_size,
+ num_groups, flags);
return prio;
}
@@ -986,7 +937,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
prio = &dev->flow_db->opfcs[type];
if (!prio->flow_table) {
- prio = _get_prio(ns, prio, priority,
+ prio = _get_prio(dev, ns, prio, priority,
dev->num_ports * MAX_OPFC_RULES, 1, 0);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
@@ -1154,12 +1105,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- if (is_egress &&
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
- err = -EINVAL;
- goto free;
- }
-
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
struct mlx5_ib_mcounters *mcounters;
@@ -1472,8 +1417,8 @@ free_ucmd:
}
static struct mlx5_ib_flow_prio *
-_get_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
+_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
+ enum mlx5_flow_namespace_type ns_type,
bool mcast)
{
struct mlx5_flow_namespace *ns = NULL;
@@ -1486,11 +1431,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
- priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+ priority = ib_prio_to_core_prio(user_priority, false);
esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- switch (fs_matcher->ns_type) {
+ switch (ns_type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
@@ -1508,7 +1453,7 @@ _get_flow_table(struct mlx5_ib_dev *dev,
!esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
break;
- case MLX5_FLOW_NAMESPACE_FDB:
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
max_table_size = BIT(
MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
@@ -1517,17 +1462,17 @@ _get_flow_table(struct mlx5_ib_dev *dev,
reformat_l3_tunnel_to_l2) &&
esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- priority = FDB_BYPASS_PATH;
+ priority = user_priority;
break;
case MLX5_FLOW_NAMESPACE_RDMA_RX:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
- priority = fs_matcher->priority;
+ priority = user_priority;
break;
case MLX5_FLOW_NAMESPACE_RDMA_TX:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
- priority = fs_matcher->priority;
+ priority = user_priority;
break;
default:
break;
@@ -1535,19 +1480,19 @@ _get_flow_table(struct mlx5_ib_dev *dev,
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
- ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
+ ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
- switch (fs_matcher->ns_type) {
+ switch (ns_type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
prio = &dev->flow_db->prios[priority];
break;
case MLX5_FLOW_NAMESPACE_EGRESS:
prio = &dev->flow_db->egress_prios[priority];
break;
- case MLX5_FLOW_NAMESPACE_FDB:
- prio = &dev->flow_db->fdb;
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ prio = &dev->flow_db->fdb[priority];
break;
case MLX5_FLOW_NAMESPACE_RDMA_RX:
prio = &dev->flow_db->rdma_rx[priority];
@@ -1564,7 +1509,7 @@ _get_flow_table(struct mlx5_ib_dev *dev,
if (prio->flow_table)
return prio;
- return _get_prio(ns, prio, priority, max_table_size,
+ return _get_prio(dev, ns, prio, priority, max_table_size,
MLX5_FS_MAX_TYPES, flags);
}
@@ -1683,7 +1628,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, fs_matcher, mcast);
+ ft_prio = _get_flow_table(dev, fs_matcher->priority,
+ fs_matcher->ns_type, mcast);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -1740,149 +1686,6 @@ unlock:
return ERR_PTR(err);
}
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
- u32 flags = 0;
-
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
- return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \
- MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dev *mdev = to_mdev(device);
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
- struct mlx5_ib_flow_action *action;
- u64 action_flags;
- u64 flags;
- int err = 0;
-
- err = uverbs_get_flags64(
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
- if (err)
- return ERR_PTR(err);
-
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
- /* We current only support a subset of the standard features. Only a
- * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
- * (with overlap). Full offload mode isn't supported.
- */
- if (!attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (attr->keymat->protocol !=
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
- return ERR_PTR(-EOPNOTSUPP);
-
- aes_gcm = &attr->keymat->keymat.aes_gcm;
-
- if (aes_gcm->icv_len != 16 ||
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return ERR_PTR(-EOPNOTSUPP);
-
- action = kmalloc(sizeof(*action), GFP_KERNEL);
- if (!action)
- return ERR_PTR(-ENOMEM);
-
- action->esp_aes_gcm.ib_flags = attr->flags;
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
- sizeof(accel_attrs.keymat.aes_gcm.salt));
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
- action->esp_aes_gcm.ctx =
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
- err = PTR_ERR(action->esp_aes_gcm.ctx);
- goto err_parse;
- }
-
- action->esp_aes_gcm.ib_flags = attr->flags;
-
- return &action->ib_action;
-
-err_parse:
- kfree(action);
- return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
- int err = 0;
-
- if (attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
- return -EOPNOTSUPP;
-
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
- * be modified.
- */
- if (!(maction->esp_aes_gcm.ib_flags &
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
- return -EINVAL;
-
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
- sizeof(accel_attrs));
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
- else
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
- &accel_attrs);
- if (err)
- return err;
-
- maction->esp_aes_gcm.ib_flags &=
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
- maction->esp_aes_gcm.ib_flags |=
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
- return 0;
-}
-
static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
{
switch (maction->flow_action_raw.sub_type) {
@@ -1906,13 +1709,6 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
struct mlx5_ib_flow_action *maction = to_mflow_act(action);
switch (action->type) {
- case IB_FLOW_ACTION_ESP:
- /*
- * We only support aes_gcm by now, so we implicitly know this is
- * the underline crypto.
- */
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
- break;
case IB_FLOW_ACTION_UNSPECIFIED:
destroy_flow_action_raw(maction);
break;
@@ -1937,7 +1733,7 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
break;
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
- *namespace = MLX5_FLOW_NAMESPACE_FDB;
+ *namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
break;
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
@@ -2029,8 +1825,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
}
/* Allow only DEVX object, drop as dest for FDB */
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
- (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
+ !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
return -EINVAL;
/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
@@ -2050,7 +1846,7 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
if (!is_flow_dest(devx_obj, dest_id, dest_type))
return -EINVAL;
/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
*dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
return -EINVAL;
@@ -2230,6 +2026,23 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
return 0;
}
+static int steering_anchor_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_steering_anchor *obj = uobject->object;
+
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
+
+ mutex_lock(&obj->dev->flow_db->lock);
+ put_flow_table(obj->dev, obj->ft_prio, true);
+ mutex_unlock(&obj->dev->flow_db->lock);
+
+ kfree(obj);
+ return 0;
+}
+
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
struct mlx5_ib_flow_matcher *obj)
{
@@ -2265,12 +2078,10 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
if (err)
return err;
- if (flags) {
- mlx5_ib_ft_type_to_namespace(
+ if (flags)
+ return mlx5_ib_ft_type_to_namespace(
MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
&obj->ns_type);
- return 0;
- }
}
obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
@@ -2320,7 +2131,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
if (err)
goto end;
- if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
err = -EINVAL;
goto end;
@@ -2336,6 +2147,75 @@ end:
return err;
}
+static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_ib_steering_anchor *obj;
+ struct mlx5_ib_flow_prio *ft_prio;
+ u16 priority;
+ u32 ft_id;
+ int err;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ err = uverbs_get_const(&ib_uapi_ft_type, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
+ if (err)
+ return err;
+
+ err = uverbs_copy_from(&priority, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
+ if (err)
+ return err;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ mutex_lock(&dev->flow_db->lock);
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0);
+ if (IS_ERR(ft_prio)) {
+ mutex_unlock(&dev->flow_db->lock);
+ err = PTR_ERR(ft_prio);
+ goto free_obj;
+ }
+
+ ft_prio->refcount++;
+ ft_id = mlx5_flow_table_id(ft_prio->flow_table);
+ mutex_unlock(&dev->flow_db->lock);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ &ft_id, sizeof(ft_id));
+ if (err)
+ goto put_flow_table;
+
+ uobj->object = obj;
+ obj->dev = dev;
+ obj->ft_prio = ft_prio;
+ atomic_set(&obj->usecnt, 0);
+
+ return 0;
+
+put_flow_table:
+ mutex_lock(&dev->flow_db->lock);
+ put_flow_table(dev, ft_prio, true);
+ mutex_unlock(&dev->flow_db->lock);
+free_obj:
+ kfree(obj);
+
+ return err;
+}
+
static struct ib_flow_action *
mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
enum mlx5_ib_uapi_flow_table_type ft_type,
@@ -2692,6 +2572,35 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
+
const struct uapi_definition mlx5_ib_flow_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_FLOW_MATCHER),
@@ -2700,6 +2609,9 @@ const struct uapi_definition mlx5_ib_flow_defs[] = {
&mlx5_ib_fs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_actions),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
{},
};
@@ -2709,11 +2621,6 @@ static const struct ib_device_ops flow_ops = {
.destroy_flow_action = mlx5_ib_destroy_flow_action,
};
-static const struct ib_device_ops flow_ipsec_ops = {
- .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
- .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
@@ -2724,9 +2631,5 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->flow_db->lock);
ib_set_device_ops(&dev->ib_dev, &flow_ops);
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops);
-
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 3ad8f637c589..b804f2dd5628 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -100,7 +100,7 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
port_type) == MLX5_CAP_PORT_TYPE_IB)
num_qps = pd->device->attrs.max_pkeys;
else if (dev->lag_active)
- num_qps = MLX5_MAX_PORTS;
+ num_qps = dev->lag_ports;
}
gsi = &mqp->gsi;
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index f2f62875d072..afeb5e53254f 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index ec242a5a17a3..9c8a7b206dcf 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -147,6 +147,28 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
+static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out,
+ size_t sz)
+{
+ u32 *in;
+ int err;
+
+ in = kvzalloc(sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(ppcnt_reg, in, local_port, port_num);
+
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
+ err = mlx5_core_access_reg(dev, in, sz, out,
+ sz, MLX5_REG_PPCNT, 0, 0);
+
+ kvfree(in);
+ return err;
+}
+
static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
@@ -166,6 +188,12 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
mdev = dev->mdev;
mdev_port_num = 1;
}
+ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) {
+ /* set local port to one for Function-Per-Port HCA. */
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
+
/* Declaring support of extended counters */
if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
struct ib_class_port_info cpi = {};
@@ -202,8 +230,7 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
goto done;
}
- err = mlx5_core_query_ib_ppcnt(mdev, mdev_port_num,
- out_cnt, sz);
+ err = query_ib_ppcnt(mdev, mdev_port_num, out_cnt, sz);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
}
@@ -291,7 +318,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -318,7 +345,7 @@ static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
if (!in_mad)
return -ENOMEM;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad,
@@ -405,7 +432,7 @@ int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
@@ -430,7 +457,7 @@ int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
@@ -456,7 +483,7 @@ int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -485,7 +512,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -496,7 +523,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
memcpy(gid->raw, out_mad->data + 8, 8);
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -530,7 +557,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
/* props being zeroed by the caller, avoid zeroing it here */
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -584,6 +611,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP)
props->active_speed = IB_SPEED_HDR;
break;
+ case 8:
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_LINK_SPEED_NDR_SUP)
+ props->active_speed = IB_SPEED_NDR;
+ break;
}
}
@@ -591,7 +623,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
if (props->active_speed == 4) {
if (dev->port_caps[port - 1].ext_port_cap &
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5ec8bd2f0b2f..c669ef6e47e7 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -26,7 +26,7 @@
#include <linux/mlx5/eswitch.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
#include <rdma/lag.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
@@ -41,12 +41,11 @@
#include "wr.h"
#include "restrack.h"
#include "counters.h"
-#include <linux/mlx5/accel.h>
+#include "umr.h"
#include <rdma/uverbs_std_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
-#include <rdma/ib_umem_odp.h>
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
@@ -855,13 +854,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
/* We support 'Gappy' memory registration too */
- props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
+ props->kernel_cap_flags |= IBK_SG_GAPS_REG;
}
/* IB_WR_REG_MR always requires changing the entity size with UMR */
if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
- props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
+ props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@@ -870,7 +869,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_GUARD_T10DIF_CSUM;
}
if (MLX5_CAP_GEN(mdev, block_lb_mc))
- props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
if (MLX5_CAP_ETH(mdev, csum_cap)) {
@@ -906,10 +905,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_UDP |
MLX5_RX_HASH_DST_PORT_UDP |
MLX5_RX_HASH_INNER;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- resp.rss_caps.rx_hash_fields_mask |=
- MLX5_RX_HASH_IPSEC_SPI;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
@@ -921,7 +916,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
- props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ props->kernel_cap_flags |= IBK_UD_TSO;
}
if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
@@ -997,7 +992,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
- props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+ props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
props->odp_caps = dev->odp_caps;
if (!uhw) {
/* ODP for kernel QPs is not implemented for receive
@@ -1018,11 +1013,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (MLX5_CAP_GEN(mdev, cd))
- props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
-
if (mlx5_core_is_vf(mdev))
- props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+ props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET && raw_support) {
@@ -1791,23 +1783,6 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
MLX5_CAP_GEN(dev->mdev,
num_of_uars_per_page) : 1;
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE) {
- if (mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_EGRESS))
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
- }
-
resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
resp->num_ports = dev->num_ports;
@@ -1850,6 +1825,9 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
if (MLX5_CAP_GEN(dev->mdev, drain_sigerr))
resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS;
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG;
+
return 0;
}
@@ -2762,26 +2740,24 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
int err;
int port;
- for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) {
- dev->port_caps[port - 1].has_smi = false;
- if (MLX5_CAP_GEN(dev->mdev, port_type) ==
- MLX5_CAP_PORT_TYPE_IB) {
- if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
- err = mlx5_query_hca_vport_context(dev->mdev, 0,
- port, 0,
- &vport_ctx);
- if (err) {
- mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
- port, err);
- return err;
- }
- dev->port_caps[port - 1].has_smi =
- vport_ctx.has_smi;
- } else {
- dev->port_caps[port - 1].has_smi = true;
- }
+ if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ return 0;
+
+ for (port = 1; port <= dev->num_ports; port++) {
+ if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+ dev->port_caps[port - 1].has_smi = true;
+ continue;
}
+ err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0,
+ &vport_ctx);
+ if (err) {
+ mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+ port, err);
+ return err;
+ }
+ dev->port_caps[port - 1].has_smi = vport_ctx.has_smi;
}
+
return 0;
}
@@ -3013,6 +2989,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
}
dev->flow_db->lag_demux_ft = ft;
+ dev->lag_ports = mlx5_lag_get_num_ports(mdev);
dev->lag_active = true;
return 0;
@@ -3605,13 +3582,6 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
&UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
ADD_UVERBS_ATTRIBUTES_SIMPLE(
- mlx5_ib_flow_action,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- enum mlx5_ib_uapi_flow_action_flags));
-
-ADD_UVERBS_ATTRIBUTES_SIMPLE(
mlx5_ib_query_context,
UVERBS_OBJECT_DEVICE,
UVERBS_METHOD_QUERY_CONTEXT,
@@ -3628,8 +3598,6 @@ static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
UAPI_DEF_CHAIN(mlx5_ib_dm_defs),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
- &mlx5_ib_flow_action),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
@@ -4034,16 +4002,11 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
int err;
- err = mlx5_mr_cache_cleanup(dev);
+ err = mlx5_mkey_cache_cleanup(dev);
if (err)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
- if (dev->umrc.qp)
- ib_destroy_qp(dev->umrc.qp);
- if (dev->umrc.cq)
- ib_free_cq(dev->umrc.cq);
- if (dev->umrc.pd)
- ib_dealloc_pd(dev->umrc.pd);
+ mlx5r_umr_resource_cleanup(dev);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -4051,112 +4014,19 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
ib_unregister_device(&dev->ib_dev);
}
-enum {
- MAX_UMR_WR = 128,
-};
-
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- struct ib_qp_init_attr *init_attr = NULL;
- struct ib_qp_attr *attr = NULL;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
int ret;
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
- init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto error_0;
- }
-
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- ret = PTR_ERR(pd);
- goto error_0;
- }
-
- cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
- if (IS_ERR(cq)) {
- mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
- ret = PTR_ERR(cq);
- goto error_2;
- }
-
- init_attr->send_cq = cq;
- init_attr->recv_cq = cq;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->cap.max_send_wr = MAX_UMR_WR;
- init_attr->cap.max_send_sge = 1;
- init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
- init_attr->port_num = 1;
- qp = ib_create_qp(pd, init_attr);
- if (IS_ERR(qp)) {
- mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
- ret = PTR_ERR(qp);
- goto error_3;
- }
-
- attr->qp_state = IB_QPS_INIT;
- attr->port_num = 1;
- ret = ib_modify_qp(qp, attr,
- IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTR;
- attr->path_mtu = IB_MTU_256;
-
- ret = ib_modify_qp(qp, attr, IB_QP_STATE);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTS;
- ret = ib_modify_qp(qp, attr, IB_QP_STATE);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
- goto error_4;
- }
-
- dev->umrc.qp = qp;
- dev->umrc.cq = cq;
- dev->umrc.pd = pd;
+ ret = mlx5r_umr_resource_init(dev);
+ if (ret)
+ return ret;
- sema_init(&dev->umrc.sem, MAX_UMR_WR);
- ret = mlx5_mr_cache_init(dev);
+ ret = mlx5_mkey_cache_init(dev);
if (ret) {
mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- goto error_4;
+ mlx5r_umr_resource_cleanup(dev);
}
-
- kfree(attr);
- kfree(init_attr);
-
- return 0;
-
-error_4:
- ib_destroy_qp(qp);
- dev->umrc.qp = NULL;
-
-error_3:
- ib_free_cq(cq);
- dev->umrc.cq = NULL;
-
-error_2:
- ib_dealloc_pd(pd);
- dev->umrc.pd = NULL;
-
-error_0:
- kfree(attr);
- kfree(init_attr);
return ret;
}
@@ -4178,7 +4048,7 @@ static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
if (!mlx5_debugfs_root)
return 0;
- root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
+ root = debugfs_create_dir("delay_drop", mlx5_debugfs_get_dev_root(dev->mdev));
dev->delay_drop.dir_debugfs = root;
debugfs_create_atomic_t("num_timeout_events", 0400, root,
@@ -4422,7 +4292,7 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev,
}
mutex_unlock(&mlx5_ib_multiport_mutex);
- dev_set_drvdata(&adev->dev, mpi);
+ auxiliary_set_drvdata(adev, mpi);
return 0;
}
@@ -4430,7 +4300,7 @@ static void mlx5r_mp_remove(struct auxiliary_device *adev)
{
struct mlx5_ib_multiport_info *mpi;
- mpi = dev_get_drvdata(&adev->dev);
+ mpi = auxiliary_get_drvdata(adev);
mutex_lock(&mlx5_ib_multiport_mutex);
if (mpi->ibdev)
mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
@@ -4468,7 +4338,7 @@ static int mlx5r_probe(struct auxiliary_device *adev,
dev->mdev = mdev;
dev->num_ports = num_ports;
- if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev))
+ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
profile = &raw_eth_profile;
else
profile = &pf_profile;
@@ -4480,7 +4350,7 @@ static int mlx5r_probe(struct auxiliary_device *adev,
return ret;
}
- dev_set_drvdata(&adev->dev, dev);
+ auxiliary_set_drvdata(adev, dev);
return 0;
}
@@ -4488,7 +4358,7 @@ static void mlx5r_remove(struct auxiliary_device *adev)
{
struct mlx5_ib_dev *dev;
- dev = dev_get_drvdata(&adev->dev);
+ dev = auxiliary_get_drvdata(adev);
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 844545064c9e..96ffbbaf0a73 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -30,8 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include <linux/jiffies.h>
@@ -153,6 +151,7 @@ static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
for (i = 0; i < 8; i++)
mlx5_write64(&mmio_wqe[i * 2],
bf->bfreg->map + bf->offset + i * 8);
+ io_stop_wc();
bf->offset ^= bf->buf_size;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4a7a56ed740b..4a7f7064bd0e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -232,6 +232,7 @@ enum {
#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
#define MLX5_IB_NUM_SNIFFER_FTS 2
#define MLX5_IB_NUM_EGRESS_FTS 1
+#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
unsigned int refcount;
@@ -258,6 +259,12 @@ struct mlx5_ib_flow_matcher {
u8 match_criteria_enable;
};
+struct mlx5_ib_steering_anchor {
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_dev *dev;
+ atomic_t usecnt;
+};
+
struct mlx5_ib_pp {
u16 index;
struct mlx5_core_dev *mdev;
@@ -276,7 +283,7 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
- struct mlx5_ib_flow_prio fdb;
+ struct mlx5_ib_flow_prio fdb[MLX5_IB_NUM_FDB_FTS];
struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
@@ -290,16 +297,9 @@ struct mlx5_ib_flow_db {
};
/* Use macros here so that don't have to duplicate
- * enum ib_send_flags and enum ib_qp_type for low-level driver
+ * enum ib_qp_type for low-level driver
*/
-#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0)
-#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1)
-#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2)
-#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3)
-#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4)
-#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END
-
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
* IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
@@ -310,9 +310,6 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_QPT_DCT IB_QPT_RESERVED4
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
-#define MLX5_IB_UMR_OCTOWORD 16
-#define MLX5_IB_UMR_XLT_ALIGNMENT 64
-
#define MLX5_IB_UPD_XLT_ZAP BIT(0)
#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
@@ -538,24 +535,6 @@ struct mlx5_ib_cq_buf {
int nent;
};
-struct mlx5_umr_wr {
- struct ib_send_wr wr;
- u64 virt_addr;
- u64 offset;
- struct ib_pd *pd;
- unsigned int page_shift;
- unsigned int xlt_size;
- u64 length;
- int access_flags;
- u32 mkey;
- u8 ignore_free_state:1;
-};
-
-static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
-{
- return container_of(wr, struct mlx5_umr_wr, wr);
-}
-
enum mlx5_ib_cq_pr_flags {
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0,
MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1,
@@ -640,6 +619,7 @@ struct mlx5_ib_mkey {
unsigned int ndescs;
struct wait_queue_head wait;
refcount_t usecount;
+ struct mlx5_cache_ent *cache_ent;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -662,20 +642,10 @@ struct mlx5_ib_mr {
struct ib_mr ibmr;
struct mlx5_ib_mkey mmkey;
- /* User MR data */
- struct mlx5_cache_ent *cache_ent;
+ struct ib_umem *umem;
- /* This is zero'd when the MR is allocated */
union {
- /* Used only while the MR is in the cache */
- struct {
- u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
- struct mlx5_async_work cb_work;
- /* Cache list element */
- struct list_head list;
- };
-
- /* Used only by kernel MRs */
+ /* Used only by kernel MRs (umem == NULL) */
struct {
void *descs;
void *descs_alloc;
@@ -696,9 +666,8 @@ struct mlx5_ib_mr {
int data_length;
};
- /* Used only by User MRs */
+ /* Used only by User MRs (umem != NULL) */
struct {
- struct ib_umem *umem;
unsigned int page_shift;
/* Current access_flags */
int access_flags;
@@ -715,12 +684,6 @@ struct mlx5_ib_mr {
};
};
-/* Zero the fields in the mr that are variant depending on usage */
-static inline void mlx5_clear_mr(struct mlx5_ib_mr *mr)
-{
- memset(mr->out, 0, sizeof(*mr) - offsetof(struct mlx5_ib_mr, out));
-}
-
static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
{
return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
@@ -744,56 +707,67 @@ struct mlx5_ib_umr_context {
struct completion done;
};
+enum {
+ MLX5_UMR_STATE_UNINIT,
+ MLX5_UMR_STATE_ACTIVE,
+ MLX5_UMR_STATE_RECOVER,
+ MLX5_UMR_STATE_ERR,
+};
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- /* control access to UMR QP
+ /* Protects from UMR QP overflow
*/
struct semaphore sem;
+ /* Protects from using UMR while the UMR is not active
+ */
+ struct mutex lock;
+ unsigned int state;
};
struct mlx5_cache_ent {
- struct list_head head;
- /* sync access to the cahce entry
- */
- spinlock_t lock;
-
+ struct xarray mkeys;
+ unsigned long stored;
+ unsigned long reserved;
char name[4];
u32 order;
- u32 xlt;
u32 access_mode;
u32 page;
+ unsigned int ndescs;
u8 disabled:1;
u8 fill_to_high_water:1;
/*
- * - available_mrs is the length of list head, ie the number of MRs
- * available for immediate allocation.
- * - total_mrs is available_mrs plus all in use MRs that could be
- * returned to the cache.
- * - limit is the low water mark for available_mrs, 2* limit is the
+ * - limit is the low water mark for stored mkeys, 2* limit is the
* upper water mark.
- * - pending is the number of MRs currently being created
*/
- u32 total_mrs;
- u32 available_mrs;
+ u32 in_use;
u32 limit;
- u32 pending;
/* Statistics */
u32 miss;
struct mlx5_ib_dev *dev;
- struct work_struct work;
struct delayed_work dwork;
};
-struct mlx5_mr_cache {
+struct mlx5r_async_create_mkey {
+ union {
+ u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
+ u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
+ };
+ struct mlx5_async_work cb_work;
+ struct mlx5_cache_ent *ent;
+ u32 mkey;
+};
+
+struct mlx5_mkey_cache {
struct workqueue_struct *wq;
- struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
+ struct mlx5_cache_ent ent[MAX_MKEY_CACHE_ENTRIES];
struct dentry *root;
unsigned long last_add;
};
@@ -1092,7 +1066,7 @@ struct mlx5_ib_dev {
struct mlx5_ib_resources devr;
atomic_t mkey_var;
- struct mlx5_mr_cache cache;
+ struct mlx5_mkey_cache cache;
struct timer_list delay_timer;
/* Prevents soft lock on massive reg MRs */
struct mutex slow_path_mutex;
@@ -1131,6 +1105,7 @@ struct mlx5_ib_dev {
struct xarray sig_mrs;
struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
u16 pkey_table_len;
+ u8 lag_ports;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1290,9 +1265,6 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
struct uverbs_attr_bundle *attrs);
int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
-int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
- int page_shift, int flags);
-int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
@@ -1339,11 +1311,12 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
u64 access_flags);
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
+int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- unsigned int entry, int access_flags);
+ struct mlx5_cache_ent *ent,
+ int access_flags);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
@@ -1367,7 +1340,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags);
@@ -1386,7 +1359,7 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags) {}
@@ -1465,22 +1438,11 @@ extern const struct uapi_definition mlx5_ib_flow_defs[];
extern const struct uapi_definition mlx5_ib_qos_defs[];
extern const struct uapi_definition mlx5_ib_std_types_defs[];
-static inline void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
static inline int is_qp1(enum ib_qp_type qp_type)
{
return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
}
-#define MLX5_MAX_UMR_SHIFT 16
-#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
-
static inline u32 check_cq_create_flags(u32 flags)
{
/*
@@ -1546,71 +1508,12 @@ static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_suppor
MLX5_UARS_IN_PAGE : 1;
}
-static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
- struct mlx5_bfreg_info *bfregi)
-{
- return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
-}
-
extern void *xlt_emergency_page;
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
-static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev,
- size_t length)
-{
- /*
- * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
- * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
- * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
- * can never be enabled without this capability. Simplify this weird
- * quirky hardware by just saying it can't use PAS lists with UMR at
- * all.
- */
- if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
- return false;
-
- /*
- * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
- * used.
- */
- if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
- length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
- return false;
- return true;
-}
-
-/*
- * true if an existing MR can be reconfigured to new access_flags using UMR.
- * Older HW cannot use UMR to update certain elements of the MKC. See
- * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask()
- */
-static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
- unsigned int current_access_flags,
- unsigned int target_access_flags)
-{
- unsigned int diffs = current_access_flags ^ target_access_flags;
-
- if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
- MLX5_CAP_GEN(dev->mdev, atomic) &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
- return false;
-
- if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
- MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
- !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
- return false;
-
- if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
- MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
- !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
- return false;
-
- return true;
-}
-
static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
struct mlx5_ib_mkey *mmkey)
{
@@ -1638,6 +1541,18 @@ int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
{
+ /*
+ * If the driver is in hash mode and the port_select_flow_table_bypass cap
+ * is supported, it means that the driver no longer needs to assign the port
+ * affinity by default. If a user wants to set the port affinity explicitly,
+ * the user has a dedicated API to do that, so there is no need to assign
+ * the port affinity by default.
+ */
+ if (dev->lag_active &&
+ mlx5_lag_mode_is_hash(dev->mdev) &&
+ MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass))
+ return 0;
+
return dev->lag_active ||
(MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 &&
MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity));
@@ -1648,4 +1563,40 @@ static inline bool rt_supported(int ts_cap)
return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
}
+
+/*
+ * PCI Peer to Peer is a trainwreck. If no switch is present then things
+ * sometimes work, depending on the pci_distance_p2p logic for excluding broken
+ * root complexes. However if a switch is present in the path, then things get
+ * really ugly depending on how the switch is setup. This table assumes that the
+ * root complex is strict and is validating that all req/reps are matches
+ * perfectly - so any scenario where it sees only half the transaction is a
+ * failure.
+ *
+ * CR/RR/DT ATS RO P2P
+ * 00X X X OK
+ * 010 X X fails (request is routed to root but root never sees comp)
+ * 011 0 X fails (request is routed to root but root never sees comp)
+ * 011 1 X OK
+ * 10X X 1 OK
+ * 101 X 0 fails (completion is routed to root but root didn't see req)
+ * 110 X 0 SLOW
+ * 111 0 0 SLOW
+ * 111 1 0 fails (completion is routed to root but root didn't see req)
+ * 111 1 1 OK
+ *
+ * Unfortunately we cannot reliably know if a switch is present or what the
+ * CR/RR/DT ACS settings are, as in a VM that is all hidden. Assume that
+ * CR/RR/DT is 111 if the ATS cap is enabled and follow the last three rows.
+ *
+ * For now assume if the umem is a dma_buf then it is P2P.
+ */
+static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev,
+ struct ib_umem *umem, int access_flags)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, ats) || !umem->is_dmabuf)
+ return false;
+ return access_flags & IB_ACCESS_RELAXED_ORDERING;
+}
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 63e2129f1142..410cc5fd2523 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -39,18 +39,10 @@
#include <linux/delay.h>
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
-#include <rdma/ib_verbs.h>
#include "dm.h"
#include "mlx5_ib.h"
-
-/*
- * We can't use an array for xlt_emergency_page because dma_map_single doesn't
- * work on kernel modules memory
- */
-void *xlt_emergency_page;
-static DEFINE_MUTEX(xlt_emergency_page_mutex);
+#include "umr.h"
enum {
MAX_PENDING_REG_MR = 8,
@@ -68,7 +60,6 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- bool ro_pci_enabled = pcie_relaxed_ordering_enabled(dev->mdev->pdev);
MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
@@ -76,27 +67,27 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, mkc, lr, 1);
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
- MLX5_SET(mkc, mkc, relaxed_ordering_write,
- (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled);
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
- MLX5_SET(mkc, mkc, relaxed_ordering_read,
- (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled);
+ if ((acc & IB_ACCESS_RELAXED_ORDERING) &&
+ pcie_relaxed_ordering_enabled(dev->mdev->pdev)) {
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
+ }
MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET64(mkc, mkc, start_addr, start_addr);
}
-static void assign_mkey_variant(struct mlx5_ib_dev *dev,
- struct mlx5_ib_mkey *mkey, u32 *in)
+static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in)
{
u8 key = atomic_inc_return(&dev->mkey_var);
void *mkc;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, mkey_7_0, key);
- mkey->key = key;
+ *mkey = key;
}
static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
@@ -104,7 +95,7 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
{
int ret;
- assign_mkey_variant(dev, mkey, in);
+ assign_mkey_variant(dev, &mkey->key, in);
ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
if (!ret)
init_waitqueue_head(&mkey->wait);
@@ -112,27 +103,23 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
return ret;
}
-static int
-mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
- struct mlx5_ib_mkey *mkey,
- struct mlx5_async_ctx *async_ctx,
- u32 *in, int inlen, u32 *out, int outlen,
- struct mlx5_async_work *context)
+static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create)
{
- MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
- assign_mkey_variant(dev, mkey, in);
- return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
- create_mkey_callback, context);
+ struct mlx5_ib_dev *dev = async_create->ent->dev;
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out);
+
+ MLX5_SET(create_mkey_in, async_create->in, opcode,
+ MLX5_CMD_OP_CREATE_MKEY);
+ assign_mkey_variant(dev, &async_create->mkey, async_create->in);
+ return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen,
+ async_create->out, outlen, create_mkey_callback,
+ &async_create->cb_work);
}
-static int mr_cache_max_order(struct mlx5_ib_dev *dev);
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev);
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
-static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
-{
- return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
-}
-
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
@@ -140,186 +127,277 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
}
+static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
+{
+ if (status == -ENXIO) /* core driver is not available */
+ return;
+
+ mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+ if (status != -EREMOTEIO) /* driver specific failure */
+ return;
+
+ /* Failed in FW, print cmd out failure details */
+ mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
+}
+
+
+static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
+ void *to_store)
+{
+ XA_STATE(xas, &ent->mkeys, 0);
+ void *curr;
+
+ xa_lock_irq(&ent->mkeys);
+ if (limit_pendings &&
+ (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
+ xa_unlock_irq(&ent->mkeys);
+ return -EAGAIN;
+ }
+ while (1) {
+ /*
+ * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
+ * doesn't transparently unlock. Instead we set the xas index to
+ * the current value of reserved every iteration.
+ */
+ xas_set(&xas, ent->reserved);
+ curr = xas_load(&xas);
+ if (!curr) {
+ if (to_store && ent->stored == ent->reserved)
+ xas_store(&xas, to_store);
+ else
+ xas_store(&xas, XA_ZERO_ENTRY);
+ if (xas_valid(&xas)) {
+ ent->reserved++;
+ if (to_store) {
+ if (ent->stored != ent->reserved)
+ __xa_store(&ent->mkeys,
+ ent->stored,
+ to_store,
+ GFP_KERNEL);
+ ent->stored++;
+ queue_adjust_cache_locked(ent);
+ WRITE_ONCE(ent->dev->cache.last_add,
+ jiffies);
+ }
+ }
+ }
+ xa_unlock_irq(&ent->mkeys);
+
+ /*
+ * Notice xas_nomem() must always be called as it cleans
+ * up any cached allocation.
+ */
+ if (!xas_nomem(&xas, GFP_KERNEL))
+ break;
+ xa_lock_irq(&ent->mkeys);
+ }
+ if (xas_error(&xas))
+ return xas_error(&xas);
+ if (WARN_ON(curr))
+ return -EINVAL;
+ return 0;
+}
+
+static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
+{
+ void *old;
+
+ ent->reserved--;
+ old = __xa_erase(&ent->mkeys, ent->reserved);
+ WARN_ON(old);
+}
+
+static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey)
+{
+ void *old;
+
+ old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0);
+ WARN_ON(old);
+ ent->stored++;
+}
+
+static u32 pop_stored_mkey(struct mlx5_cache_ent *ent)
+{
+ void *old, *xa_mkey;
+
+ ent->stored--;
+ ent->reserved--;
+
+ if (ent->stored == ent->reserved) {
+ xa_mkey = __xa_erase(&ent->mkeys, ent->stored);
+ WARN_ON(!xa_mkey);
+ return (u32)xa_to_value(xa_mkey);
+ }
+
+ xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ WARN_ON(!xa_mkey || xa_is_err(xa_mkey));
+ old = __xa_erase(&ent->mkeys, ent->reserved);
+ WARN_ON(old);
+ return (u32)xa_to_value(xa_mkey);
+}
+
static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
- struct mlx5_ib_mr *mr =
- container_of(context, struct mlx5_ib_mr, cb_work);
- struct mlx5_cache_ent *ent = mr->cache_ent;
+ struct mlx5r_async_create_mkey *mkey_out =
+ container_of(context, struct mlx5r_async_create_mkey, cb_work);
+ struct mlx5_cache_ent *ent = mkey_out->ent;
struct mlx5_ib_dev *dev = ent->dev;
unsigned long flags;
if (status) {
- mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
- kfree(mr);
- spin_lock_irqsave(&ent->lock, flags);
- ent->pending--;
+ create_mkey_warn(dev, status, mkey_out->out);
+ kfree(mkey_out);
+ xa_lock_irqsave(&ent->mkeys, flags);
+ undo_push_reserve_mkey(ent);
WRITE_ONCE(dev->fill_delay, 1);
- spin_unlock_irqrestore(&ent->lock, flags);
+ xa_unlock_irqrestore(&ent->mkeys, flags);
mod_timer(&dev->delay_timer, jiffies + HZ);
return;
}
- mr->mmkey.type = MLX5_MKEY_MR;
- mr->mmkey.key |= mlx5_idx_to_mkey(
- MLX5_GET(create_mkey_out, mr->out, mkey_index));
- init_waitqueue_head(&mr->mmkey.wait);
-
+ mkey_out->mkey |= mlx5_idx_to_mkey(
+ MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
WRITE_ONCE(dev->cache.last_add, jiffies);
- spin_lock_irqsave(&ent->lock, flags);
- list_add_tail(&mr->list, &ent->head);
- ent->available_mrs++;
- ent->total_mrs++;
+ xa_lock_irqsave(&ent->mkeys, flags);
+ push_to_reserved(ent, mkey_out->mkey);
/* If we are doing fill_to_high_water then keep going. */
queue_adjust_cache_locked(ent);
- ent->pending--;
- spin_unlock_irqrestore(&ent->lock, flags);
+ xa_unlock_irqrestore(&ent->mkeys, flags);
+ kfree(mkey_out);
}
-static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
+static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
{
- struct mlx5_ib_mr *mr;
+ int ret = 0;
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return NULL;
- mr->cache_ent = ent;
+ switch (access_mode) {
+ case MLX5_MKC_ACCESS_MODE_MTT:
+ ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
+ sizeof(struct mlx5_mtt));
+ break;
+ case MLX5_MKC_ACCESS_MODE_KSM:
+ ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
+ sizeof(struct mlx5_klm));
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
+}
+static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
+{
set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_mkc_octo_size(ent->access_mode, ent->ndescs));
MLX5_SET(mkc, mkc, log_page_size, ent->page);
- return mr;
}
/* Asynchronously schedule new MRs to be populated in the cache. */
static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
- size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mr *mr;
+ struct mlx5r_async_create_mkey *async_create;
void *mkc;
- u32 *in;
int err = 0;
int i;
- in = kzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
- mr = alloc_cache_mr(ent, mkc);
- if (!mr) {
- err = -ENOMEM;
- break;
- }
- spin_lock_irq(&ent->lock);
- if (ent->pending >= MAX_PENDING_REG_MR) {
- err = -EAGAIN;
- spin_unlock_irq(&ent->lock);
- kfree(mr);
- break;
- }
- ent->pending++;
- spin_unlock_irq(&ent->lock);
- err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
- &ent->dev->async_ctx, in, inlen,
- mr->out, sizeof(mr->out),
- &mr->cb_work);
+ async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
+ GFP_KERNEL);
+ if (!async_create)
+ return -ENOMEM;
+ mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
+ memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
+ async_create->ent = ent;
+
+ err = push_mkey(ent, true, NULL);
+ if (err)
+ goto free_async_create;
+
+ err = mlx5_ib_create_mkey_cb(async_create);
if (err) {
- spin_lock_irq(&ent->lock);
- ent->pending--;
- spin_unlock_irq(&ent->lock);
mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
- kfree(mr);
- break;
+ goto err_undo_reserve;
}
}
- kfree(in);
+ return 0;
+
+err_undo_reserve:
+ xa_lock_irq(&ent->mkeys);
+ undo_push_reserve_mkey(ent);
+ xa_unlock_irq(&ent->mkeys);
+free_async_create:
+ kfree(async_create);
return err;
}
/* Synchronously create a MR in the cache */
-static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
+static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
{
size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
int err;
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
- mr = alloc_cache_mr(ent, mkc);
- if (!mr) {
- err = -ENOMEM;
- goto free_in;
- }
-
- err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen);
+ err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
if (err)
- goto free_mr;
+ goto free_in;
- init_waitqueue_head(&mr->mmkey.wait);
- mr->mmkey.type = MLX5_MKEY_MR;
WRITE_ONCE(ent->dev->cache.last_add, jiffies);
- spin_lock_irq(&ent->lock);
- ent->total_mrs++;
- spin_unlock_irq(&ent->lock);
- kfree(in);
- return mr;
-free_mr:
- kfree(mr);
free_in:
kfree(in);
- return ERR_PTR(err);
+ return err;
}
static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
- struct mlx5_ib_mr *mr;
+ u32 mkey;
- lockdep_assert_held(&ent->lock);
- if (list_empty(&ent->head))
+ lockdep_assert_held(&ent->mkeys.xa_lock);
+ if (!ent->stored)
return;
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
- ent->available_mrs--;
- ent->total_mrs--;
- spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key);
- kfree(mr);
- spin_lock_irq(&ent->lock);
+ mkey = pop_stored_mkey(ent);
+ xa_unlock_irq(&ent->mkeys);
+ mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
+ xa_lock_irq(&ent->mkeys);
}
static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
bool limit_fill)
+ __acquires(&ent->mkeys) __releases(&ent->mkeys)
{
int err;
- lockdep_assert_held(&ent->lock);
+ lockdep_assert_held(&ent->mkeys.xa_lock);
while (true) {
if (limit_fill)
target = ent->limit * 2;
- if (target == ent->available_mrs + ent->pending)
+ if (target == ent->reserved)
return 0;
- if (target > ent->available_mrs + ent->pending) {
- u32 todo = target - (ent->available_mrs + ent->pending);
+ if (target > ent->reserved) {
+ u32 todo = target - ent->reserved;
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
err = add_keys(ent, todo);
if (err == -EAGAIN)
usleep_range(3000, 5000);
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (err) {
if (err != -EAGAIN)
return err;
@@ -344,15 +422,15 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
/*
* Target is the new value of total_mrs the user requests, however we
- * cannot free MRs that are in use. Compute the target value for
- * available_mrs.
+ * cannot free MRs that are in use. Compute the target value for stored
+ * mkeys.
*/
- spin_lock_irq(&ent->lock);
- if (target < ent->total_mrs - ent->available_mrs) {
+ xa_lock_irq(&ent->mkeys);
+ if (target < ent->in_use) {
err = -EINVAL;
goto err_unlock;
}
- target = target - (ent->total_mrs - ent->available_mrs);
+ target = target - ent->in_use;
if (target < ent->limit || target > ent->limit*2) {
err = -EINVAL;
goto err_unlock;
@@ -360,12 +438,12 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
err = resize_available_mrs(ent, target, false);
if (err)
goto err_unlock;
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
return count;
err_unlock:
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
return err;
}
@@ -376,7 +454,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
+ err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use);
if (err < 0)
return err;
@@ -405,10 +483,10 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
* Upon set we immediately fill the cache to high water mark implied by
* the limit.
*/
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
ent->limit = var;
err = resize_available_mrs(ent, 0, true);
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
if (err)
return err;
return count;
@@ -435,17 +513,17 @@ static const struct file_operations limit_fops = {
.read = limit_read,
};
-static bool someone_adding(struct mlx5_mr_cache *cache)
+static bool someone_adding(struct mlx5_mkey_cache *cache)
{
unsigned int i;
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
struct mlx5_cache_ent *ent = &cache->ent[i];
bool ret;
- spin_lock_irq(&ent->lock);
- ret = ent->available_mrs < ent->limit;
- spin_unlock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
+ ret = ent->stored < ent->limit;
+ xa_unlock_irq(&ent->mkeys);
if (ret)
return true;
}
@@ -459,55 +537,54 @@ static bool someone_adding(struct mlx5_mr_cache *cache)
*/
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
{
- lockdep_assert_held(&ent->lock);
+ lockdep_assert_held(&ent->mkeys.xa_lock);
if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
return;
- if (ent->available_mrs < ent->limit) {
+ if (ent->stored < ent->limit) {
ent->fill_to_high_water = true;
- queue_work(ent->dev->cache.wq, &ent->work);
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
} else if (ent->fill_to_high_water &&
- ent->available_mrs + ent->pending < 2 * ent->limit) {
+ ent->reserved < 2 * ent->limit) {
/*
* Once we start populating due to hitting a low water mark
* continue until we pass the high water mark.
*/
- queue_work(ent->dev->cache.wq, &ent->work);
- } else if (ent->available_mrs == 2 * ent->limit) {
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
+ } else if (ent->stored == 2 * ent->limit) {
ent->fill_to_high_water = false;
- } else if (ent->available_mrs > 2 * ent->limit) {
+ } else if (ent->stored > 2 * ent->limit) {
/* Queue deletion of excess entries */
ent->fill_to_high_water = false;
- if (ent->pending)
+ if (ent->stored != ent->reserved)
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(1000));
else
- queue_work(ent->dev->cache.wq, &ent->work);
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
}
}
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_dev *dev = ent->dev;
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
int err;
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (ent->disabled)
goto out;
- if (ent->fill_to_high_water &&
- ent->available_mrs + ent->pending < 2 * ent->limit &&
+ if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit &&
!READ_ONCE(dev->fill_delay)) {
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
err = add_keys(ent, 1);
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (ent->disabled)
goto out;
if (err) {
/*
- * EAGAIN only happens if pending is positive, so we
- * will be rescheduled from reg_mr_callback(). The only
+ * EAGAIN only happens if there are pending MRs, so we
+ * will be rescheduled when storing them. The only
* failure path here is ENOMEM.
*/
if (err != -EAGAIN) {
@@ -519,7 +596,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
msecs_to_jiffies(1000));
}
}
- } else if (ent->available_mrs > 2 * ent->limit) {
+ } else if (ent->stored > 2 * ent->limit) {
bool need_delay;
/*
@@ -534,20 +611,22 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
* the garbage collection work to try to run in next cycle, in
* order to free CPU resources to other tasks.
*/
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
need_delay = need_resched() || someone_adding(cache) ||
!time_after(jiffies,
READ_ONCE(cache->last_add) + 300 * HZ);
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (ent->disabled)
goto out;
- if (need_delay)
+ if (need_delay) {
queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+ goto out;
+ }
remove_cache_mr_locked(ent);
queue_adjust_cache_locked(ent);
}
out:
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
}
static void delayed_cache_work_func(struct work_struct *work)
@@ -558,113 +637,64 @@ static void delayed_cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
-static void cache_work_func(struct work_struct *work)
-{
- struct mlx5_cache_ent *ent;
-
- ent = container_of(work, struct mlx5_cache_ent, work);
- __cache_work_func(ent);
-}
-
-/* Allocate a special entry from the cache */
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- unsigned int entry, int access_flags)
+ struct mlx5_cache_ent *ent,
+ int access_flags)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
+ int err;
- if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
- entry >= ARRAY_SIZE(cache->ent)))
- return ERR_PTR(-EINVAL);
-
- /* Matches access in alloc_cache_mr() */
- if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+ if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
return ERR_PTR(-EOPNOTSUPP);
- ent = &cache->ent[entry];
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- mr = create_cache_mr(ent);
- if (IS_ERR(mr))
- return mr;
- } else {
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
- ent->available_mrs--;
- queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
-
- mlx5_clear_mr(mr);
- }
- mr->access_flags = access_flags;
- return mr;
-}
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
-/* Return a MR already available in the cache */
-static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
-{
- struct mlx5_ib_mr *mr = NULL;
- struct mlx5_cache_ent *ent = req_ent;
+ xa_lock_irq(&ent->mkeys);
+ ent->in_use++;
- spin_lock_irq(&ent->lock);
- if (!list_empty(&ent->head)) {
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
- ent->available_mrs--;
+ if (!ent->stored) {
queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
- mlx5_clear_mr(mr);
- return mr;
+ ent->miss++;
+ xa_unlock_irq(&ent->mkeys);
+ err = create_cache_mkey(ent, &mr->mmkey.key);
+ if (err) {
+ xa_lock_irq(&ent->mkeys);
+ ent->in_use--;
+ xa_unlock_irq(&ent->mkeys);
+ kfree(mr);
+ return ERR_PTR(err);
+ }
+ } else {
+ mr->mmkey.key = pop_stored_mkey(ent);
+ queue_adjust_cache_locked(ent);
+ xa_unlock_irq(&ent->mkeys);
}
- queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
- req_ent->miss++;
- return NULL;
-}
-
-static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
-{
- struct mlx5_cache_ent *ent = mr->cache_ent;
-
- spin_lock_irq(&ent->lock);
- list_add_tail(&mr->list, &ent->head);
- ent->available_mrs++;
- queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
+ mr->mmkey.cache_ent = ent;
+ mr->mmkey.type = MLX5_MKEY_MR;
+ init_waitqueue_head(&mr->mmkey.wait);
+ return mr;
}
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
- struct mlx5_ib_mr *tmp_mr;
- struct mlx5_ib_mr *mr;
- LIST_HEAD(del_list);
+ u32 mkey;
cancel_delayed_work(&ent->dwork);
- while (1) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- break;
- }
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_move(&mr->list, &del_list);
- ent->available_mrs--;
- ent->total_mrs--;
- spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
- }
-
- list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
- list_del(&mr->list);
- kfree(mr);
+ xa_lock_irq(&ent->mkeys);
+ while (ent->stored) {
+ mkey = pop_stored_mkey(ent);
+ xa_unlock_irq(&ent->mkeys);
+ mlx5_core_destroy_mkey(dev->mdev, mkey);
+ xa_lock_irq(&ent->mkeys);
}
+ xa_unlock_irq(&ent->mkeys);
}
-static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
if (!mlx5_debugfs_root || dev->is_rep)
return;
@@ -673,9 +703,9 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
dev->cache.root = NULL;
}
-static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
struct dentry *dir;
int i;
@@ -683,15 +713,15 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
if (!mlx5_debugfs_root || dev->is_rep)
return;
- cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
+ cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
ent = &cache->ent[i];
sprintf(ent->name, "%d", ent->order);
dir = debugfs_create_dir(ent->name, cache->root);
debugfs_create_file("size", 0600, dir, ent, &size_fops);
debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
- debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
+ debugfs_create_ulong("cur", 0400, dir, &ent->stored);
debugfs_create_u32("miss", 0600, dir, &ent->miss);
}
}
@@ -703,9 +733,9 @@ static void delay_time_func(struct timer_list *t)
WRITE_ONCE(dev->fill_delay, 0);
}
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
int i;
@@ -718,66 +748,62 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
timer_setup(&dev->delay_timer, delay_time_func, 0);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
ent = &cache->ent[i];
- INIT_LIST_HEAD(&ent->head);
- spin_lock_init(&ent->lock);
+ xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
ent->order = i + 2;
ent->dev = dev;
ent->limit = 0;
- INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- if (i > MR_CACHE_LAST_STD_ENTRY) {
- mlx5_odp_init_mr_cache_entry(ent);
+ if (i > MKEY_CACHE_LAST_STD_ENTRY) {
+ mlx5_odp_init_mkey_cache_entry(ent);
continue;
}
- if (ent->order > mr_cache_max_order(dev))
+ if (ent->order > mkey_cache_max_order(dev))
continue;
ent->page = PAGE_SHIFT;
- ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
- MLX5_IB_UMR_OCTOWORD;
+ ent->ndescs = 1 << ent->order;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
!dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
- mlx5_ib_can_load_pas_with_umr(dev, 0))
+ mlx5r_umr_can_load_pas(dev, 0))
ent->limit = dev->mdev->profile.mr_cache[i].limit;
else
ent->limit = 0;
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
}
- mlx5_mr_cache_debugfs_init(dev);
+ mlx5_mkey_cache_debugfs_init(dev);
return 0;
}
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
{
unsigned int i;
if (!dev->cache.wq)
return 0;
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
struct mlx5_cache_ent *ent = &dev->cache.ent[i];
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
ent->disabled = true;
- spin_unlock_irq(&ent->lock);
- cancel_work_sync(&ent->work);
+ xa_unlock_irq(&ent->mkeys);
cancel_delayed_work_sync(&ent->dwork);
}
- mlx5_mr_cache_debugfs_cleanup(dev);
+ mlx5_mkey_cache_debugfs_cleanup(dev);
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
clean_keys(dev, i);
destroy_workqueue(dev->cache.wq);
@@ -844,65 +870,22 @@ static int get_octo_len(u64 addr, u64 len, int page_shift)
return (npages + 1) / 2;
}
-static int mr_cache_max_order(struct mlx5_ib_dev *dev)
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return MR_CACHE_LAST_STD_ENTRY + 2;
+ return MKEY_CACHE_LAST_STD_ENTRY + 2;
return MLX5_MAX_UMR_SHIFT;
}
-static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
+static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
+ unsigned int order)
{
- struct mlx5_ib_umr_context *context =
- container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
-
- context->status = wc->status;
- complete(&context->done);
-}
-
-static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
-{
- context->cqe.done = mlx5_ib_umr_done;
- context->status = -1;
- init_completion(&context->done);
-}
-
-static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
- struct mlx5_umr_wr *umrwr)
-{
- struct umr_common *umrc = &dev->umrc;
- const struct ib_send_wr *bad;
- int err;
- struct mlx5_ib_umr_context umr_context;
-
- mlx5_ib_init_umr_context(&umr_context);
- umrwr->wr.wr_cqe = &umr_context.cqe;
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
- if (err) {
- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
- up(&umrc->sem);
- return err;
-}
-
-static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
- unsigned int order)
-{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
if (order < cache->ent[0].order)
return &cache->ent[0];
order = order - cache->ent[0].order;
- if (order > MR_CACHE_LAST_STD_ENTRY)
+ if (order > MKEY_CACHE_LAST_STD_ENTRY)
return NULL;
return &cache->ent[order];
}
@@ -945,30 +928,24 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
0, iova);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
- ent = mr_cache_ent_from_order(
+ ent = mkey_cache_ent_from_order(
dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
/*
* Matches access in alloc_cache_mr(). If the MR can't come from the
* cache then synchronously create an uncached one.
*/
if (!ent || ent->limit == 0 ||
- !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+ !mlx5r_umr_can_reconfig(dev, 0, access_flags) ||
+ mlx5_umem_needs_ats(dev, umem, access_flags)) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size, false);
mutex_unlock(&dev->slow_path_mutex);
return mr;
}
- mr = get_cache_mr(ent);
- if (!mr) {
- mr = create_cache_mr(ent);
- /*
- * The above already tried to do the same stuff as reg_create(),
- * no reason to try it again.
- */
- if (IS_ERR(mr))
- return mr;
- }
+ mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
+ if (IS_ERR(mr))
+ return mr;
mr->ibmr.pd = pd;
mr->umem = umem;
@@ -978,289 +955,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
-#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
- MLX5_UMR_MTT_ALIGNMENT)
-#define MLX5_SPARE_UMR_CHUNK 0x10000
-
-/*
- * Allocate a temporary buffer to hold the per-page information to transfer to
- * HW. For efficiency this should be as large as it can be, but buffer
- * allocation failure is not allowed, so try smaller sizes.
- */
-static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
-{
- const size_t xlt_chunk_align =
- MLX5_UMR_MTT_ALIGNMENT / ent_size;
- size_t size;
- void *res = NULL;
-
- static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
-
- /*
- * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
- * allocation can't trigger any kind of reclaim.
- */
- might_sleep();
-
- gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
-
- /*
- * If the system already has a suitable high order page then just use
- * that, but don't try hard to create one. This max is about 1M, so a
- * free x86 huge page will satisfy it.
- */
- size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
- MLX5_MAX_UMR_CHUNK);
- *nents = size / ent_size;
- res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
- get_order(size));
- if (res)
- return res;
-
- if (size > MLX5_SPARE_UMR_CHUNK) {
- size = MLX5_SPARE_UMR_CHUNK;
- *nents = size / ent_size;
- res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
- get_order(size));
- if (res)
- return res;
- }
-
- *nents = PAGE_SIZE / ent_size;
- res = (void *)__get_free_page(gfp_mask);
- if (res)
- return res;
-
- mutex_lock(&xlt_emergency_page_mutex);
- memset(xlt_emergency_page, 0, PAGE_SIZE);
- return xlt_emergency_page;
-}
-
-static void mlx5_ib_free_xlt(void *xlt, size_t length)
-{
- if (xlt == xlt_emergency_page) {
- mutex_unlock(&xlt_emergency_page_mutex);
- return;
- }
-
- free_pages((unsigned long)xlt, get_order(length));
-}
-
-/*
- * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
- * submission.
- */
-static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
- struct mlx5_umr_wr *wr, struct ib_sge *sg,
- size_t nents, size_t ent_size,
- unsigned int flags)
-{
- struct mlx5_ib_dev *dev = mr_to_mdev(mr);
- struct device *ddev = &dev->mdev->pdev->dev;
- dma_addr_t dma;
- void *xlt;
-
- xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
- flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
- GFP_KERNEL);
- sg->length = nents * ent_size;
- dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
- mlx5_ib_free_xlt(xlt, sg->length);
- return NULL;
- }
- sg->addr = dma;
- sg->lkey = dev->umrc.pd->local_dma_lkey;
-
- memset(wr, 0, sizeof(*wr));
- wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
- if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
- wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr->wr.sg_list = sg;
- wr->wr.num_sge = 1;
- wr->wr.opcode = MLX5_IB_WR_UMR;
- wr->pd = mr->ibmr.pd;
- wr->mkey = mr->mmkey.key;
- wr->length = mr->ibmr.length;
- wr->virt_addr = mr->ibmr.iova;
- wr->access_flags = mr->access_flags;
- wr->page_shift = mr->page_shift;
- wr->xlt_size = sg->length;
- return xlt;
-}
-
-static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
- struct ib_sge *sg)
-{
- struct device *ddev = &dev->mdev->pdev->dev;
-
- dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
- mlx5_ib_free_xlt(xlt, sg->length);
-}
-
-static unsigned int xlt_wr_final_send_flags(unsigned int flags)
-{
- unsigned int res = 0;
-
- if (flags & MLX5_IB_UPD_XLT_ENABLE)
- res |= MLX5_IB_SEND_UMR_ENABLE_MR |
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
- res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- if (flags & MLX5_IB_UPD_XLT_ADDR)
- res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- return res;
-}
-
-int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
- int page_shift, int flags)
-{
- struct mlx5_ib_dev *dev = mr_to_mdev(mr);
- struct device *ddev = &dev->mdev->pdev->dev;
- void *xlt;
- struct mlx5_umr_wr wr;
- struct ib_sge sg;
- int err = 0;
- int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
- ? sizeof(struct mlx5_klm)
- : sizeof(struct mlx5_mtt);
- const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
- const int page_mask = page_align - 1;
- size_t pages_mapped = 0;
- size_t pages_to_map = 0;
- size_t pages_iter;
- size_t size_to_map = 0;
- size_t orig_sg_length;
-
- if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
- !umr_can_use_indirect_mkey(dev))
- return -EPERM;
-
- if (WARN_ON(!mr->umem->is_odp))
- return -EINVAL;
-
- /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
- * so we need to align the offset and length accordingly
- */
- if (idx & page_mask) {
- npages += idx & page_mask;
- idx &= ~page_mask;
- }
- pages_to_map = ALIGN(npages, page_align);
-
- xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
- if (!xlt)
- return -ENOMEM;
- pages_iter = sg.length / desc_size;
- orig_sg_length = sg.length;
-
- if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
- struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
-
- pages_to_map = min_t(size_t, pages_to_map, max_pages);
- }
-
- wr.page_shift = page_shift;
-
- for (pages_mapped = 0;
- pages_mapped < pages_to_map && !err;
- pages_mapped += pages_iter, idx += pages_iter) {
- npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
- size_to_map = npages * desc_size;
- dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
- DMA_TO_DEVICE);
- mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
- dma_sync_single_for_device(ddev, sg.addr, sg.length,
- DMA_TO_DEVICE);
-
- sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
-
- if (pages_mapped + pages_iter >= pages_to_map)
- wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
-
- wr.offset = idx * desc_size;
- wr.xlt_size = sg.length;
-
- err = mlx5_ib_post_send_wait(dev, &wr);
- }
- sg.length = orig_sg_length;
- mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
- return err;
-}
-
-/*
- * Send the DMA list to the HW for a normal MR using UMR.
- * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
- * flag may be used.
- */
-int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
-{
- struct mlx5_ib_dev *dev = mr_to_mdev(mr);
- struct device *ddev = &dev->mdev->pdev->dev;
- struct ib_block_iter biter;
- struct mlx5_mtt *cur_mtt;
- struct mlx5_umr_wr wr;
- size_t orig_sg_length;
- struct mlx5_mtt *mtt;
- size_t final_size;
- struct ib_sge sg;
- int err = 0;
-
- if (WARN_ON(mr->umem->is_odp))
- return -EINVAL;
-
- mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
- ib_umem_num_dma_blocks(mr->umem,
- 1 << mr->page_shift),
- sizeof(*mtt), flags);
- if (!mtt)
- return -ENOMEM;
- orig_sg_length = sg.length;
-
- cur_mtt = mtt;
- rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter,
- mr->umem->sgt_append.sgt.nents,
- BIT(mr->page_shift)) {
- if (cur_mtt == (void *)mtt + sg.length) {
- dma_sync_single_for_device(ddev, sg.addr, sg.length,
- DMA_TO_DEVICE);
- err = mlx5_ib_post_send_wait(dev, &wr);
- if (err)
- goto err;
- dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
- DMA_TO_DEVICE);
- wr.offset += sg.length;
- cur_mtt = mtt;
- }
-
- cur_mtt->ptag =
- cpu_to_be64(rdma_block_iter_dma_address(&biter) |
- MLX5_IB_MTT_PRESENT);
-
- if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
- cur_mtt->ptag = 0;
-
- cur_mtt++;
- }
-
- final_size = (void *)cur_mtt - (void *)mtt;
- sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
- memset(cur_mtt, 0, sg.length - final_size);
- wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
- wr.xlt_size = sg.length;
-
- dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
- err = mlx5_ib_post_send_wait(dev, &wr);
-
-err:
- sg.length = orig_sg_length;
- mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
- return err;
-}
-
/*
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
@@ -1323,6 +1017,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
MLX5_SET(mkc, mkc, translations_octword_size,
get_octo_len(iova, umem->length, mr->page_shift));
MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
+ if (mlx5_umem_needs_ats(dev, umem, access_flags))
+ MLX5_SET(mkc, mkc, ma_translation_mode, 1);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
get_octo_len(iova, umem->length, mr->page_shift));
@@ -1430,6 +1126,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
return ERR_PTR(-EINVAL);
@@ -1451,7 +1148,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
bool xlt_with_umr;
int err;
- xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
+ xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
if (xlt_with_umr) {
mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
} else {
@@ -1477,7 +1174,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
* configured properly but left disabled. It is safe to go ahead
* and configure it again via UMR while enabling it.
*/
- err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
+ err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
return ERR_PTR(err);
@@ -1514,7 +1211,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
}
/* ODP requires xlt update via umr to work. */
- if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ if (!mlx5r_umr_can_load_pas(dev, length))
return ERR_PTR(-EINVAL);
odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
@@ -1576,7 +1273,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
if (!umem_dmabuf->sgt)
return;
- mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+ mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}
@@ -1604,7 +1301,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
offset, virt_addr, length, fd, access_flags);
/* dmabuf requires xlt update via umr to work. */
- if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ if (!mlx5r_umr_can_load_pas(dev, length))
return ERR_PTR(-EINVAL);
umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
@@ -1641,31 +1338,6 @@ err_dereg_mr:
return ERR_PTR(err);
}
-/**
- * revoke_mr - Fence all DMA on the MR
- * @mr: The MR to fence
- *
- * Upon return the NIC will not be doing any DMA to the pages under the MR,
- * and any DMA in progress will be completed. Failure of this function
- * indicates the HW has failed catastrophically.
- */
-static int revoke_mr(struct mlx5_ib_mr *mr)
-{
- struct mlx5_umr_wr umrwr = {};
-
- if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
- return 0;
-
- umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.pd = mr_to_mdev(mr)->umrc.pd;
- umrwr.mkey = mr->mmkey.key;
- umrwr.ignore_free_state = 1;
-
- return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
-}
-
/*
* True if the change in access flags can be done via UMR, only some access
* flags can be updated.
@@ -1679,32 +1351,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
return false;
- return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
- target_access_flags);
-}
-
-static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
- int access_flags)
-{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- struct mlx5_umr_wr umrwr = {
- .wr = {
- .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
- .opcode = MLX5_IB_WR_UMR,
- },
- .mkey = mr->mmkey.key,
- .pd = pd,
- .access_flags = access_flags,
- };
- int err;
-
- err = mlx5_ib_post_send_wait(dev, &umrwr);
- if (err)
- return err;
-
- mr->access_flags = access_flags;
- return 0;
+ return mlx5r_umr_can_reconfig(dev, current_access_flags,
+ target_access_flags);
}
static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
@@ -1715,16 +1363,16 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
/* We only track the allocated sizes of MRs from the cache */
- if (!mr->cache_ent)
+ if (!mr->mmkey.cache_ent)
return false;
- if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
+ if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
return false;
*page_size =
mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
if (WARN_ON(!*page_size))
return false;
- return (1ULL << mr->cache_ent->order) >=
+ return (1ULL << mr->mmkey.cache_ent->order) >=
ib_umem_num_dma_blocks(new_umem, *page_size);
}
@@ -1742,7 +1390,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
* with it. This ensure the change is atomic relative to any use of the
* MR.
*/
- err = revoke_mr(mr);
+ err = mlx5r_umr_revoke_mr(mr);
if (err)
return err;
@@ -1755,12 +1403,11 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
}
- mr->ibmr.length = new_umem->length;
mr->ibmr.iova = iova;
mr->ibmr.length = new_umem->length;
mr->page_shift = order_base_2(page_size);
mr->umem = new_umem;
- err = mlx5_ib_update_mr_pas(mr, upd_flags);
+ err = mlx5r_umr_update_mr_pas(mr, upd_flags);
if (err) {
/*
* The MR is revoked at this point so there is no issue to free
@@ -1807,7 +1454,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
/* Fast path for PD/access change */
if (can_use_umr_rereg_access(dev, mr->access_flags,
new_access_flags)) {
- err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
+ err = mlx5r_umr_rereg_pd_access(mr, new_pd,
+ new_access_flags);
if (err)
return ERR_PTR(err);
return NULL;
@@ -1820,7 +1468,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
* Only one active MR can refer to a umem at one time, revoke
* the old MR before assigning the umem to the new one.
*/
- err = revoke_mr(mr);
+ err = mlx5r_umr_revoke_mr(mr);
if (err)
return ERR_PTR(err);
umem = mr->umem;
@@ -1904,18 +1552,19 @@ err:
return ret;
}
-static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
+static void
+mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- int size = mr->max_descs * mr->desc_size;
-
- if (!mr->descs)
- return;
+ if (!mr->umem && mr->descs) {
+ struct ib_device *device = mr->ibmr.device;
+ int size = mr->max_descs * mr->desc_size;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
- DMA_TO_DEVICE);
- kfree(mr->descs_alloc);
- mr->descs = NULL;
+ dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+ DMA_TO_DEVICE);
+ kfree(mr->descs_alloc);
+ mr->descs = NULL;
+ }
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
@@ -1963,15 +1612,17 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
/* Stop DMA */
- if (mr->cache_ent) {
- if (revoke_mr(mr)) {
- spin_lock_irq(&mr->cache_ent->lock);
- mr->cache_ent->total_mrs--;
- spin_unlock_irq(&mr->cache_ent->lock);
- mr->cache_ent = NULL;
- }
+ if (mr->mmkey.cache_ent) {
+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ mr->mmkey.cache_ent->in_use--;
+ xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
+
+ if (mlx5r_umr_revoke_mr(mr) ||
+ push_mkey(mr->mmkey.cache_ent, false,
+ xa_mk_value(mr->mmkey.key)))
+ mr->mmkey.cache_ent = NULL;
}
- if (!mr->cache_ent) {
+ if (!mr->mmkey.cache_ent) {
rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
if (rc)
return rc;
@@ -1988,13 +1639,10 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
mlx5_ib_free_odp_mr(mr);
}
- if (mr->cache_ent) {
- mlx5_mr_cache_free(dev, mr);
- } else {
- if (!udata)
- mlx5_free_priv_descs(mr);
- kfree(mr);
- }
+ if (!mr->mmkey.cache_ent)
+ mlx5_free_priv_descs(mr);
+
+ kfree(mr);
return 0;
}
@@ -2079,6 +1727,7 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
if (err)
goto err_free_in;
+ mr->umem = NULL;
kfree(in);
return mr;
@@ -2205,6 +1854,7 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
}
mr->ibmr.device = pd->device;
+ mr->umem = NULL;
switch (mr_type) {
case IB_MR_TYPE_MEM_REG:
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 91eb615b89ee..bc97958818bb 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <linux/kernel.h>
#include <linux/dma-buf.h>
@@ -38,6 +37,7 @@
#include "mlx5_ib.h"
#include "cmd.h"
+#include "umr.h"
#include "qp.h"
#include <linux/mlx5/eq.h>
@@ -117,7 +117,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
*
* xa_store()
* mutex_lock(umem_mutex)
- * mlx5_ib_update_xlt()
+ * mlx5r_umr_update_xlt()
* mutex_unlock(umem_mutex)
* destroy lkey
*
@@ -198,9 +198,9 @@ static void free_implicit_child_mr_work(struct work_struct *work)
mlx5r_deref_wait_odp_mkey(&mr->mmkey);
mutex_lock(&odp_imr->umem_mutex);
- mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT,
- 1, 0,
- MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
+ mlx5r_umr_update_xlt(mr->parent,
+ ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0,
+ MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
@@ -282,19 +282,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
- mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx, 0,
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ATOMIC);
+ mlx5r_umr_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
}
}
}
if (in_block)
- mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx + 1, 0,
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ATOMIC);
+ mlx5r_umr_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx + 1, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
mlx5_update_odp_stats(mr, invalidations, invalidations);
@@ -323,8 +323,7 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
memset(caps, 0, sizeof(*caps));
- if (!MLX5_CAP_GEN(dev->mdev, pg) ||
- !mlx5_ib_can_load_pas_with_umr(dev, 0))
+ if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
@@ -407,6 +406,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
unsigned long idx)
{
+ struct mlx5_ib_dev *dev = mr_to_mdev(imr);
struct ib_umem_odp *odp;
struct mlx5_ib_mr *mr;
struct mlx5_ib_mr *ret;
@@ -418,13 +418,14 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
if (IS_ERR(odp))
return ERR_CAST(odp);
- mr = mlx5_mr_cache_alloc(
- mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags);
+ mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY],
+ imr->access_flags);
if (IS_ERR(mr)) {
ib_umem_odp_release(odp);
return mr;
}
+ mr->access_flags = imr->access_flags;
mr->ibmr.pd = imr->ibmr.pd;
mr->ibmr.device = &mr_to_mdev(imr)->ib_dev;
mr->umem = &odp->umem;
@@ -440,11 +441,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
*/
refcount_set(&mr->mmkey.usecount, 2);
- err = mlx5_ib_update_xlt(mr, 0,
- MLX5_IMR_MTT_ENTRIES,
- PAGE_SHIFT,
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ENABLE);
+ err = mlx5r_umr_update_xlt(mr, 0,
+ MLX5_IMR_MTT_ENTRIES,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
if (err) {
ret = ERR_PTR(err);
goto out_mr;
@@ -485,36 +486,37 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
- if (!mlx5_ib_can_load_pas_with_umr(dev,
- MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
+ if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
return ERR_PTR(-EOPNOTSUPP);
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
- imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags);
+ imr = mlx5_mr_cache_alloc(dev,
+ &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY],
+ access_flags);
if (IS_ERR(imr)) {
ib_umem_odp_release(umem_odp);
return imr;
}
+ imr->access_flags = access_flags;
imr->ibmr.pd = &pd->ibpd;
imr->ibmr.iova = 0;
imr->umem = &umem_odp->umem;
imr->ibmr.lkey = imr->mmkey.key;
imr->ibmr.rkey = imr->mmkey.key;
imr->ibmr.device = &dev->ib_dev;
- imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
xa_init(&imr->implicit_children);
- err = mlx5_ib_update_xlt(imr, 0,
- mlx5_imr_ksm_entries,
- MLX5_KSM_PAGE_SHIFT,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ENABLE);
+ err = mlx5r_umr_update_xlt(imr, 0,
+ mlx5_imr_ksm_entries,
+ MLX5_KSM_PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
if (err)
goto out_mr;
@@ -577,7 +579,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
* No need to check whether the MTTs really belong to this MR, since
* ib_umem_odp_map_dma_and_lock already checks this.
*/
- ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
+ ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
@@ -675,9 +677,9 @@ out:
* next pagefault handler will see the new information.
*/
mutex_lock(&odp_imr->umem_mutex);
- err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ATOMIC);
+ err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
if (err) {
mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
@@ -711,7 +713,7 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
- err = mlx5_ib_update_mr_pas(mr, xlt_flags);
+ err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
}
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
@@ -792,7 +794,8 @@ static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key)
{
if (!mmkey)
return false;
- if (mmkey->type == MLX5_MKEY_MW)
+ if (mmkey->type == MLX5_MKEY_MW ||
+ mmkey->type == MLX5_MKEY_INDIRECT_DEVX)
return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key);
return mmkey->key == key;
}
@@ -1541,16 +1544,10 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
.nent = MLX5_IB_NUM_PF_EQE,
};
param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
- if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
- err = -ENOMEM;
- goto err_wq;
- }
eq->core = mlx5_eq_create_generic(dev->mdev, &param);
- free_cpumask_var(param.affinity);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
goto err_wq;
@@ -1591,7 +1588,7 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
return err;
}
-void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
+void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
{
if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return;
@@ -1599,18 +1596,14 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
switch (ent->order - 2) {
case MLX5_IMR_MTT_CACHE_ENTRY:
ent->page = PAGE_SHIFT;
- ent->xlt = MLX5_IMR_MTT_ENTRIES *
- sizeof(struct mlx5_mtt) /
- MLX5_IB_UMR_OCTOWORD;
+ ent->ndescs = MLX5_IMR_MTT_ENTRIES;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
ent->limit = 0;
break;
case MLX5_IMR_KSM_CACHE_ENTRY:
ent->page = MLX5_KSM_PAGE_SHIFT;
- ent->xlt = mlx5_imr_ksm_entries *
- sizeof(struct mlx5_klm) /
- MLX5_IB_UMR_OCTOWORD;
+ ent->ndescs = mlx5_imr_ksm_entries;
ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
ent->limit = 0;
break;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e5abbcfc1d57..40d9410ec303 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -30,7 +30,7 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/etherdevice.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
@@ -40,6 +40,7 @@
#include "ib_rep.h"
#include "counters.h"
#include "cmd.h"
+#include "umr.h"
#include "qp.h"
#include "wr.h"
@@ -614,7 +615,8 @@ enum {
static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
{
- return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
+ return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
+ bfregi->num_static_sys_pages * MLX5_NON_FP_BFREGS_PER_UAR;
}
static int num_med_bfreg(struct mlx5_ib_dev *dev,
@@ -3906,7 +3908,7 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;
return (unsigned int)atomic_add_return(1, tx_port_affinity) %
- MLX5_MAX_PORTS + 1;
+ (dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1;
}
static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
@@ -4464,6 +4466,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
MLX5_ST_SZ_BYTES(create_dct_in), out,
sizeof(out));
+ err = mlx5_cmd_check(dev->mdev, err, qp->dct.in, out);
if (err)
return err;
resp.dctn = qp->dct.mdct.mqp.qpn;
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index 8844eacf2380..542e4c63a8de 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -220,7 +220,7 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
init_completion(&dct->drained);
MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
- err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
+ err = mlx5_cmd_do(dev->mdev, in, inlen, out, outlen);
if (err)
return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 191c4ee7db62..09b365a98bbf 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -3,7 +3,6 @@
* Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
-#include <linux/module.h>
#include <linux/mlx5/qp.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
new file mode 100644
index 000000000000..d5105b5c9979
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -0,0 +1,761 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <rdma/ib_umem_odp.h>
+#include "mlx5_ib.h"
+#include "umr.h"
+#include "wr.h"
+
+/*
+ * We can't use an array for xlt_emergency_page because dma_map_single doesn't
+ * work on kernel modules memory
+ */
+void *xlt_emergency_page;
+static DEFINE_MUTEX(xlt_emergency_page_mutex);
+
+static __be64 get_umr_enable_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_disable_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_translation_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW;
+
+ if (MLX5_CAP_GEN(dev->mdev, atomic))
+ result |= MLX5_MKEY_MASK_A;
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_pd_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_PD;
+
+ return cpu_to_be64(result);
+}
+
+static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
+{
+ if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ return -EPERM;
+
+ return 0;
+}
+
+enum {
+ MAX_UMR_WR = 128,
+};
+
+static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
+{
+ struct ib_qp_attr attr = {};
+ int ret;
+
+ attr.qp_state = IB_QPS_INIT;
+ attr.port_num = 1;
+ ret = ib_modify_qp(qp, &attr,
+ IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ return ret;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.qp_state = IB_QPS_RTR;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+ return ret;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.qp_state = IB_QPS_RTS;
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
+{
+ struct ib_qp_init_attr init_attr = {};
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ int ret;
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ return PTR_ERR(pd);
+ }
+
+ cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(cq)) {
+ mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+ ret = PTR_ERR(cq);
+ goto destroy_pd;
+ }
+
+ init_attr.send_cq = cq;
+ init_attr.recv_cq = cq;
+ init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr.cap.max_send_wr = MAX_UMR_WR;
+ init_attr.cap.max_send_sge = 1;
+ init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
+ init_attr.port_num = 1;
+ qp = ib_create_qp(pd, &init_attr);
+ if (IS_ERR(qp)) {
+ mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+ ret = PTR_ERR(qp);
+ goto destroy_cq;
+ }
+
+ ret = mlx5r_umr_qp_rst2rts(dev, qp);
+ if (ret)
+ goto destroy_qp;
+
+ dev->umrc.qp = qp;
+ dev->umrc.cq = cq;
+ dev->umrc.pd = pd;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ mutex_init(&dev->umrc.lock);
+ dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
+
+ return 0;
+
+destroy_qp:
+ ib_destroy_qp(qp);
+destroy_cq:
+ ib_free_cq(cq);
+destroy_pd:
+ ib_dealloc_pd(pd);
+ return ret;
+}
+
+void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
+ return;
+ ib_destroy_qp(dev->umrc.qp);
+ ib_free_cq(dev->umrc.cq);
+ ib_dealloc_pd(dev->umrc.pd);
+}
+
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_qp_attr attr;
+ int err;
+
+ attr.qp_state = IB_QPS_RESET;
+ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+ if (err) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto err;
+ }
+
+ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+ if (err)
+ goto err;
+
+ umrc->state = MLX5_UMR_STATE_ACTIVE;
+ return 0;
+
+err:
+ umrc->state = MLX5_UMR_STATE_ERR;
+ return err;
+}
+
+static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
+ struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+ unsigned int wqe_size =
+ with_data ? sizeof(struct mlx5r_umr_wqe) :
+ sizeof(struct mlx5r_umr_wqe) -
+ sizeof(struct mlx5_wqe_data_seg);
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ union {
+ struct ib_cqe *ib_cqe;
+ u64 wr_id;
+ } id;
+ void *cur_edge, *seg;
+ unsigned long flags;
+ unsigned int idx;
+ int size, err;
+
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
+ return -EIO;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
+ cpu_to_be32(mkey), false, false);
+ if (WARN_ON(err))
+ goto out;
+
+ qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+
+ mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
+
+ id.ib_cqe = cqe;
+ mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
+ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
+
+ mlx5r_ring_db(qp, 1, ctrl);
+
+out:
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return err;
+}
+
+static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx5_ib_umr_context *context =
+ container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
+
+ context->status = wc->status;
+ complete(&context->done);
+}
+
+static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
+{
+ context->cqe.done = mlx5r_umr_done;
+ init_completion(&context->done);
+}
+
+static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
+ struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct mlx5r_umr_context umr_context;
+ int err;
+
+ err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
+ if (WARN_ON(err))
+ return err;
+
+ mlx5r_umr_init_context(&umr_context);
+
+ down(&umrc->sem);
+ while (true) {
+ mutex_lock(&umrc->lock);
+ if (umrc->state == MLX5_UMR_STATE_ERR) {
+ mutex_unlock(&umrc->lock);
+ err = -EFAULT;
+ break;
+ }
+
+ if (umrc->state == MLX5_UMR_STATE_RECOVER) {
+ mutex_unlock(&umrc->lock);
+ usleep_range(3000, 5000);
+ continue;
+ }
+
+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+ with_data);
+ mutex_unlock(&umrc->lock);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
+ err);
+ break;
+ }
+
+ wait_for_completion(&umr_context.done);
+
+ if (umr_context.status == IB_WC_SUCCESS)
+ break;
+
+ if (umr_context.status == IB_WC_WR_FLUSH_ERR)
+ continue;
+
+ WARN_ON_ONCE(1);
+ mlx5_ib_warn(dev,
+ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
+ umr_context.status);
+ mutex_lock(&umrc->lock);
+ err = mlx5r_umr_recover(dev);
+ mutex_unlock(&umrc->lock);
+ if (err)
+ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
+ err);
+ err = -EFAULT;
+ break;
+ }
+ up(&umrc->sem);
+ return err;
+}
+
+/**
+ * mlx5r_umr_revoke_mr - Fence all DMA on the MR
+ * @mr: The MR to fence
+ *
+ * Upon return the NIC will not be doing any DMA to the pages under the MR,
+ * and any DMA in progress will be completed. Failure of this function
+ * indicates the HW has failed catastrophically.
+ */
+int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct mlx5r_umr_wqe wqe = {};
+
+ if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return 0;
+
+ wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+ wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
+ wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
+
+ MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
+ MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
+ MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
+ mlx5_mkey_variant(mr->mmkey.key));
+
+ return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
+}
+
+static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
+ struct mlx5_mkey_seg *seg,
+ unsigned int access_flags)
+{
+ MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, seg, lr, 1);
+ MLX5_SET(mkc, seg, relaxed_ordering_write,
+ !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
+ MLX5_SET(mkc, seg, relaxed_ordering_read,
+ !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
+}
+
+int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct mlx5r_umr_wqe wqe = {};
+ int err;
+
+ wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
+ wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+ wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
+ wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
+
+ mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
+ MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
+ mlx5_mkey_variant(mr->mmkey.key));
+
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
+ if (err)
+ return err;
+
+ mr->access_flags = access_flags;
+ return 0;
+}
+
+#define MLX5_MAX_UMR_CHUNK \
+ ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT)
+#define MLX5_SPARE_UMR_CHUNK 0x10000
+
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+ const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
+ size_t size;
+ void *res = NULL;
+
+ static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
+
+ /*
+ * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
+ * allocation can't trigger any kind of reclaim.
+ */
+ might_sleep();
+
+ gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
+
+ /*
+ * If the system already has a suitable high order page then just use
+ * that, but don't try hard to create one. This max is about 1M, so a
+ * free x86 huge page will satisfy it.
+ */
+ size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+ MLX5_MAX_UMR_CHUNK);
+ *nents = size / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+
+ if (size > MLX5_SPARE_UMR_CHUNK) {
+ size = MLX5_SPARE_UMR_CHUNK;
+ *nents = size / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+ }
+
+ *nents = PAGE_SIZE / ent_size;
+ res = (void *)__get_free_page(gfp_mask);
+ if (res)
+ return res;
+
+ mutex_lock(&xlt_emergency_page_mutex);
+ memset(xlt_emergency_page, 0, PAGE_SIZE);
+ return xlt_emergency_page;
+}
+
+static void mlx5r_umr_free_xlt(void *xlt, size_t length)
+{
+ if (xlt == xlt_emergency_page) {
+ mutex_unlock(&xlt_emergency_page_mutex);
+ return;
+ }
+
+ free_pages((unsigned long)xlt, get_order(length));
+}
+
+static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+ struct ib_sge *sg)
+{
+ struct device *ddev = &dev->mdev->pdev->dev;
+
+ dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+ mlx5r_umr_free_xlt(xlt, sg->length);
+}
+
+/*
+ * Create an XLT buffer ready for submission.
+ */
+static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
+ size_t nents, size_t ent_size,
+ unsigned int flags)
+{
+ struct device *ddev = &dev->mdev->pdev->dev;
+ dma_addr_t dma;
+ void *xlt;
+
+ xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
+ flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+ GFP_KERNEL);
+ sg->length = nents * ent_size;
+ dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+ mlx5r_umr_free_xlt(xlt, sg->length);
+ return NULL;
+ }
+ sg->addr = dma;
+ sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+ return xlt;
+}
+
+static void
+mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+ unsigned int flags, struct ib_sge *sg)
+{
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ /* fail if free */
+ ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
+ else
+ /* fail if not free */
+ ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
+ ctrl_seg->xlt_octowords =
+ cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+}
+
+static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
+ struct mlx5_mkey_seg *mkey_seg,
+ struct mlx5_ib_mr *mr,
+ unsigned int page_shift)
+{
+ mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
+ MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
+ MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
+ MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
+ MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
+ MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
+ MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
+}
+
+static void
+mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
+ struct ib_sge *sg)
+{
+ data_seg->byte_count = cpu_to_be32(sg->length);
+ data_seg->lkey = cpu_to_be32(sg->lkey);
+ data_seg->addr = cpu_to_be64(sg->addr);
+}
+
+static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+ u64 offset)
+{
+ u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
+
+ ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
+ ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
+ ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+}
+
+static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
+ struct mlx5r_umr_wqe *wqe,
+ struct mlx5_ib_mr *mr, struct ib_sge *sg,
+ unsigned int flags)
+{
+ bool update_pd_access, update_translation;
+
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
+
+ update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
+ flags & MLX5_IB_UPD_XLT_PD ||
+ flags & MLX5_IB_UPD_XLT_ACCESS;
+
+ if (update_pd_access) {
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+ }
+
+ update_translation =
+ flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
+
+ if (update_translation) {
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
+ if (!mr->ibmr.length)
+ MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
+ }
+
+ wqe->ctrl_seg.xlt_octowords =
+ cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+ wqe->data_seg.byte_count = cpu_to_be32(sg->length);
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
+ */
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ struct mlx5r_umr_wqe wqe = {};
+ struct ib_block_iter biter;
+ struct mlx5_mtt *cur_mtt;
+ size_t orig_sg_length;
+ struct mlx5_mtt *mtt;
+ size_t final_size;
+ struct ib_sge sg;
+ u64 offset = 0;
+ int err = 0;
+
+ if (WARN_ON(mr->umem->is_odp))
+ return -EINVAL;
+
+ mtt = mlx5r_umr_create_xlt(
+ dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
+ sizeof(*mtt), flags);
+ if (!mtt)
+ return -ENOMEM;
+
+ orig_sg_length = sg.length;
+
+ mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
+ mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
+ mr->page_shift);
+ mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
+
+ cur_mtt = mtt;
+ rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
+ mr->umem->sgt_append.sgt.nents,
+ BIT(mr->page_shift)) {
+ if (cur_mtt == (void *)mtt + sg.length) {
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
+ true);
+ if (err)
+ goto err;
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ offset += sg.length;
+ mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
+
+ cur_mtt = mtt;
+ }
+
+ cur_mtt->ptag =
+ cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ MLX5_IB_MTT_PRESENT);
+
+ if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+ cur_mtt->ptag = 0;
+
+ cur_mtt++;
+ }
+
+ final_size = (void *)cur_mtt - (void *)mtt;
+ sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
+ memset(cur_mtt, 0, sg.length - final_size);
+ mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
+
+ dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
+
+err:
+ sg.length = orig_sg_length;
+ mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
+ return err;
+}
+
+static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
+{
+ return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
+}
+
+int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags)
+{
+ int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+ ? sizeof(struct mlx5_klm)
+ : sizeof(struct mlx5_mtt);
+ const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ const int page_mask = page_align - 1;
+ struct mlx5r_umr_wqe wqe = {};
+ size_t pages_mapped = 0;
+ size_t pages_to_map = 0;
+ size_t size_to_map = 0;
+ size_t orig_sg_length;
+ size_t pages_iter;
+ struct ib_sge sg;
+ int err = 0;
+ void *xlt;
+
+ if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
+ !umr_can_use_indirect_mkey(dev))
+ return -EPERM;
+
+ if (WARN_ON(!mr->umem->is_odp))
+ return -EINVAL;
+
+ /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+ * so we need to align the offset and length accordingly
+ */
+ if (idx & page_mask) {
+ npages += idx & page_mask;
+ idx &= ~page_mask;
+ }
+ pages_to_map = ALIGN(npages, page_align);
+
+ xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
+ if (!xlt)
+ return -ENOMEM;
+
+ pages_iter = sg.length / desc_size;
+ orig_sg_length = sg.length;
+
+ if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+
+ pages_to_map = min_t(size_t, pages_to_map, max_pages);
+ }
+
+ mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
+ mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
+ mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
+
+ for (pages_mapped = 0;
+ pages_mapped < pages_to_map && !err;
+ pages_mapped += pages_iter, idx += pages_iter) {
+ npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
+ size_to_map = npages * desc_size;
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
+
+ if (pages_mapped + pages_iter >= pages_to_map)
+ mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
+ mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
+ }
+ sg.length = orig_sg_length;
+ mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
new file mode 100644
index 000000000000..c9d0021381a2
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef _MLX5_IB_UMR_H
+#define _MLX5_IB_UMR_H
+
+#include "mlx5_ib.h"
+
+
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
+#define MLX5_IB_UMR_OCTOWORD 16
+#define MLX5_IB_UMR_XLT_ALIGNMENT 64
+
+int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
+void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
+
+static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
+ size_t length)
+{
+ /*
+ * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
+ * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
+ * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
+ * can never be enabled without this capability. Simplify this weird
+ * quirky hardware by just saying it can't use PAS lists with UMR at
+ * all.
+ */
+ if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ return false;
+
+ /*
+ * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
+ * used.
+ */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
+ return false;
+ return true;
+}
+
+/*
+ * true if an existing MR can be reconfigured to new access_flags using UMR.
+ * Older HW cannot use UMR to update certain elements of the MKC. See
+ * get_umr_update_access_mask() and umr_check_mkey_mask()
+ */
+static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
+ MLX5_CAP_GEN(dev->mdev, atomic) &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return false;
+
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return false;
+
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ return false;
+
+ return true;
+}
+
+static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes)
+{
+ return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
+ MLX5_IB_UMR_OCTOWORD;
+}
+
+struct mlx5r_umr_context {
+ struct ib_cqe cqe;
+ enum ib_wc_status status;
+ struct completion done;
+};
+
+struct mlx5r_umr_wqe {
+ struct mlx5_wqe_umr_ctrl_seg ctrl_seg;
+ struct mlx5_mkey_seg mkey_seg;
+ struct mlx5_wqe_data_seg data_seg;
+};
+
+int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
+int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags);
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
+int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags);
+
+#endif /* _MLX5_IB_UMR_H */
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 51e48ca9016e..855f3f4fefad 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -7,6 +7,7 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/driver.h>
#include "wr.h"
+#include "umr.h"
static const u32 mlx5_ib_opcode[] = {
[IB_WR_SEND] = MLX5_OPCODE_SEND,
@@ -25,58 +26,7 @@ static const u32 mlx5_ib_opcode[] = {
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
};
-/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
- * next nearby edge and get new address translation for current WQE position.
- * @sq - SQ buffer.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @cur_edge: Updated current edge.
- */
-static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
- u32 wqe_sz, void **cur_edge)
-{
- u32 idx;
-
- if (likely(*seg != *cur_edge))
- return;
-
- idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
- *cur_edge = get_sq_edge(sq, idx);
-
- *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
-}
-
-/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
- * pointers. At the end @seg is aligned to 16B regardless the copied size.
- * @sq - SQ buffer.
- * @cur_edge: Updated current edge.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @src: Pointer to copy from.
- * @n: Number of bytes to copy.
- */
-static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
- void **seg, u32 *wqe_sz, const void *src,
- size_t n)
-{
- while (likely(n)) {
- size_t leftlen = *cur_edge - *seg;
- size_t copysz = min_t(size_t, leftlen, n);
- size_t stride;
-
- memcpy(*seg, src, copysz);
-
- n -= copysz;
- src += copysz;
- stride = !n ? ALIGN(copysz, 16) : copysz;
- *seg += stride;
- *wqe_sz += stride >> 4;
- handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
- }
-}
-
-static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
- struct ib_cq *ib_cq)
+int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
struct mlx5_ib_cq *cq;
unsigned int cur;
@@ -122,9 +72,9 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
eseg->mss = cpu_to_be16(ud_wr->mss);
eseg->inline_hdr.sz = cpu_to_be16(left);
- /* memcpy_send_wqe should get a 16B align address. Hence, we
- * first copy up to the current edge and then, if needed,
- * continue to memcpy_send_wqe.
+ /* mlx5r_memcpy_send_wqe should get a 16B align address. Hence,
+ * we first copy up to the current edge and then, if needed,
+ * continue to mlx5r_memcpy_send_wqe.
*/
copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
left);
@@ -138,8 +88,8 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
left -= copysz;
pdata += copysz;
- memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
- left);
+ mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
+ pdata, left);
}
return;
@@ -165,12 +115,6 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static u64 get_xlt_octo(u64 bytes)
-{
- return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
- MLX5_IB_UMR_OCTOWORD;
-}
-
static __be64 frwr_mkey_mask(bool atomic)
{
u64 result;
@@ -222,7 +166,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
memset(umr, 0, sizeof(*umr));
umr->flags = flags;
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
+ umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask(atomic);
}
@@ -233,134 +177,6 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
umr->flags = MLX5_UMR_INLINE;
}
-static __be64 get_umr_enable_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_disable_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_translation_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_access_mask(int atomic,
- int relaxed_ordering_write,
- int relaxed_ordering_read)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW;
-
- if (atomic)
- result |= MLX5_MKEY_MASK_A;
-
- if (relaxed_ordering_write)
- result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
-
- if (relaxed_ordering_read)
- result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_pd_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_PD;
-
- return cpu_to_be64(result);
-}
-
-static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
-{
- if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
- return -EPERM;
-
- if (mask & MLX5_MKEY_MASK_A &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
- return -EPERM;
-
- if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
- !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
- return -EPERM;
-
- if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
- !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
- return -EPERM;
-
- return 0;
-}
-
-static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
- struct mlx5_wqe_umr_ctrl_seg *umr,
- const struct ib_send_wr *wr)
-{
- const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- memset(umr, 0, sizeof(*umr));
-
- if (!umrwr->ignore_free_state) {
- if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
- /* fail if free */
- umr->flags = MLX5_UMR_CHECK_FREE;
- else
- /* fail if not free */
- umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- }
-
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
- u64 offset = get_xlt_octo(umrwr->offset);
-
- umr->xlt_offset = cpu_to_be16(offset & 0xffff);
- umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
- umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
- umr->mkey_mask |= get_umr_update_translation_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
- umr->mkey_mask |= get_umr_update_access_mask(
- !!(MLX5_CAP_GEN(dev->mdev, atomic)),
- !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
- !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
- umr->mkey_mask |= get_umr_update_pd_mask();
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
- umr->mkey_mask |= get_umr_enable_mr_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- umr->mkey_mask |= get_umr_disable_mr_mask();
-
- if (!wr->num_sge)
- umr->flags |= MLX5_UMR_INLINE;
-
- return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
-}
-
static u8 get_umr_flags(int acc)
{
return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
@@ -398,43 +214,6 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
seg->status = MLX5_MKEY_STATUS_FREE;
}
-static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
- struct mlx5_mkey_seg *seg,
- const struct ib_send_wr *wr)
-{
- const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- memset(seg, 0, sizeof(*seg));
- if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- MLX5_SET(mkc, seg, free, 1);
-
- MLX5_SET(mkc, seg, a,
- !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, seg, rw,
- !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, seg, lr, 1);
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
- MLX5_SET(mkc, seg, relaxed_ordering_write,
- !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
- MLX5_SET(mkc, seg, relaxed_ordering_read,
- !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
-
- if (umrwr->pd)
- MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
- !umrwr->length)
- MLX5_SET(mkc, seg, length64, 1);
-
- MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
- MLX5_SET64(mkc, seg, len, umrwr->length);
- MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
- MLX5_SET(mkc, seg, qpn, 0xffffff);
- MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
-}
-
static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
struct mlx5_ib_mr *mr,
struct mlx5_ib_pd *pd)
@@ -760,7 +539,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
seg->len = cpu_to_be64(length);
- seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
+ seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
}
@@ -770,7 +549,7 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
+ umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
umr->mkey_mask = sig_mkey_mask();
}
@@ -870,7 +649,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
* Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
* kernel ULPs are not aware of it, so we don't set it here.
*/
- if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
+ if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
mlx5_ib_warn(
to_mdev(qp->ibqp.device),
"Fast update for MR access flags is not possible\n");
@@ -899,8 +678,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
if (umr_inline) {
- memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
- mr_list_size);
+ mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+ mr_list_size);
*size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
} else {
set_reg_data_seg(*seg, mr, pd);
@@ -942,23 +721,22 @@ static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
}
}
-static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned int *idx,
- int *size, void **cur_edge, int nreq,
- bool send_signaled, bool solicited)
+int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
+ int *size, void **cur_edge, int nreq, __be32 general_id,
+ bool send_signaled, bool solicited)
{
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+ if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
*seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
*ctrl = *seg;
*(uint32_t *)(*seg + 8) = 0;
- (*ctrl)->imm = send_ieth(wr);
+ (*ctrl)->general_id = general_id;
(*ctrl)->fm_ce_se = qp->sq_signal_bits |
- (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
- (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
+ (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+ (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
@@ -972,16 +750,14 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
const struct ib_send_wr *wr, unsigned int *idx, int *size,
void **cur_edge, int nreq)
{
- return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
- wr->send_flags & IB_SEND_SIGNALED,
- wr->send_flags & IB_SEND_SOLICITED);
+ return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
+ send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
+ wr->send_flags & IB_SEND_SOLICITED);
}
-static void finish_wqe(struct mlx5_ib_qp *qp,
- struct mlx5_wqe_ctrl_seg *ctrl,
- void *seg, u8 size, void *cur_edge,
- unsigned int idx, u64 wr_id, int nreq, u8 fence,
- u32 mlx5_opcode)
+void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
+ void *seg, u8 size, void *cur_edge, unsigned int idx,
+ u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
{
u8 opmod = 0;
@@ -1045,8 +821,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
/*
* SET_PSV WQEs are not signaled and solicited on error.
*/
- err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
- false, true);
+ err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
+ send_ieth(wr), false, true);
if (unlikely(err)) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -1057,8 +833,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
mlx5_ib_warn(dev, "\n");
goto out;
}
- finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
- next_fence, MLX5_OPCODE_SET_PSV);
+ mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
+ nreq, next_fence, MLX5_OPCODE_SET_PSV);
out:
return err;
@@ -1098,8 +874,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
if (unlikely(err))
goto out;
- finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
- nreq, fence, MLX5_OPCODE_UMR);
+ mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
+ wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
if (unlikely(err)) {
@@ -1130,8 +906,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
mlx5_ib_warn(dev, "\n");
goto out;
}
- finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_UMR);
+ mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
+ nreq, fence, MLX5_OPCODE_UMR);
sig_attrs = mr->ibmr.sig_attrs;
err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
@@ -1246,33 +1022,30 @@ static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
}
}
-static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- const struct ib_send_wr *wr,
- struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
- int *size, void **cur_edge, unsigned int idx)
+void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
+ struct mlx5_wqe_ctrl_seg *ctrl)
{
- int err = 0;
+ struct mlx5_bf *bf = &qp->bf;
- if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
- err = -EINVAL;
- mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
- goto out;
- }
+ qp->sq.head += nreq;
- qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
- (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
- err = set_reg_umr_segment(dev, *seg, wr);
- if (unlikely(err))
- goto out;
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_reg_mkey_segment(dev, *seg, wr);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-out:
- return err;
+ /* Make sure that descriptors are written before
+ * updating doorbell record and ringing the doorbell
+ */
+ wmb();
+
+ qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+ /* Make sure doorbell record is visible to the HCA before
+ * we hit doorbell.
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
+ /* Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ bf->offset ^= bf->buf_size;
}
int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
@@ -1283,7 +1056,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_xrc_seg *xrc;
- struct mlx5_bf *bf;
void *cur_edge;
int size;
unsigned long flags;
@@ -1305,8 +1077,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->type == IB_QPT_GSI)
return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
- bf = &qp->bf;
-
spin_lock_irqsave(&qp->sq.lock, flags);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -1384,12 +1154,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case IB_QPT_UD:
handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
break;
- case MLX5_IB_QPT_REG_UMR:
- err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
- &size, &cur_edge, idx);
- if (unlikely(err))
- goto out;
- break;
default:
break;
@@ -1418,35 +1182,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
qp->next_fence = next_fence;
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
- fence, mlx5_ib_opcode[wr->opcode]);
+ mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
+ nreq, fence, mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
dump_wqe(qp, idx, size);
}
out:
- if (likely(nreq)) {
- qp->sq.head += nreq;
-
- /* Make sure that descriptors are written before
- * updating doorbell record and ringing the doorbell
- */
- wmb();
-
- qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
-
- /* Make sure doorbell record is visible to the HCA before
- * we hit doorbell.
- */
- wmb();
-
- mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
- /* Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order.
- */
- bf->offset ^= bf->buf_size;
- }
+ if (likely(nreq))
+ mlx5r_ring_db(qp, nreq, ctrl);
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -1486,7 +1231,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;
diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h
index 4f0057516402..2dc89438000d 100644
--- a/drivers/infiniband/hw/mlx5/wr.h
+++ b/drivers/infiniband/hw/mlx5/wr.h
@@ -41,6 +41,66 @@ static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
return fragment_end + MLX5_SEND_WQE_BB;
}
+/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
+ * next nearby edge and get new address translation for current WQE position.
+ * @sq: SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ u32 idx;
+
+ if (likely(*seg != *cur_edge))
+ return;
+
+ idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+ *cur_edge = get_sq_edge(sq, idx);
+
+ *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
+
+/* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant
+ * WQ's pointers. At the end @seg is aligned to 16B regardless the copied size.
+ * @sq: SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+ void **seg, u32 *wqe_sz,
+ const void *src, size_t n)
+{
+ while (likely(n)) {
+ size_t leftlen = *cur_edge - *seg;
+ size_t copysz = min_t(size_t, leftlen, n);
+ size_t stride;
+
+ memcpy(*seg, src, copysz);
+
+ n -= copysz;
+ src += copysz;
+ stride = !n ? ALIGN(copysz, 16) : copysz;
+ *seg += stride;
+ *wqe_sz += stride >> 4;
+ handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+ }
+}
+
+int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq);
+int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
+ int *size, void **cur_edge, int nreq, __be32 general_id,
+ bool send_signaled, bool solicited);
+void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
+ void *seg, u8 size, void *cur_edge, unsigned int idx,
+ u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode);
+void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
+ struct mlx5_wqe_ctrl_seg *ctrl);
int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr, bool drain);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index aef1d274a14e..9f0f79d02d3c 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -51,7 +51,7 @@ u32 mthca_alloc(struct mthca_alloc *alloc)
}
if (obj < alloc->max) {
- set_bit(obj, alloc->table);
+ __set_bit(obj, alloc->table);
obj |= alloc->top;
} else
obj = -1;
@@ -69,7 +69,7 @@ void mthca_free(struct mthca_alloc *alloc, u32 obj)
spin_lock_irqsave(&alloc->lock, flags);
- clear_bit(obj, alloc->table);
+ __clear_bit(obj, alloc->table);
alloc->last = min(alloc->last, obj);
alloc->top = (alloc->top + alloc->max) & alloc->mask;
@@ -79,8 +79,6 @@ void mthca_free(struct mthca_alloc *alloc, u32 obj)
int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
u32 reserved)
{
- int i;
-
/* num must be a power of 2 */
if (num != 1 << (ffs(num) - 1))
return -EINVAL;
@@ -90,21 +88,18 @@ int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
alloc->max = num;
alloc->mask = mask;
spin_lock_init(&alloc->lock);
- alloc->table = kmalloc_array(BITS_TO_LONGS(num), sizeof(long),
- GFP_KERNEL);
+ alloc->table = bitmap_zalloc(num, GFP_KERNEL);
if (!alloc->table)
return -ENOMEM;
- bitmap_zero(alloc->table, num);
- for (i = 0; i < reserved; ++i)
- set_bit(i, alloc->table);
+ bitmap_set(alloc->table, 0, reserved);
return 0;
}
void mthca_alloc_cleanup(struct mthca_alloc *alloc)
{
- kfree(alloc->table);
+ bitmap_free(alloc->table);
}
/*
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index bdf5ed38de22..f330ce895d88 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1252,7 +1252,7 @@ static void get_board_id(void *vsd, char *board_id)
if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
- strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN);
+ strscpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN);
} else {
/*
* The board ID is a string but the firmware byte
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index f507c4cd46d3..b54bc8865dae 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -939,12 +939,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
- dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
- goto err_free_res;
- }
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+ goto err_free_res;
}
/* We can handle large RDMA requests, so allow larger segments. */
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ce0e0867e488..192f83fd7c8a 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -101,13 +101,13 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
return -1;
found:
- clear_bit(seg, buddy->bits[o]);
+ __clear_bit(seg, buddy->bits[o]);
--buddy->num_free[o];
while (o > order) {
--o;
seg <<= 1;
- set_bit(seg ^ 1, buddy->bits[o]);
+ __set_bit(seg ^ 1, buddy->bits[o]);
++buddy->num_free[o];
}
@@ -125,13 +125,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
spin_lock(&buddy->lock);
while (test_bit(seg ^ 1, buddy->bits[order])) {
- clear_bit(seg ^ 1, buddy->bits[order]);
+ __clear_bit(seg ^ 1, buddy->bits[order]);
--buddy->num_free[order];
seg >>= 1;
++order;
}
- set_bit(seg, buddy->bits[order]);
+ __set_bit(seg, buddy->bits[order]);
++buddy->num_free[order];
spin_unlock(&buddy->lock);
@@ -139,7 +139,7 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
- int i, s;
+ int i;
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
@@ -152,22 +152,20 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
- s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
+ buddy->bits[i] = bitmap_zalloc(1 << (buddy->max_order - i),
+ GFP_KERNEL);
if (!buddy->bits[i])
goto err_out_free;
- bitmap_zero(buddy->bits[i],
- 1 << (buddy->max_order - i));
}
- set_bit(0, buddy->bits[buddy->max_order]);
+ __set_bit(0, buddy->bits[buddy->max_order]);
buddy->num_free[buddy->max_order] = 1;
return 0;
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ bitmap_free(buddy->bits[i]);
err_out:
kfree(buddy->bits);
@@ -181,7 +179,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
int i;
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ bitmap_free(buddy->bits[i]);
kfree(buddy->bits);
kfree(buddy->num_free);
@@ -469,8 +467,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
mpt_entry->start = cpu_to_be64(iova);
mpt_entry->length = cpu_to_be64(total_size);
- memset(&mpt_entry->lkey, 0,
- sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
+ memset_startat(mpt_entry, 0, lkey);
if (mr->mtt)
mpt_entry->mtt_seg =
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 7ea970774839..69af65f1b332 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -31,8 +31,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/slab.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index ceee23ebc0f2..c46df53f26cf 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -50,14 +50,6 @@
#include <rdma/mthca-abi.h>
#include "mthca_memfree.h"
-static void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
@@ -78,7 +70,7 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
props->fw_ver = mdev->fw_ver;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(mdev, 1, 1,
@@ -140,7 +132,7 @@ static int mthca_query_port(struct ib_device *ibdev,
/* props being zeroed by the caller, avoid zeroing it here */
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -234,7 +226,7 @@ static int mthca_query_pkey(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -263,7 +255,7 @@ static int mthca_query_gid(struct ib_device *ibdev, u32 port,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -274,7 +266,7 @@ static int mthca_query_gid(struct ib_device *ibdev, u32 port,
memcpy(gid->raw, out_mad->data + 8, 8);
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -1006,7 +998,7 @@ static int mthca_init_node_data(struct mthca_dev *dev)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mthca_MAD_IFC(dev, 1, 1,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index c51c3f40700e..56f06c68f31a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1363,7 +1363,7 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
OCRDMA_HBA_ATTRB_PTNUM_MASK)
>> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
- strlcpy(dev->model_number,
+ strscpy(dev->model_number,
hba_attribs->controller_model_number,
sizeof(dev->model_number));
}
@@ -1506,7 +1506,6 @@ int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
{
int status = -ENOMEM;
- size_t pd_bitmap_size;
struct ocrdma_alloc_pd_range *cmd;
struct ocrdma_alloc_pd_range_rsp *rsp;
@@ -1528,10 +1527,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid &
OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK;
dev->pd_mgr->max_dpp_pd = rsp->pd_count;
- pd_bitmap_size =
- BITS_TO_LONGS(rsp->pd_count) * sizeof(long);
- dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size,
- GFP_KERNEL);
+ dev->pd_mgr->pd_dpp_bitmap = bitmap_zalloc(rsp->pd_count,
+ GFP_KERNEL);
}
kfree(cmd);
}
@@ -1547,9 +1544,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
dev->pd_mgr->pd_norm_start = rsp->dpp_page_pdid &
OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK;
dev->pd_mgr->max_normal_pd = rsp->pd_count;
- pd_bitmap_size = BITS_TO_LONGS(rsp->pd_count) * sizeof(long);
- dev->pd_mgr->pd_norm_bitmap = kzalloc(pd_bitmap_size,
- GFP_KERNEL);
+ dev->pd_mgr->pd_norm_bitmap = bitmap_zalloc(rsp->pd_count,
+ GFP_KERNEL);
}
kfree(cmd);
@@ -1611,8 +1607,8 @@ void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev)
static void ocrdma_free_pd_pool(struct ocrdma_dev *dev)
{
ocrdma_mbx_dealloc_pd_range(dev);
- kfree(dev->pd_mgr->pd_norm_bitmap);
- kfree(dev->pd_mgr->pd_dpp_bitmap);
+ bitmap_free(dev->pd_mgr->pd_norm_bitmap);
+ bitmap_free(dev->pd_mgr->pd_dpp_bitmap);
kfree(dev->pd_mgr);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7abf6cf1e937..5d4b3bc16493 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -62,20 +62,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("Dual BSD/GPL");
-void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
-{
- u8 mac_addr[6];
-
- memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
- guid[0] = mac_addr[0] ^ 2;
- guid[1] = mac_addr[1];
- guid[2] = mac_addr[2];
- guid[3] = 0xff;
- guid[4] = 0xfe;
- guid[5] = mac_addr[3];
- guid[6] = mac_addr[4];
- guid[7] = mac_addr[5];
-}
static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device,
u32 port_num)
{
@@ -203,7 +189,8 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
{
int ret;
- ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
+ addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid,
+ dev->nic_info.mac_addr);
BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
sizeof(OCRDMA_NODE_DESC));
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 735123d0e9ec..dd4021b11963 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -41,6 +41,7 @@
*/
#include <linux/dma-mapping.h>
+#include <net/addrconf.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
@@ -74,7 +75,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
memset(attr, 0, sizeof *attr);
memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
- ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
+ addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
+ dev->nic_info.mac_addr);
attr->max_mr_size = dev->attr.max_mr_size;
attr->page_size_cap = 0xffff000;
attr->vendor_id = dev->nic_info.pdev->vendor;
@@ -88,8 +90,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
attr->max_send_sge = dev->attr.max_send_sge;
attr->max_recv_sge = dev->attr.max_recv_sge;
attr->max_sge_rd = dev->attr.max_rdma_sge;
@@ -245,13 +247,13 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
{
u16 pd_bitmap_idx = 0;
- const unsigned long *pd_bitmap;
+ unsigned long *pd_bitmap;
if (dpp_pool) {
pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
dev->pd_mgr->max_dpp_pd);
- __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_dpp_bitmap);
+ __set_bit(pd_bitmap_idx, pd_bitmap);
dev->pd_mgr->pd_dpp_count++;
if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
@@ -259,7 +261,7 @@ static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
dev->pd_mgr->max_normal_pd);
- __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_norm_bitmap);
+ __set_bit(pd_bitmap_idx, pd_bitmap);
dev->pd_mgr->pd_norm_count++;
if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
@@ -1844,12 +1846,10 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq,
int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
- int status;
struct ocrdma_srq *srq;
srq = get_ocrdma_srq(ibsrq);
- status = ocrdma_mbx_query_srq(srq, srq_attr);
- return status;
+ return ocrdma_mbx_query_srq(srq, srq_attr);
}
int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
@@ -1960,7 +1960,6 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
const struct ib_send_wr *wr)
{
- int status;
struct ocrdma_sge *sge;
u32 wqe_size = sizeof(*hdr);
@@ -1972,8 +1971,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
sge = (struct ocrdma_sge *)(hdr + 1);
}
- status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
- return status;
+ return ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
}
static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index b73d742a520c..f860b7fcef33 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -59,7 +59,6 @@ int ocrdma_query_port(struct ib_device *ibdev, u32 port,
enum rdma_protocol_type
ocrdma_query_protocol(struct ib_device *device, u32 port_num);
-void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid);
int ocrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 65ce6d0f1885..ba0c3e4c07d8 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -344,6 +344,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
if (IS_IWARP(dev)) {
xa_init(&dev->qps);
dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
+ if (!dev->iwarp_wq) {
+ rc = -ENOMEM;
+ goto err1;
+ }
}
/* Allocate Status blocks for CNQ */
@@ -351,7 +355,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
GFP_KERNEL);
if (!dev->sb_array) {
rc = -ENOMEM;
- goto err1;
+ goto err_destroy_wq;
}
dev->cnq_array = kcalloc(dev->num_cnq,
@@ -402,6 +406,9 @@ err3:
kfree(dev->cnq_array);
err2:
kfree(dev->sb_array);
+err_destroy_wq:
+ if (IS_IWARP(dev))
+ destroy_workqueue(dev->iwarp_wq);
err1:
kfree(dev->sgid_tbl);
return rc;
@@ -500,7 +507,6 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev)
if (dev->int_info.msix_cnt) {
idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
vector = dev->int_info.msix[idx].vector;
- synchronize_irq(vector);
free_irq(vector, &dev->cnq_array[i]);
}
}
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 8def88cfa300..db9ef3e1eb97 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -418,6 +418,7 @@ struct qedr_qp {
u32 sq_psn;
u32 qkey;
u32 dest_qp_num;
+ u8 timeout;
/* Relevant to qps created from kernel space only (ULPs) */
u8 prev_wqe_size;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 9100009f0a23..d745ce9dc88a 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -134,7 +134,8 @@ int qedr_query_device(struct ib_device *ibdev,
attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
if (!rdma_protocol_iwarp(&dev->ibdev, 1))
attr->device_cap_flags |= IB_DEVICE_XRC;
@@ -1931,6 +1932,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
/* db offset was calculated in copy_qp_uresp, now set in the user q */
if (qedr_qp_has_sq(qp)) {
qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+ qp->sq.max_wr = attrs->cap.max_send_wr;
rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
&qp->usq.db_rec_data->db_data,
DB_REC_WIDTH_32B,
@@ -1941,6 +1943,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
if (qedr_qp_has_rq(qp)) {
qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+ qp->rq.max_wr = attrs->cap.max_recv_wr;
rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
&qp->urq.db_rec_data->db_data,
DB_REC_WIDTH_32B,
@@ -2610,6 +2613,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1 << max_t(int, attr->timeout - 8, 0);
else
qp_params.ack_timeout = 0;
+
+ qp->timeout = attr->timeout;
}
if (attr_mask & IB_QP_RETRY_CNT) {
@@ -2769,7 +2774,7 @@ int qedr_query_qp(struct ib_qp *ibqp,
rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
rdma_ah_set_sl(&qp_attr->ah_attr, 0);
- qp_attr->timeout = params.timeout;
+ qp_attr->timeout = qp->timeout;
qp_attr->rnr_retry = params.rnr_retry;
qp_attr->retry_cnt = params.retry_cnt;
qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
@@ -3079,7 +3084,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
else
DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
- goto err0;
+ goto err1;
}
/* Index only, 18 bit long, lkey = itid << 8 | key */
@@ -3103,7 +3108,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
if (rc) {
DP_ERR(dev, "roce register tid returned an error %d\n", rc);
- goto err1;
+ goto err2;
}
mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
@@ -3112,8 +3117,10 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
return mr;
-err1:
+err2:
dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+ qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
err0:
kfree(mr);
return ERR_PTR(rc);
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 9363bccfc6e7..26c615772be3 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -196,7 +196,7 @@ struct qib_ctxtdata {
pid_t pid;
pid_t subpid[QLOGIC_IB_MAX_SUBCTXT];
/* same size as task_struct .comm[], command that opened context */
- char comm[16];
+ char comm[TASK_COMM_LEN];
/* pkeys set by this use of this ctxt */
u16 pkeys[4];
/* so file ops can get at unit */
@@ -321,7 +321,7 @@ struct qib_verbs_txreq {
* These 7 values (SDR, DDR, and QDR may be ORed for auto-speed
* negotiation) are used for the 3rd argument to path_f_set_ib_cfg
* with cmd QIB_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
- * are also the the possible values for qib_link_speed_enabled and active
+ * are also the possible values for qib_link_speed_enabled and active
* The values were chosen to match values used within the IB spec.
*/
#define QIB_IB_SDR 1
@@ -678,7 +678,7 @@ struct qib_pportdata {
/* Observers. Not to be taken lightly, possibly not to ship. */
/*
* If a diag read or write is to (bottom <= offset <= top),
- * the "hoook" is called, allowing, e.g. shadows to be
+ * the "hook" is called, allowing, e.g. shadows to be
* updated in sync with the driver. struct diag_observer
* is the "visible" part.
*/
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 63854f4b6524..3937144b2ae5 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -153,7 +153,7 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt;
/*
* for this use, may be cfgctxts summed over all chips that
- * are are configured and present
+ * are configured and present
*/
kinfo->spi_nctxts = dd->cfgctxts;
/* unit (chip/board) our context is on */
@@ -851,7 +851,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
ret = -EPERM;
goto bail;
}
- /* don't allow them to later change to writeable with mprotect */
+ /* don't allow them to later change to writable with mprotect */
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start;
@@ -941,7 +941,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
goto bail;
}
/*
- * Don't allow permission to later change to writeable
+ * Don't allow permission to later change to writable
* with mprotect.
*/
vma->vm_flags &= ~VM_MAYWRITE;
@@ -1321,7 +1321,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
rcd->tid_pg_list = ptmp;
rcd->pid = current->pid;
init_waitqueue_head(&dd->rcd[ctxt]->wait);
- strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
+ get_task_comm(rcd->comm, current);
ctxt_fp(fp) = rcd;
qib_stats.sps_ctxts++;
dd->freectxts--;
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index a0c5f3bdc324..a973905afd13 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -32,7 +32,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/mount.h>
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a9b83bc13f4a..aea571943768 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3030,7 +3030,7 @@ static int qib_6120_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
/* Does read/modify/write to appropriate registers to
* set output and direction bits selected by mask.
- * these are in their canonical postions (e.g. lsb of
+ * these are in their canonical positions (e.g. lsb of
* dir will end up in D48 of extctrl on existing chips).
* returns contents of GP Inputs.
*/
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index d1c0bc31869f..6af57067c32e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -58,7 +58,7 @@ static void qib_set_ib_7220_lstate(struct qib_pportdata *, u16, u16);
/*
* This file contains almost all the chip-specific register information and
* access functions for the QLogic QLogic_IB 7220 PCI-Express chip, with the
- * exception of SerDes support, which in in qib_sd7220.c.
+ * exception of SerDes support, which in qib_sd7220.c.
*/
/* Below uses machine-generated qib_chipnum_regs.h file */
@@ -634,7 +634,7 @@ static const struct qib_hwerror_msgs qib_7220_hwerror_msgs[] = {
QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT,
"PCIe completion timeout"),
/*
- * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
+ * In practice, it's unlikely that we'll see PCIe PLL, or bus
* parity or memory parity error failures, because most likely we
* won't be able to talk to the core of the chip. Nonetheless, we
* might see them, if they are in parts of the PCIe core that aren't
@@ -2988,7 +2988,7 @@ done:
* the utility. Names need to be 12 chars or less (w/o newline), for proper
* display by utility.
* Non-error counters are first.
- * Start of "error" conters is indicated by a leading "E " on the first
+ * Start of "error" counters is indicated by a leading "E " on the first
* "error" counter, and doesn't count in label length.
* The EgrOvfl list needs to be last so we truncate them at the configured
* context count for the device.
@@ -3742,7 +3742,7 @@ static int qib_7220_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
/*
* Does read/modify/write to appropriate registers to
* set output and direction bits selected by mask.
- * these are in their canonical postions (e.g. lsb of
+ * these are in their canonical positions (e.g. lsb of
* dir will end up in D48 of extctrl on existing chips).
* returns contents of GP Inputs.
*/
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ab98b6a3ae1e..9d2dd135b784 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2124,7 +2124,7 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
isfatal = 1;
- strlcpy(msg,
+ strscpy(msg,
"[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
/* ignore from now on, so disable until driver reloaded */
@@ -2850,9 +2850,9 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
qib_7322_free_irq(dd);
kfree(dd->cspec->cntrs);
- kfree(dd->cspec->sendchkenable);
- kfree(dd->cspec->sendgrhchk);
- kfree(dd->cspec->sendibchk);
+ bitmap_free(dd->cspec->sendchkenable);
+ bitmap_free(dd->cspec->sendgrhchk);
+ bitmap_free(dd->cspec->sendibchk);
kfree(dd->cspec->msix_entries);
for (i = 0; i < dd->num_pports; i++) {
unsigned long flags;
@@ -5665,7 +5665,7 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
/*
* Does read/modify/write to appropriate registers to
* set output and direction bits selected by mask.
- * these are in their canonical postions (e.g. lsb of
+ * these are in their canonical positions (e.g. lsb of
* dir will end up in D48 of extctrl on existing chips).
* returns contents of GP Inputs.
*/
@@ -6383,18 +6383,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
features = qib_7322_boardname(dd);
/* now that piobcnt2k and 4k set, we can allocate these */
- sbufcnt = dd->piobcnt2k + dd->piobcnt4k +
- NUM_VL15_BUFS + BITS_PER_LONG - 1;
- sbufcnt /= BITS_PER_LONG;
- dd->cspec->sendchkenable =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendchkenable),
- GFP_KERNEL);
- dd->cspec->sendgrhchk =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendgrhchk),
- GFP_KERNEL);
- dd->cspec->sendibchk =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendibchk),
- GFP_KERNEL);
+ sbufcnt = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS;
+
+ dd->cspec->sendchkenable = bitmap_zalloc(sbufcnt, GFP_KERNEL);
+ dd->cspec->sendgrhchk = bitmap_zalloc(sbufcnt, GFP_KERNEL);
+ dd->cspec->sendibchk = bitmap_zalloc(sbufcnt, GFP_KERNEL);
if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk ||
!dd->cspec->sendibchk) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index d1a72e89e297..45211008449f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1106,8 +1106,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (!qib_cpulist_count) {
u32 count = num_online_cpus();
- qib_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long),
- GFP_KERNEL);
+ qib_cpulist = bitmap_zalloc(count, GFP_KERNEL);
if (qib_cpulist)
qib_cpulist_count = count;
}
@@ -1279,7 +1278,7 @@ static void __exit qib_ib_cleanup(void)
#endif
qib_cpulist_count = 0;
- kfree(qib_cpulist);
+ bitmap_free(qib_cpulist);
WARN_ON(!xa_empty(&qib_dev_table));
qib_dev_cleanup();
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index cb2a02d671e2..692b64efad97 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -295,7 +295,7 @@ void qib_free_irq(struct qib_devdata *dd)
* Setup pcie interrupt stuff again after a reset. I'd like to just call
* pci_enable_msi() again for msi, but when I do that,
* the MSI enable bit doesn't get set in the command word, and
- * we switch to to a different interrupt vector, which is confusing,
+ * we switch to a different interrupt vector, which is confusing,
* so I instead just do it all inline. Perhaps somehow can tie this
* into the PCIe hotplug support at some point
*/
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index 81b810d006c0..1dc3ccf0cf1f 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -587,7 +587,7 @@ static int epb_access(struct qib_devdata *dd, int sdnum, int claim)
/* Need to release */
u64 pollval;
/*
- * The only writeable bits are the request and CS.
+ * The only writable bits are the request and CS.
* Both should be clear
*/
u64 newval = 0;
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 0a3b28142c05..41c272980f91 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -541,7 +541,7 @@ static struct attribute *port_diagc_attributes[] = {
};
static const struct attribute_group port_diagc_group = {
- .name = "linkcontrol",
+ .name = "diag_counters",
.attrs = port_diagc_attributes,
};
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index ac11943a5ddb..bf2f30d67949 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -941,7 +941,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
&addrlimit) ||
addrlimit > type_max(typeof(pkt->addrlimit))) {
ret = -EINVAL;
- goto free_pbc;
+ goto free_pkt;
}
pkt->addrlimit = addrlimit;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index ef91bff5c23c..0080f0be72fe 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -425,7 +425,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
}
#endif
-static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
+static void qib_copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
u32 length, unsigned flush_wc)
{
u32 extra = 0;
@@ -975,7 +975,7 @@ static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
qib_pio_copy(piobuf, addr, dwords);
goto done;
}
- copy_io(piobuf, ss, len, flush_wc);
+ qib_copy_io(piobuf, ss, len, flush_wc);
done:
if (dd->flags & QIB_USE_SPCL_TRIG) {
u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index e5a3f02fb078..10a8cd5ba076 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -32,7 +32,6 @@
*/
#include <linux/debugfs.h>
-#include <linux/module.h>
#include "usnic.h"
#include "usnic_log.h"
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index d346dd48e731..46653ad56f5a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -534,6 +534,11 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
struct usnic_ib_vf *vf;
enum usnic_vnic_res_type res_type;
+ if (!device_iommu_mapped(&pdev->dev)) {
+ usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
+ return -EPERM;
+ }
+
vf = kzalloc(sizeof(*vf), GFP_KERNEL);
if (!vf)
return -ENOMEM;
@@ -642,12 +647,6 @@ static int __init usnic_ib_init(void)
printk_once(KERN_INFO "%s", usnic_version);
- err = usnic_uiom_init(DRV_NAME);
- if (err) {
- usnic_err("Unable to initialize umem with err %d\n", err);
- return err;
- }
-
err = pci_register_driver(&usnic_ib_pci_driver);
if (err) {
usnic_err("Unable to register with PCI\n");
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 3b60fa9cb58d..59bfbfaee325 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -32,7 +32,6 @@
*/
#include <linux/bug.h>
#include <linux/errno.h>
-#include <linux/module.h>
#include <linux/spinlock.h>
#include "usnic_log.h"
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 586b0e52ba7f..fdb63a8fb997 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -31,7 +31,6 @@
*
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -243,10 +242,11 @@ static struct attribute *usnic_ib_qpn_default_attrs[] = {
&qpn_attr_summary.attr,
NULL
};
+ATTRIBUTE_GROUPS(usnic_ib_qpn_default);
static struct kobj_type usnic_ib_qpn_type = {
.sysfs_ops = &usnic_ib_qpn_sysfs_ops,
- .default_attrs = usnic_ib_qpn_default_attrs
+ .default_groups = usnic_ib_qpn_default_groups,
};
int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 756a83bcff58..6e8c4fbb8083 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/errno.h>
@@ -306,7 +305,8 @@ int usnic_ib_query_device(struct ib_device *ibdev,
props->max_qp = qp_per_vf *
kref_read(&us_ibdev->vf_cnt);
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
- IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_SYS_IMAGE_GUID;
+ props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
kref_read(&us_ibdev->vf_cnt);
props->max_pd = USNIC_UIOM_MAX_PD_CNT;
@@ -442,12 +442,10 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct usnic_ib_pd *pd = to_upd(ibpd);
- void *umem_pd;
- umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
- if (IS_ERR_OR_NULL(umem_pd)) {
- return umem_pd ? PTR_ERR(umem_pd) : -ENOMEM;
- }
+ pd->umem_pd = usnic_uiom_alloc_pd(ibpd->device->dev.parent);
+ if (IS_ERR(pd->umem_pd))
+ return PTR_ERR(pd->umem_pd);
return 0;
}
@@ -709,4 +707,3 @@ int usnic_ib_mmap(struct ib_ucontext *context,
usnic_err("No VF %u found\n", vfid);
return -EINVAL;
}
-
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index 82dd810bc000..dc37066900a5 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -32,7 +32,6 @@
*/
#include <linux/bitmap.h>
#include <linux/file.h>
-#include <linux/module.h>
#include <linux/slab.h>
#include <net/inet_sock.h>
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 760b254ba42d..67923ced6e2d 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -40,7 +40,6 @@
#include <linux/iommu.h>
#include <linux/workqueue.h>
#include <linux/list.h>
-#include <linux/pci.h>
#include <rdma/ib_verbs.h>
#include "usnic_log.h"
@@ -96,7 +95,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
int ret;
int off;
int i;
- int flags;
dma_addr_t pa;
unsigned int gup_flags;
struct mm_struct *mm;
@@ -133,8 +131,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
goto out;
}
- flags = IOMMU_READ | IOMMU_CACHE;
- flags |= (writable) ? IOMMU_WRITE : 0;
gup_flags = FOLL_WRITE;
gup_flags |= (writable) ? 0 : FOLL_FORCE;
cur_base = addr & PAGE_MASK;
@@ -439,7 +435,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr)
__usnic_uiom_release_tail(uiomr);
}
-struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev)
{
struct usnic_uiom_pd *pd;
void *domain;
@@ -448,7 +444,7 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
if (!pd)
return ERR_PTR(-ENOMEM);
- pd->domain = domain = iommu_domain_alloc(&pci_bus_type);
+ pd->domain = domain = iommu_domain_alloc(dev->bus);
if (!domain) {
usnic_err("Failed to allocate IOMMU domain");
kfree(pd);
@@ -483,7 +479,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
if (err)
goto out_free_dev;
- if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
usnic_err("IOMMU of %s does not support cache coherency\n",
dev_name(dev));
err = -EINVAL;
@@ -556,13 +552,3 @@ void usnic_uiom_free_dev_list(struct device **devs)
{
kfree(devs);
}
-
-int usnic_uiom_init(char *drv_name)
-{
- if (!iommu_present(&pci_bus_type)) {
- usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
- return -EPERM;
- }
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 7ec8991ace67..5a9acf941510 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -80,7 +80,7 @@ struct usnic_uiom_chunk {
struct scatterlist page_list[];
};
-struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev);
void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
@@ -91,5 +91,4 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
unsigned long addr, size_t size,
int access, int dmasync);
void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr);
-int usnic_uiom_init(char *drv_name);
#endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index ebe08f348453..0c47f73aaed5 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -31,7 +31,6 @@
*
*/
#include <linux/errno.h>
-#include <linux/module.h>
#include <linux/pci.h>
#include "usnic_ib.h"
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
index bf51357ea3aa..9a4de962e947 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
@@ -63,12 +63,12 @@ int pvrdma_uar_table_init(struct pvrdma_dev *dev)
tbl->max = num;
tbl->mask = mask;
spin_lock_init(&tbl->lock);
- tbl->table = kcalloc(BITS_TO_LONGS(num), sizeof(long), GFP_KERNEL);
+ tbl->table = bitmap_zalloc(num, GFP_KERNEL);
if (!tbl->table)
return -ENOMEM;
/* 0th UAR is taken by the device. */
- set_bit(0, tbl->table);
+ __set_bit(0, tbl->table);
return 0;
}
@@ -77,7 +77,7 @@ void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev)
{
struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
- kfree(tbl->table);
+ bitmap_free(tbl->table);
}
int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
@@ -100,7 +100,7 @@ int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
return -ENOMEM;
}
- set_bit(obj, tbl->table);
+ __set_bit(obj, tbl->table);
obj |= tbl->top;
spin_unlock_irqrestore(&tbl->lock, flags);
@@ -120,7 +120,7 @@ void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
obj = uar->index & (tbl->max - 1);
spin_lock_irqsave(&tbl->lock, flags);
- clear_bit(obj, tbl->table);
+ __clear_bit(obj, tbl->table);
tbl->last = min(tbl->last, obj);
tbl->top = (tbl->top + tbl->max) & tbl->mask;
spin_unlock_irqrestore(&tbl->lock, flags);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 105f3a155939..343288b02792 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -811,12 +811,10 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
}
/* Enable 64-Bit DMA */
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0) {
- ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
- if (ret != 0) {
- dev_err(&pdev->dev, "dma_set_mask failed\n");
- goto err_free_resource;
- }
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ dev_err(&pdev->dev, "dma_set_mask failed\n");
+ goto err_free_resource;
}
dma_set_max_seg_size(&pdev->dev, UINT_MAX);
pci_set_master(pdev);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 3305f2744bfa..3acab569fbb9 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -2775,7 +2775,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
EXPORT_SYMBOL(rvt_qp_iter);
/*
- * This should be called with s_lock held.
+ * This should be called with s_lock and r_lock held.
*/
void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
@@ -3073,6 +3073,8 @@ do_write:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
+ if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1)))
+ goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->atomic_wr.remote_addr,
wqe->atomic_wr.rkey,
@@ -3132,7 +3134,9 @@ send_comp:
rvp->n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+ spin_lock(&sqp->r_lock);
rvt_send_complete(sqp, wqe, send_status);
+ spin_unlock(&sqp->r_lock);
if (local_ops) {
atomic_dec(&sqp->local_ops_pending);
local_ops = 0;
@@ -3186,9 +3190,15 @@ serr:
spin_unlock_irqrestore(&qp->r_lock, flags);
serr_no_r_lock:
spin_lock_irqsave(&sqp->s_lock, flags);
+ spin_lock(&sqp->r_lock);
rvt_send_complete(sqp, wqe, send_status);
+ spin_unlock(&sqp->r_lock);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+ int lastwqe;
+
+ spin_lock(&sqp->r_lock);
+ lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+ spin_unlock(&sqp->r_lock);
sqp->s_flags &= ~RVT_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 59481ae39505..d61f8de7f21c 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -15,7 +15,7 @@
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("RDMA Verbs Transport Library");
-static int rvt_init(void)
+static int __init rvt_init(void)
{
int ret = rvt_driver_cq_init();
@@ -26,7 +26,7 @@ static int rvt_init(void)
}
module_init(rvt_init);
-static void rvt_cleanup(void)
+static void __exit rvt_cleanup(void)
{
rvt_cq_exit();
}
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 1e24673e9318..5395a581f4bb 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -22,5 +22,4 @@ rdma_rxe-y := \
rxe_mcast.o \
rxe_task.o \
rxe_net.o \
- rxe_sysfs.o \
rxe_hw_counters.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 8e0f9c489cab..51daac5c4feb 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -13,8 +13,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
MODULE_LICENSE("Dual BSD/GPL");
-bool rxe_initialized;
-
/* free resources for a rxe device all objects created for this device must
* have been destroyed
*/
@@ -30,8 +28,8 @@ void rxe_dealloc(struct ib_device *ib_dev)
rxe_pool_cleanup(&rxe->cq_pool);
rxe_pool_cleanup(&rxe->mr_pool);
rxe_pool_cleanup(&rxe->mw_pool);
- rxe_pool_cleanup(&rxe->mc_grp_pool);
- rxe_pool_cleanup(&rxe->mc_elem_pool);
+
+ WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree));
if (rxe->tfm)
crypto_free_shash(rxe->tfm);
@@ -48,6 +46,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_qp = RXE_MAX_QP;
rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
+ rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG;
rxe->attr.max_send_sge = RXE_MAX_SGE;
rxe->attr.max_recv_sge = RXE_MAX_SGE;
rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
@@ -116,106 +115,37 @@ static void rxe_init_ports(struct rxe_dev *rxe)
}
/* init pools of managed objects */
-static int rxe_init_pools(struct rxe_dev *rxe)
+static void rxe_init_pools(struct rxe_dev *rxe)
{
- int err;
-
- err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC,
- rxe->max_ucontext);
- if (err)
- goto err1;
-
- err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD,
- rxe->attr.max_pd);
- if (err)
- goto err2;
-
- err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,
- rxe->attr.max_ah);
- if (err)
- goto err3;
-
- err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,
- rxe->attr.max_srq);
- if (err)
- goto err4;
-
- err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,
- rxe->attr.max_qp);
- if (err)
- goto err5;
-
- err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,
- rxe->attr.max_cq);
- if (err)
- goto err6;
-
- err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,
- rxe->attr.max_mr);
- if (err)
- goto err7;
-
- err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,
- rxe->attr.max_mw);
- if (err)
- goto err8;
-
- err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,
- rxe->attr.max_mcast_grp);
- if (err)
- goto err9;
-
- err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
- rxe->attr.max_total_mcast_qp_attach);
- if (err)
- goto err10;
-
- return 0;
-
-err10:
- rxe_pool_cleanup(&rxe->mc_grp_pool);
-err9:
- rxe_pool_cleanup(&rxe->mw_pool);
-err8:
- rxe_pool_cleanup(&rxe->mr_pool);
-err7:
- rxe_pool_cleanup(&rxe->cq_pool);
-err6:
- rxe_pool_cleanup(&rxe->qp_pool);
-err5:
- rxe_pool_cleanup(&rxe->srq_pool);
-err4:
- rxe_pool_cleanup(&rxe->ah_pool);
-err3:
- rxe_pool_cleanup(&rxe->pd_pool);
-err2:
- rxe_pool_cleanup(&rxe->uc_pool);
-err1:
- return err;
+ rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC);
+ rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD);
+ rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH);
+ rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ);
+ rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP);
+ rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ);
+ rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR);
+ rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW);
}
/* initialize rxe device state */
-static int rxe_init(struct rxe_dev *rxe)
+static void rxe_init(struct rxe_dev *rxe)
{
- int err;
-
/* init default device parameters */
rxe_init_device_param(rxe);
rxe_init_ports(rxe);
-
- err = rxe_init_pools(rxe);
- if (err)
- return err;
+ rxe_init_pools(rxe);
/* init pending mmap list */
spin_lock_init(&rxe->mmap_offset_lock);
spin_lock_init(&rxe->pending_lock);
INIT_LIST_HEAD(&rxe->pending_mmaps);
- mutex_init(&rxe->usdev_lock);
+ /* init multicast support */
+ spin_lock_init(&rxe->mcg_lock);
+ rxe->mcg_tree = RB_ROOT;
- return 0;
+ mutex_init(&rxe->usdev_lock);
}
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
@@ -237,12 +167,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
*/
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name)
{
- int err;
-
- err = rxe_init(rxe);
- if (err)
- return err;
-
+ rxe_init(rxe);
rxe_set_mtu(rxe, mtu);
return rxe_register_device(rxe, ibdev_name);
@@ -290,7 +215,6 @@ static int __init rxe_module_init(void)
return err;
rdma_link_register(&rxe_link_ops);
- rxe_initialized = true;
pr_info("loaded\n");
return 0;
}
@@ -301,7 +225,6 @@ static void __exit rxe_module_exit(void)
ib_unregister_driver(RDMA_DRIVER_RXE);
rxe_net_exit();
- rxe_initialized = false;
pr_info("unloaded\n");
}
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 1bb3fb618bf5..30fbdf3bc76a 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -12,7 +12,6 @@
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
#include <linux/skbuff.h>
#include <rdma/ib_verbs.h>
@@ -39,8 +38,6 @@
#define RXE_ROCE_V2_SPORT (0xc000)
-extern bool rxe_initialized;
-
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 38c7b6fb39d7..3b05314ca739 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -99,11 +99,14 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr)
av->network_type = type;
}
-struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp)
{
struct rxe_ah *ah;
u32 ah_num;
+ if (ahp)
+ *ahp = NULL;
+
if (!pkt || !pkt->qp)
return NULL;
@@ -117,10 +120,22 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
if (ah_num) {
/* only new user provider or kernel client */
ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num);
- if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) {
+ if (!ah) {
pr_warn("Unable to find AH matching ah_num\n");
return NULL;
}
+
+ if (rxe_ah_pd(ah) != pkt->qp->pd) {
+ pr_warn("PDs don't match for AH and QP\n");
+ rxe_put(ah);
+ return NULL;
+ }
+
+ if (ahp)
+ *ahp = ah;
+ else
+ rxe_put(ah);
+
return &ah->av;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index d771ba8449a1..fb0c008af78c 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+ pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
+
if (qp->valid) {
qp->comp.timeout = 1;
rxe_run_task(&qp->comp.task, 1);
@@ -458,8 +460,6 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
- unsigned long flags;
-
if (wqe->has_rd_atomic) {
wqe->has_rd_atomic = 0;
atomic_inc(&qp->req.rd_atomic);
@@ -472,11 +472,11 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
/* state_lock used by requester & completer */
- spin_lock_irqsave(&qp->state_lock, flags);
+ spin_lock_bh(&qp->state_lock);
if ((qp->req.state == QP_STATE_DRAIN) &&
(qp->comp.psn == qp->req.psn)) {
qp->req.state = QP_STATE_DRAINED;
- spin_unlock_irqrestore(&qp->state_lock, flags);
+ spin_unlock_bh(&qp->state_lock);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -488,7 +488,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
qp->ibqp.qp_context);
}
} else {
- spin_unlock_irqrestore(&qp->state_lock, flags);
+ spin_unlock_bh(&qp->state_lock);
}
}
@@ -528,7 +528,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
struct rxe_queue *q = qp->sq.queue;
while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
@@ -550,7 +550,7 @@ static void free_pkt(struct rxe_pkt_info *pkt)
struct ib_device *dev = qp->ibqp.device;
kfree_skb(skb);
- rxe_drop_ref(qp);
+ rxe_put(qp);
ib_device_put(dev);
}
@@ -562,16 +562,16 @@ int rxe_completer(void *arg)
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
- int ret = 0;
+ int ret;
- rxe_add_ref(qp);
+ if (!rxe_get(qp))
+ return -EAGAIN;
- if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
- qp->req.state == QP_STATE_RESET) {
+ if (!qp->valid || qp->comp.state == QP_STATE_ERROR ||
+ qp->comp.state == QP_STATE_RESET) {
rxe_drain_resp_pkts(qp, qp->valid &&
- qp->req.state == QP_STATE_ERROR);
- ret = -EAGAIN;
- goto done;
+ qp->comp.state == QP_STATE_ERROR);
+ goto exit;
}
if (qp->comp.timeout) {
@@ -581,10 +581,8 @@ int rxe_completer(void *arg)
qp->comp.timeout_retry = 0;
}
- if (qp->req.need_retry) {
- ret = -EAGAIN;
- goto done;
- }
+ if (qp->req.need_retry)
+ goto exit;
state = COMPST_GET_ACK;
@@ -677,8 +675,7 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- ret = -EAGAIN;
- goto done;
+ goto exit;
case COMPST_ERROR_RETRY:
/* we come here if the retry timer fired and we did
@@ -690,10 +687,8 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted)) {
- ret = -EAGAIN;
- goto done;
- }
+ if (!wqe || (wqe->state == wqe_state_posted))
+ goto exit;
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
@@ -731,18 +726,21 @@ int rxe_completer(void *arg)
break;
case COMPST_RNR_RETRY:
+ /* we come here if we received an RNR NAK */
if (qp->comp.rnr_retry > 0) {
if (qp->comp.rnr_retry != 7)
qp->comp.rnr_retry--;
- qp->req.need_retry = 1;
+ /* don't start a retry flow until the
+ * rnr timer has fired
+ */
+ qp->req.wait_for_rnr_timer = 1;
pr_debug("qp#%d set rnr nak timer\n",
qp_num(qp));
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
- ret = -EAGAIN;
- goto done;
+ goto exit;
} else {
rxe_counter_inc(rxe,
RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -755,15 +753,23 @@ int rxe_completer(void *arg)
WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
- ret = -EAGAIN;
- goto done;
+ goto exit;
}
}
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the tasklet. A zero return
+ * will continue looping and return to rxe_completer
+ */
done:
+ ret = 0;
+ goto out;
+exit:
+ ret = -EAGAIN;
+out:
if (pkt)
free_pkt(pkt);
- rxe_drop_ref(qp);
+ rxe_put(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 6848426c074f..b1a0ab3cd4bd 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -19,16 +19,16 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
}
if (cqe > rxe->attr.max_cqe) {
- pr_warn("cqe(%d) > max_cqe(%d)\n",
- cqe, rxe->attr.max_cqe);
+ pr_debug("cqe(%d) > max_cqe(%d)\n",
+ cqe, rxe->attr.max_cqe);
goto err1;
}
if (cq) {
count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
- pr_warn("cqe(%d) < current # elements in queue (%d)",
- cqe, count);
+ pr_debug("cqe(%d) < current # elements in queue (%d)",
+ cqe, count);
goto err1;
}
}
@@ -106,9 +106,9 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe,
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
{
struct ib_event ev;
- unsigned long flags;
int full;
void *addr;
+ unsigned long flags;
spin_lock_irqsave(&cq->cq_lock, flags);
@@ -150,9 +150,9 @@ void rxe_cq_disable(struct rxe_cq *cq)
spin_unlock_irqrestore(&cq->cq_lock, flags);
}
-void rxe_cq_cleanup(struct rxe_pool_entry *arg)
+void rxe_cq_cleanup(struct rxe_pool_elem *elem)
{
- struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem);
+ struct rxe_cq *cq = container_of(elem, typeof(*cq), elem);
if (cq->queue)
rxe_queue_cleanup(cq->queue);
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index e03af3012590..46bb07c5c4df 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -151,18 +151,8 @@ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt)
payload_size(pkt) + bth_pad(pkt));
icrc = ~icrc;
- if (unlikely(icrc != pkt_icrc)) {
- if (skb->protocol == htons(ETH_P_IPV6))
- pr_warn_ratelimited("bad ICRC from %pI6c\n",
- &ipv6_hdr(skb)->saddr);
- else if (skb->protocol == htons(ETH_P_IP))
- pr_warn_ratelimited("bad ICRC from %pI4\n",
- &ip_hdr(skb)->saddr);
- else
- pr_warn_ratelimited("bad ICRC from unknown\n");
-
+ if (unlikely(icrc != pkt_icrc))
return -EINVAL;
- }
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 1ca43b859d80..c2a5c8814a48 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -19,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
-struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp);
/* rxe_cq.c */
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
@@ -37,21 +37,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
void rxe_cq_disable(struct rxe_cq *cq);
-void rxe_cq_cleanup(struct rxe_pool_entry *arg);
+void rxe_cq_cleanup(struct rxe_pool_elem *elem);
/* rxe_mcast.c */
-int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
- struct rxe_mc_grp **grp_p);
-
-int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct rxe_mc_grp *grp);
-
-int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- union ib_gid *mgid);
-
-void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
-
-void rxe_mc_cleanup(struct rxe_pool_entry *arg);
+struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
+int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
+int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
+void rxe_cleanup_mcg(struct kref *kref);
/* rxe_mmap.c */
struct rxe_mmap_info {
@@ -72,10 +64,10 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
/* rxe_mr.c */
u8 rxe_get_next_key(u32 last_key);
-void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
-int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
+void rxe_mr_init_dma(int access, struct rxe_mr *mr);
+int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr);
-int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
+int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
enum rxe_mr_copy_dir dir);
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
@@ -85,11 +77,10 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
enum rxe_mr_lookup_type type);
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
-int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
-int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr);
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
-void rxe_mr_cleanup(struct rxe_pool_entry *arg);
+void rxe_mr_cleanup(struct rxe_pool_elem *elem);
/* rxe_mw.c */
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
@@ -97,41 +88,32 @@ int rxe_dealloc_mw(struct ib_mw *ibmw);
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
-void rxe_mw_cleanup(struct rxe_pool_entry *arg);
+void rxe_mw_cleanup(struct rxe_pool_elem *elem);
/* rxe_net.c */
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
-int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb);
+int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb);
int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
struct sk_buff *skb);
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
-int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
-int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
/* rxe_qp.c */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
-
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct ib_qp_init_attr *init,
struct rxe_create_qp_resp __user *uresp,
struct ib_pd *ibpd, struct ib_udata *udata);
-
int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
-
int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_attr *attr, int mask);
-
int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask, struct ib_udata *udata);
-
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
-
void rxe_qp_error(struct rxe_qp *qp);
-
-void rxe_qp_destroy(struct rxe_qp *qp);
-
-void rxe_qp_cleanup(struct rxe_pool_entry *arg);
+int rxe_qp_chk_destroy(struct rxe_qp *qp);
+void rxe_qp_cleanup(struct rxe_pool_elem *elem);
static inline int qp_num(struct rxe_qp *qp)
{
@@ -162,7 +144,7 @@ static inline int rcv_wqe_size(int max_sge)
max_sge * sizeof(struct ib_sge);
}
-void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);
+void free_rd_atomic_resource(struct resp_res *res);
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
{
@@ -175,18 +157,16 @@ void retransmit_timer(struct timer_list *t);
void rnr_nak_timer(struct timer_list *t);
/* rxe_srq.c */
-#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
-
-int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
- struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
-
+int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init);
int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_init_attr *init, struct ib_udata *udata,
struct rxe_create_srq_resp __user *uresp);
-
+int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata);
+void rxe_srq_cleanup(struct rxe_pool_elem *elem);
void rxe_dealloc(struct ib_device *ib_dev);
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 1c1d1b53312d..86cc2e18a7fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -1,179 +1,479 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
+ * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved.
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
+/*
+ * rxe_mcast.c implements driver support for multicast transport.
+ * It is based on two data structures struct rxe_mcg ('mcg') and
+ * struct rxe_mca ('mca'). An mcg is allocated each time a qp is
+ * attached to a new mgid for the first time. These are indexed by
+ * a red-black tree using the mgid. This data structure is searched
+ * for the mcg when a multicast packet is received and when another
+ * qp is attached to the same mgid. It is cleaned up when the last qp
+ * is detached from the mcg. Each time a qp is attached to an mcg an
+ * mca is created. It holds a pointer to the qp and is added to a list
+ * of qp's that are attached to the mcg. The qp_list is used to replicate
+ * mcast packets in the rxe receive path.
+ */
+
#include "rxe.h"
-#include "rxe_loc.h"
-/* caller should hold mc_grp_pool->pool_lock */
-static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe,
- struct rxe_pool *pool,
- union ib_gid *mgid)
+/**
+ * rxe_mcast_add - add multicast address to rxe device
+ * @rxe: rxe device object
+ * @mgid: multicast address as a gid
+ *
+ * Returns 0 on success else an error
+ */
+static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
- int err;
- struct rxe_mc_grp *grp;
+ unsigned char ll_addr[ETH_ALEN];
- grp = rxe_alloc_locked(&rxe->mc_grp_pool);
- if (!grp)
- return ERR_PTR(-ENOMEM);
+ ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
- INIT_LIST_HEAD(&grp->qp_list);
- spin_lock_init(&grp->mcg_lock);
- grp->rxe = rxe;
- rxe_add_key_locked(grp, mgid);
+ return dev_mc_add(rxe->ndev, ll_addr);
+}
- err = rxe_mcast_add(rxe, mgid);
- if (unlikely(err)) {
- rxe_drop_key_locked(grp);
- rxe_drop_ref(grp);
- return ERR_PTR(err);
+/**
+ * rxe_mcast_del - delete multicast address from rxe device
+ * @rxe: rxe device object
+ * @mgid: multicast address as a gid
+ *
+ * Returns 0 on success else an error
+ */
+static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+ unsigned char ll_addr[ETH_ALEN];
+
+ ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+
+ return dev_mc_del(rxe->ndev, ll_addr);
+}
+
+/**
+ * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
+ * @mcg: mcg object with an embedded red-black tree node
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock and
+ * is responsible to avoid adding the same mcg twice to the tree.
+ */
+static void __rxe_insert_mcg(struct rxe_mcg *mcg)
+{
+ struct rb_root *tree = &mcg->rxe->mcg_tree;
+ struct rb_node **link = &tree->rb_node;
+ struct rb_node *node = NULL;
+ struct rxe_mcg *tmp;
+ int cmp;
+
+ while (*link) {
+ node = *link;
+ tmp = rb_entry(node, struct rxe_mcg, node);
+
+ cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
+ if (cmp > 0)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
}
- return grp;
+ rb_link_node(&mcg->node, node, link);
+ rb_insert_color(&mcg->node, tree);
}
-int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
- struct rxe_mc_grp **grp_p)
+/**
+ * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
+ * @mcg: mcast group object with an embedded red-black tree node
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock
+ */
+static void __rxe_remove_mcg(struct rxe_mcg *mcg)
{
- int err;
- struct rxe_mc_grp *grp;
- struct rxe_pool *pool = &rxe->mc_grp_pool;
- unsigned long flags;
+ rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
+}
- if (rxe->attr.max_mcast_qp_attach == 0)
- return -EINVAL;
+/**
+ * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
+ * @rxe: rxe device object
+ * @mgid: multicast IP address
+ *
+ * Context: caller must hold rxe->mcg_lock
+ * Returns: mcg on success and takes a ref to mcg else NULL
+ */
+static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
+ union ib_gid *mgid)
+{
+ struct rb_root *tree = &rxe->mcg_tree;
+ struct rxe_mcg *mcg;
+ struct rb_node *node;
+ int cmp;
+
+ node = tree->rb_node;
- write_lock_irqsave(&pool->pool_lock, flags);
+ while (node) {
+ mcg = rb_entry(node, struct rxe_mcg, node);
- grp = rxe_pool_get_key_locked(pool, mgid);
- if (grp)
- goto done;
+ cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));
- grp = create_grp(rxe, pool, mgid);
- if (IS_ERR(grp)) {
- write_unlock_irqrestore(&pool->pool_lock, flags);
- err = PTR_ERR(grp);
- return err;
+ if (cmp > 0)
+ node = node->rb_left;
+ else if (cmp < 0)
+ node = node->rb_right;
+ else
+ break;
}
-done:
- write_unlock_irqrestore(&pool->pool_lock, flags);
- *grp_p = grp;
- return 0;
+ if (node) {
+ kref_get(&mcg->ref_cnt);
+ return mcg;
+ }
+
+ return NULL;
}
-int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct rxe_mc_grp *grp)
+/**
+ * rxe_lookup_mcg - lookup up mcg in red-back tree
+ * @rxe: rxe device object
+ * @mgid: multicast IP address
+ *
+ * Returns: mcg if found else NULL
+ */
+struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
+ struct rxe_mcg *mcg;
+
+ spin_lock_bh(&rxe->mcg_lock);
+ mcg = __rxe_lookup_mcg(rxe, mgid);
+ spin_unlock_bh(&rxe->mcg_lock);
+
+ return mcg;
+}
+
+/**
+ * __rxe_init_mcg - initialize a new mcg
+ * @rxe: rxe device
+ * @mgid: multicast address as a gid
+ * @mcg: new mcg object
+ *
+ * Context: caller should hold rxe->mcg lock
+ */
+static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mcg *mcg)
+{
+ kref_init(&mcg->ref_cnt);
+ memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
+ INIT_LIST_HEAD(&mcg->qp_list);
+ mcg->rxe = rxe;
+
+ /* caller holds a ref on mcg but that will be
+ * dropped when mcg goes out of scope. We need to take a ref
+ * on the pointer that will be saved in the red-black tree
+ * by __rxe_insert_mcg and used to lookup mcg from mgid later.
+ * Inserting mcg makes it visible to outside so this should
+ * be done last after the object is ready.
+ */
+ kref_get(&mcg->ref_cnt);
+ __rxe_insert_mcg(mcg);
+}
+
+/**
+ * rxe_get_mcg - lookup or allocate a mcg
+ * @rxe: rxe device object
+ * @mgid: multicast IP address as a gid
+ *
+ * Returns: mcg on success else ERR_PTR(error)
+ */
+static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+ struct rxe_mcg *mcg, *tmp;
int err;
- struct rxe_mc_elem *elem;
- /* check to see of the qp is already a member of the group */
- spin_lock_bh(&qp->grp_lock);
- spin_lock_bh(&grp->mcg_lock);
- list_for_each_entry(elem, &grp->qp_list, qp_list) {
- if (elem->qp == qp) {
- err = 0;
- goto out;
- }
- }
+ if (rxe->attr.max_mcast_grp == 0)
+ return ERR_PTR(-EINVAL);
+
+ /* check to see if mcg already exists */
+ mcg = rxe_lookup_mcg(rxe, mgid);
+ if (mcg)
+ return mcg;
- if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
+ /* check to see if we have reached limit */
+ if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
err = -ENOMEM;
- goto out;
+ goto err_dec;
}
- elem = rxe_alloc_locked(&rxe->mc_elem_pool);
- if (!elem) {
+ /* speculative alloc of new mcg */
+ mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
+ if (!mcg) {
err = -ENOMEM;
- goto out;
+ goto err_dec;
+ }
+
+ spin_lock_bh(&rxe->mcg_lock);
+ /* re-check to see if someone else just added it */
+ tmp = __rxe_lookup_mcg(rxe, mgid);
+ if (tmp) {
+ spin_unlock_bh(&rxe->mcg_lock);
+ atomic_dec(&rxe->mcg_num);
+ kfree(mcg);
+ return tmp;
+ }
+
+ __rxe_init_mcg(rxe, mgid, mcg);
+ spin_unlock_bh(&rxe->mcg_lock);
+
+ /* add mcast address outside of lock */
+ err = rxe_mcast_add(rxe, mgid);
+ if (!err)
+ return mcg;
+
+ kfree(mcg);
+err_dec:
+ atomic_dec(&rxe->mcg_num);
+ return ERR_PTR(err);
+}
+
+/**
+ * rxe_cleanup_mcg - cleanup mcg for kref_put
+ * @kref: struct kref embnedded in mcg
+ */
+void rxe_cleanup_mcg(struct kref *kref)
+{
+ struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
+
+ kfree(mcg);
+}
+
+/**
+ * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock
+ * @mcg: the mcg object
+ *
+ * Context: caller is holding rxe->mcg_lock
+ * no qp's are attached to mcg
+ */
+static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
+{
+ struct rxe_dev *rxe = mcg->rxe;
+
+ /* remove mcg from red-black tree then drop ref */
+ __rxe_remove_mcg(mcg);
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+
+ atomic_dec(&rxe->mcg_num);
+}
+
+/**
+ * rxe_destroy_mcg - destroy mcg object
+ * @mcg: the mcg object
+ *
+ * Context: no qp's are attached to mcg
+ */
+static void rxe_destroy_mcg(struct rxe_mcg *mcg)
+{
+ /* delete mcast address outside of lock */
+ rxe_mcast_del(mcg->rxe, &mcg->mgid);
+
+ spin_lock_bh(&mcg->rxe->mcg_lock);
+ __rxe_destroy_mcg(mcg);
+ spin_unlock_bh(&mcg->rxe->mcg_lock);
+}
+
+/**
+ * __rxe_init_mca - initialize a new mca holding lock
+ * @qp: qp object
+ * @mcg: mcg object
+ * @mca: empty space for new mca
+ *
+ * Context: caller must hold references on qp and mcg, rxe->mcg_lock
+ * and pass memory for new mca
+ *
+ * Returns: 0 on success else an error
+ */
+static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
+ struct rxe_mca *mca)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int n;
+
+ n = atomic_inc_return(&rxe->mcg_attach);
+ if (n > rxe->attr.max_total_mcast_qp_attach) {
+ atomic_dec(&rxe->mcg_attach);
+ return -ENOMEM;
+ }
+
+ n = atomic_inc_return(&mcg->qp_num);
+ if (n > rxe->attr.max_mcast_qp_attach) {
+ atomic_dec(&mcg->qp_num);
+ atomic_dec(&rxe->mcg_attach);
+ return -ENOMEM;
}
- /* each qp holds a ref on the grp */
- rxe_add_ref(grp);
+ atomic_inc(&qp->mcg_num);
+
+ rxe_get(qp);
+ mca->qp = qp;
+
+ list_add_tail(&mca->qp_list, &mcg->qp_list);
+
+ return 0;
+}
- grp->num_qp++;
- elem->qp = qp;
- elem->grp = grp;
+/**
+ * rxe_attach_mcg - attach qp to mcg if not already attached
+ * @qp: qp object
+ * @mcg: mcg object
+ *
+ * Context: caller must hold reference on qp and mcg.
+ * Returns: 0 on success else an error
+ */
+static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
+{
+ struct rxe_dev *rxe = mcg->rxe;
+ struct rxe_mca *mca, *tmp;
+ int err;
- list_add(&elem->qp_list, &grp->qp_list);
- list_add(&elem->grp_list, &qp->grp_list);
+ /* check to see if the qp is already a member of the group */
+ spin_lock_bh(&rxe->mcg_lock);
+ list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+ if (mca->qp == qp) {
+ spin_unlock_bh(&rxe->mcg_lock);
+ return 0;
+ }
+ }
+ spin_unlock_bh(&rxe->mcg_lock);
- err = 0;
+ /* speculative alloc new mca without using GFP_ATOMIC */
+ mca = kzalloc(sizeof(*mca), GFP_KERNEL);
+ if (!mca)
+ return -ENOMEM;
+
+ spin_lock_bh(&rxe->mcg_lock);
+ /* re-check to see if someone else just attached qp */
+ list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
+ if (tmp->qp == qp) {
+ kfree(mca);
+ err = 0;
+ goto out;
+ }
+ }
+
+ err = __rxe_init_mca(qp, mcg, mca);
+ if (err)
+ kfree(mca);
out:
- spin_unlock_bh(&grp->mcg_lock);
- spin_unlock_bh(&qp->grp_lock);
+ spin_unlock_bh(&rxe->mcg_lock);
return err;
}
-int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- union ib_gid *mgid)
+/**
+ * __rxe_cleanup_mca - cleanup mca object holding lock
+ * @mca: mca object
+ * @mcg: mcg object
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock
+ */
+static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
{
- struct rxe_mc_grp *grp;
- struct rxe_mc_elem *elem, *tmp;
+ list_del(&mca->qp_list);
- grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
- if (!grp)
- goto err1;
+ atomic_dec(&mcg->qp_num);
+ atomic_dec(&mcg->rxe->mcg_attach);
+ atomic_dec(&mca->qp->mcg_num);
+ rxe_put(mca->qp);
- spin_lock_bh(&qp->grp_lock);
- spin_lock_bh(&grp->mcg_lock);
+ kfree(mca);
+}
+
+/**
+ * rxe_detach_mcg - detach qp from mcg
+ * @mcg: mcg object
+ * @qp: qp object
+ *
+ * Returns: 0 on success else an error if qp is not attached.
+ */
+static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
+{
+ struct rxe_dev *rxe = mcg->rxe;
+ struct rxe_mca *mca, *tmp;
- list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
- if (elem->qp == qp) {
- list_del(&elem->qp_list);
- list_del(&elem->grp_list);
- grp->num_qp--;
+ spin_lock_bh(&rxe->mcg_lock);
+ list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
+ if (mca->qp == qp) {
+ __rxe_cleanup_mca(mca, mcg);
- spin_unlock_bh(&grp->mcg_lock);
- spin_unlock_bh(&qp->grp_lock);
- rxe_drop_ref(elem);
- rxe_drop_ref(grp); /* ref held by QP */
- rxe_drop_ref(grp); /* ref from get_key */
+ /* if the number of qp's attached to the
+ * mcast group falls to zero go ahead and
+ * tear it down. This will not free the
+ * object since we are still holding a ref
+ * from the caller
+ */
+ if (atomic_read(&mcg->qp_num) <= 0)
+ __rxe_destroy_mcg(mcg);
+
+ spin_unlock_bh(&rxe->mcg_lock);
return 0;
}
}
- spin_unlock_bh(&grp->mcg_lock);
- spin_unlock_bh(&qp->grp_lock);
- rxe_drop_ref(grp); /* ref from get_key */
-err1:
+ /* we didn't find the qp on the list */
+ spin_unlock_bh(&rxe->mcg_lock);
return -EINVAL;
}
-void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
+/**
+ * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1)
+ * @ibqp: (IB) qp object
+ * @mgid: multicast IP address
+ * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
+ *
+ * Returns: 0 on success else an errno
+ */
+int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
- struct rxe_mc_grp *grp;
- struct rxe_mc_elem *elem;
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibqp->device);
+ struct rxe_qp *qp = to_rqp(ibqp);
+ struct rxe_mcg *mcg;
- while (1) {
- spin_lock_bh(&qp->grp_lock);
- if (list_empty(&qp->grp_list)) {
- spin_unlock_bh(&qp->grp_lock);
- break;
- }
- elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem,
- grp_list);
- list_del(&elem->grp_list);
- spin_unlock_bh(&qp->grp_lock);
-
- grp = elem->grp;
- spin_lock_bh(&grp->mcg_lock);
- list_del(&elem->qp_list);
- grp->num_qp--;
- spin_unlock_bh(&grp->mcg_lock);
- rxe_drop_ref(grp);
- rxe_drop_ref(elem);
- }
+ /* takes a ref on mcg if successful */
+ mcg = rxe_get_mcg(rxe, mgid);
+ if (IS_ERR(mcg))
+ return PTR_ERR(mcg);
+
+ err = rxe_attach_mcg(mcg, qp);
+
+ /* if we failed to attach the first qp to mcg tear it down */
+ if (atomic_read(&mcg->qp_num) == 0)
+ rxe_destroy_mcg(mcg);
+
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+
+ return err;
}
-void rxe_mc_cleanup(struct rxe_pool_entry *arg)
+/**
+ * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2)
+ * @ibqp: address of (IB) qp object
+ * @mgid: multicast IP address
+ * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
+ *
+ * Returns: 0 on success else an errno
+ */
+int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
- struct rxe_mc_grp *grp = container_of(arg, typeof(*grp), pelem);
- struct rxe_dev *rxe = grp->rxe;
+ struct rxe_dev *rxe = to_rdev(ibqp->device);
+ struct rxe_qp *qp = to_rqp(ibqp);
+ struct rxe_mcg *mcg;
+ int err;
- rxe_drop_key(grp);
- rxe_mcast_delete(rxe, &grp->mgid);
+ mcg = rxe_lookup_mcg(rxe, mgid);
+ if (!mcg)
+ return -EINVAL;
+
+ err = rxe_detach_mcg(mcg, qp);
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+
+ return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 035f226af133..9149b6095429 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -4,7 +4,6 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
-#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/errno.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 53271df10e47..502e9ada99b3 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -24,7 +24,7 @@ u8 rxe_get_next_key(u32 last_key)
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
- struct rxe_map_set *set = mr->cur_map_set;
+
switch (mr->type) {
case IB_MR_TYPE_DMA:
@@ -32,8 +32,8 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
case IB_MR_TYPE_USER:
case IB_MR_TYPE_MEM_REG:
- if (iova < set->iova || length > set->length ||
- iova > set->iova + set->length - length)
+ if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
+ iova > mr->ibmr.iova + mr->ibmr.length - length)
return -EFAULT;
return 0;
@@ -50,7 +50,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
- u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
+ u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
/* set ibmr->l/rkey and also copy into private l/rkey
@@ -65,106 +65,56 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
-static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set)
-{
- int i;
-
- for (i = 0; i < num_map; i++)
- kfree(set->map[i]);
-
- kfree(set->map);
- kfree(set);
-}
-
-static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp)
+static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
int i;
- struct rxe_map_set *set;
+ int num_map;
+ struct rxe_map **map = mr->map;
- set = kmalloc(sizeof(*set), GFP_KERNEL);
- if (!set)
- goto err_out;
+ num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
- set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL);
- if (!set->map)
- goto err_free_set;
+ mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
+ if (!mr->map)
+ goto err1;
for (i = 0; i < num_map; i++) {
- set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL);
- if (!set->map[i])
- goto err_free_map;
+ mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
+ if (!mr->map[i])
+ goto err2;
}
- *setp = set;
-
- return 0;
-
-err_free_map:
- for (i--; i >= 0; i--)
- kfree(set->map[i]);
-
- kfree(set->map);
-err_free_set:
- kfree(set);
-err_out:
- return -ENOMEM;
-}
-
-/**
- * rxe_mr_alloc() - Allocate memory map array(s) for MR
- * @mr: Memory region
- * @num_buf: Number of buffer descriptors to support
- * @both: If non zero allocate both mr->map and mr->next_map
- * else just allocate mr->map. Used for fast MRs
- *
- * Return: 0 on success else an error
- */
-static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both)
-{
- int ret;
- int num_map;
-
BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
- num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
mr->map_mask = RXE_BUF_PER_MAP - 1;
+
mr->num_buf = num_buf;
- mr->max_buf = num_map * RXE_BUF_PER_MAP;
mr->num_map = num_map;
-
- ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set);
- if (ret)
- goto err_out;
-
- if (both) {
- ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set);
- if (ret) {
- rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
- goto err_out;
- }
- }
+ mr->max_buf = num_map * RXE_BUF_PER_MAP;
return 0;
-err_out:
+err2:
+ for (i--; i >= 0; i--)
+ kfree(mr->map[i]);
+
+ kfree(mr->map);
+err1:
return -ENOMEM;
}
-void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
+void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
rxe_mr_init(access, mr);
- mr->ibmr.pd = &pd->ibpd;
mr->access = access;
mr->state = RXE_MR_STATE_VALID;
mr->type = IB_MR_TYPE_DMA;
}
-int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
+int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr)
{
- struct rxe_map_set *set;
struct rxe_map **map;
struct rxe_phys_buf *buf = NULL;
struct ib_umem *umem;
@@ -172,8 +122,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
int num_buf;
void *vaddr;
int err;
+ int i;
- umem = ib_umem_get(pd->ibpd.device, start, length, access);
+ umem = ib_umem_get(&rxe->ib_dev, start, length, access);
if (IS_ERR(umem)) {
pr_warn("%s: Unable to pin memory region err = %d\n",
__func__, (int)PTR_ERR(umem));
@@ -185,20 +136,18 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
rxe_mr_init(access, mr);
- err = rxe_mr_alloc(mr, num_buf, 0);
+ err = rxe_mr_alloc(mr, num_buf);
if (err) {
pr_warn("%s: Unable to allocate memory for map\n",
__func__);
goto err_release_umem;
}
- set = mr->cur_map_set;
- set->page_shift = PAGE_SHIFT;
- set->page_mask = PAGE_SIZE - 1;
-
- num_buf = 0;
- map = set->map;
+ mr->page_shift = PAGE_SHIFT;
+ mr->page_mask = PAGE_SIZE - 1;
+ num_buf = 0;
+ map = mr->map;
if (length > 0) {
buf = map[0]->buf;
@@ -221,42 +170,39 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
buf->size = PAGE_SIZE;
num_buf++;
buf++;
+
}
}
- mr->ibmr.pd = &pd->ibpd;
mr->umem = umem;
mr->access = access;
+ mr->offset = ib_umem_offset(umem);
mr->state = RXE_MR_STATE_VALID;
mr->type = IB_MR_TYPE_USER;
- set->length = length;
- set->iova = iova;
- set->va = start;
- set->offset = ib_umem_offset(umem);
-
return 0;
err_cleanup_map:
- rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
+ for (i = 0; i < mr->num_map; i++)
+ kfree(mr->map[i]);
+ kfree(mr->map);
err_release_umem:
ib_umem_release(umem);
err_out:
return err;
}
-int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
+int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
int err;
/* always allow remote access for FMRs */
rxe_mr_init(IB_ACCESS_REMOTE, mr);
- err = rxe_mr_alloc(mr, max_pages, 1);
+ err = rxe_mr_alloc(mr, max_pages);
if (err)
goto err1;
- mr->ibmr.pd = &pd->ibpd;
mr->max_buf = max_pages;
mr->state = RXE_MR_STATE_FREE;
mr->type = IB_MR_TYPE_MEM_REG;
@@ -270,24 +216,21 @@ err1:
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
size_t *offset_out)
{
- struct rxe_map_set *set = mr->cur_map_set;
- size_t offset = iova - set->iova + set->offset;
+ size_t offset = iova - mr->ibmr.iova + mr->offset;
int map_index;
int buf_index;
u64 length;
- struct rxe_map *map;
- if (likely(set->page_shift)) {
- *offset_out = offset & set->page_mask;
- offset >>= set->page_shift;
+ if (likely(mr->page_shift)) {
+ *offset_out = offset & mr->page_mask;
+ offset >>= mr->page_shift;
*n_out = offset & mr->map_mask;
*m_out = offset >> mr->map_shift;
} else {
map_index = 0;
buf_index = 0;
- map = set->map[map_index];
- length = map->buf[buf_index].size;
+ length = mr->map[map_index]->buf[buf_index].size;
while (offset >= length) {
offset -= length;
@@ -297,8 +240,7 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
map_index++;
buf_index = 0;
}
- map = set->map[map_index];
- length = map->buf[buf_index].size;
+ length = mr->map[map_index]->buf[buf_index].size;
}
*m_out = map_index;
@@ -319,7 +261,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
goto out;
}
- if (!mr->cur_map_set) {
+ if (!mr->map) {
addr = (void *)(uintptr_t)iova;
goto out;
}
@@ -332,13 +274,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
lookup_iova(mr, iova, &m, &n, &offset);
- if (offset + length > mr->cur_map_set->map[m]->buf[n].size) {
+ if (offset + length > mr->map[m]->buf[n].size) {
pr_warn("crosses page boundary\n");
addr = NULL;
goto out;
}
- addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset;
+ addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
out:
return addr;
@@ -374,7 +316,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
return 0;
}
- WARN_ON_ONCE(!mr->cur_map_set);
+ WARN_ON_ONCE(!mr->map);
err = mr_check_range(mr, iova, length);
if (err) {
@@ -384,7 +326,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
lookup_iova(mr, iova, &m, &i, &offset);
- map = mr->cur_map_set->map + m;
+ map = mr->map + m;
buf = map[0]->buf + i;
while (length > 0) {
@@ -461,7 +403,7 @@ int copy_data(
if (offset >= sge->length) {
if (mr) {
- rxe_drop_ref(mr);
+ rxe_put(mr);
mr = NULL;
}
sge++;
@@ -506,13 +448,13 @@ int copy_data(
dma->resid = resid;
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
return 0;
err2:
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
err1:
return err;
}
@@ -571,29 +513,29 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
(type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
mr_pd(mr) != pd || (access && !(access & mr->access)) ||
mr->state != RXE_MR_STATE_VALID)) {
- rxe_drop_ref(mr);
+ rxe_put(mr);
mr = NULL;
}
return mr;
}
-int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_mr *mr;
int ret;
- mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+ mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
if (!mr) {
- pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
+ pr_err("%s: No MR for key %#x\n", __func__, key);
ret = -EINVAL;
goto err;
}
- if (rkey != mr->rkey) {
- pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n",
- __func__, rkey, mr->rkey);
+ if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
+ pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
+ __func__, key, (mr->rkey ? mr->rkey : mr->lkey));
ret = -EINVAL;
goto err_drop_ref;
}
@@ -615,7 +557,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
ret = 0;
err_drop_ref:
- rxe_drop_ref(mr);
+ rxe_put(mr);
err:
return ret;
}
@@ -630,9 +572,8 @@ err:
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
- u32 key = wqe->wr.wr.reg.key & 0xff;
+ u32 key = wqe->wr.wr.reg.key;
u32 access = wqe->wr.wr.reg.access;
- struct rxe_map_set *set;
/* user can only register MR in free state */
if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
@@ -648,36 +589,19 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
return -EINVAL;
}
+ /* user is only allowed to change key portion of l/rkey */
+ if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
+ pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
+ __func__, key, mr->lkey);
+ return -EINVAL;
+ }
+
mr->access = access;
- mr->lkey = (mr->lkey & ~0xff) | key;
- mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0;
+ mr->lkey = key;
+ mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
+ mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
mr->state = RXE_MR_STATE_VALID;
- set = mr->cur_map_set;
- mr->cur_map_set = mr->next_map_set;
- mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
- mr->next_map_set = set;
-
- return 0;
-}
-
-int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct rxe_mr *mr = to_rmr(ibmr);
- struct rxe_map_set *set = mr->next_map_set;
- struct rxe_map *map;
- struct rxe_phys_buf *buf;
-
- if (unlikely(set->nbuf == mr->num_buf))
- return -ENOMEM;
-
- map = set->map[set->nbuf / RXE_BUF_PER_MAP];
- buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP];
-
- buf->addr = addr;
- buf->size = ibmr->page_size;
- set->nbuf++;
-
return 0;
}
@@ -685,29 +609,27 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct rxe_mr *mr = to_rmr(ibmr);
- if (atomic_read(&mr->num_mw) > 0) {
- pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
- __func__);
+ /* See IBA 10.6.7.2.6 */
+ if (atomic_read(&mr->num_mw) > 0)
return -EINVAL;
- }
- mr->state = RXE_MR_STATE_INVALID;
- rxe_drop_ref(mr_pd(mr));
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
+ rxe_cleanup(mr);
return 0;
}
-void rxe_mr_cleanup(struct rxe_pool_entry *arg)
+void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
- struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
+ struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
+ int i;
+ rxe_put(mr_pd(mr));
ib_umem_release(mr->umem);
- if (mr->cur_map_set)
- rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
+ if (mr->map) {
+ for (i = 0; i < mr->num_map; i++)
+ kfree(mr->map[i]);
- if (mr->next_map_set)
- rxe_mr_free_map_set(mr->num_map, mr->next_map_set);
+ kfree(mr->map);
+ }
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 9534a7fe1a98..902b7df7aaed 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -3,6 +3,14 @@
* Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
*/
+/*
+ * The rdma_rxe driver supports type 1 or type 2B memory windows.
+ * Type 1 MWs are created by ibv_alloc_mw() verbs calls and bound by
+ * ibv_bind_mw() calls. Type 2 MWs are also created by ibv_alloc_mw()
+ * but bound by bind_mw work requests. The ibv_bind_mw() call is converted
+ * by libibverbs to a bind_mw work request.
+ */
+
#include "rxe.h"
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
@@ -12,58 +20,29 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
struct rxe_dev *rxe = to_rdev(ibmw->device);
int ret;
- rxe_add_ref(pd);
+ rxe_get(pd);
ret = rxe_add_to_pool(&rxe->mw_pool, mw);
if (ret) {
- rxe_drop_ref(pd);
+ rxe_put(pd);
return ret;
}
- rxe_add_index(mw);
- mw->rkey = ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1);
+ mw->rkey = ibmw->rkey = (mw->elem.index << 8) | rxe_get_next_key(-1);
mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ?
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
spin_lock_init(&mw->lock);
- return 0;
-}
-
-static void rxe_do_dealloc_mw(struct rxe_mw *mw)
-{
- if (mw->mr) {
- struct rxe_mr *mr = mw->mr;
-
- mw->mr = NULL;
- atomic_dec(&mr->num_mw);
- rxe_drop_ref(mr);
- }
+ rxe_finalize(mw);
- if (mw->qp) {
- struct rxe_qp *qp = mw->qp;
-
- mw->qp = NULL;
- rxe_drop_ref(qp);
- }
-
- mw->access = 0;
- mw->addr = 0;
- mw->length = 0;
- mw->state = RXE_MW_STATE_INVALID;
+ return 0;
}
int rxe_dealloc_mw(struct ib_mw *ibmw)
{
struct rxe_mw *mw = to_rmw(ibmw);
- struct rxe_pd *pd = to_rpd(ibmw->pd);
- unsigned long flags;
- spin_lock_irqsave(&mw->lock, flags);
- rxe_do_dealloc_mw(mw);
- spin_unlock_irqrestore(&mw->lock, flags);
-
- rxe_drop_ref(mw);
- rxe_drop_ref(pd);
+ rxe_cleanup(mw);
return 0;
}
@@ -71,8 +50,6 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_mw *mw, struct rxe_mr *mr)
{
- u32 key = wqe->wr.wr.mw.rkey & 0xff;
-
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
pr_err_once(
@@ -110,11 +87,6 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
}
- if (unlikely(key == (mw->rkey & 0xff))) {
- pr_err_once("attempt to bind MW with same key\n");
- return -EINVAL;
- }
-
/* remaining checks only apply to a nonzero MR */
if (!mr)
return 0;
@@ -136,21 +108,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
pr_err_once(
- "attempt to bind an writeable MW to an MR without local write access\n");
+ "attempt to bind an Writable MW to an MR without local write access\n");
return -EINVAL;
}
/* C10-75 */
if (mw->access & IB_ZERO_BASED) {
- if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) {
+ if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
pr_err_once(
"attempt to bind a ZB MW outside of the MR\n");
return -EINVAL;
}
} else {
- if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) ||
+ if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) ||
((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
- (mr->cur_map_set->iova + mr->cur_map_set->length)))) {
+ (mr->ibmr.iova + mr->ibmr.length)))) {
pr_err_once(
"attempt to bind a VA MW outside of the MR\n");
return -EINVAL;
@@ -172,7 +144,7 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
mw->length = wqe->wr.wr.mw.length;
if (mw->mr) {
- rxe_drop_ref(mw->mr);
+ rxe_put(mw->mr);
atomic_dec(&mw->mr->num_mw);
mw->mr = NULL;
}
@@ -180,11 +152,11 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (mw->length) {
mw->mr = mr;
atomic_inc(&mr->num_mw);
- rxe_add_ref(mr);
+ rxe_get(mr);
}
if (mw->ibmw.type == IB_MW_TYPE_2) {
- rxe_add_ref(qp);
+ rxe_get(qp);
mw->qp = qp;
}
}
@@ -197,7 +169,6 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
u32 mw_rkey = wqe->wr.wr.mw.mw_rkey;
u32 mr_lkey = wqe->wr.wr.mw.mr_lkey;
- unsigned long flags;
mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8);
if (unlikely(!mw)) {
@@ -225,7 +196,7 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
mr = NULL;
}
- spin_lock_irqsave(&mw->lock, flags);
+ spin_lock_bh(&mw->lock);
ret = rxe_check_bind_mw(qp, wqe, mw, mr);
if (ret)
@@ -233,12 +204,12 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
rxe_do_bind_mw(qp, wqe, mw, mr);
err_unlock:
- spin_unlock_irqrestore(&mw->lock, flags);
+ spin_unlock_bh(&mw->lock);
err_drop_mr:
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
err_drop_mw:
- rxe_drop_ref(mw);
+ rxe_put(mw);
err:
return ret;
}
@@ -263,13 +234,13 @@ static void rxe_do_invalidate_mw(struct rxe_mw *mw)
/* valid type 2 MW will always have a QP pointer */
qp = mw->qp;
mw->qp = NULL;
- rxe_drop_ref(qp);
+ rxe_put(qp);
/* valid type 2 MW will always have an MR pointer */
mr = mw->mr;
mw->mr = NULL;
atomic_dec(&mr->num_mw);
- rxe_drop_ref(mr);
+ rxe_put(mr);
mw->access = 0;
mw->addr = 0;
@@ -280,7 +251,6 @@ static void rxe_do_invalidate_mw(struct rxe_mw *mw)
int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- unsigned long flags;
struct rxe_mw *mw;
int ret;
@@ -295,7 +265,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
goto err_drop_ref;
}
- spin_lock_irqsave(&mw->lock, flags);
+ spin_lock_bh(&mw->lock);
ret = rxe_check_invalidate_mw(qp, mw);
if (ret)
@@ -303,9 +273,9 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
rxe_do_invalidate_mw(mw);
err_unlock:
- spin_unlock_irqrestore(&mw->lock, flags);
+ spin_unlock_bh(&mw->lock);
err_drop_ref:
- rxe_drop_ref(mw);
+ rxe_put(mw);
err:
return ret;
}
@@ -326,16 +296,37 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
(mw->length == 0) ||
(access && !(access & mw->access)) ||
mw->state != RXE_MW_STATE_VALID)) {
- rxe_drop_ref(mw);
+ rxe_put(mw);
return NULL;
}
return mw;
}
-void rxe_mw_cleanup(struct rxe_pool_entry *elem)
+void rxe_mw_cleanup(struct rxe_pool_elem *elem)
{
- struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
+ struct rxe_mw *mw = container_of(elem, typeof(*mw), elem);
+ struct rxe_pd *pd = to_rpd(mw->ibmw.pd);
+
+ rxe_put(pd);
+
+ if (mw->mr) {
+ struct rxe_mr *mr = mw->mr;
+
+ mw->mr = NULL;
+ atomic_dec(&mr->num_mw);
+ rxe_put(mr);
+ }
+
+ if (mw->qp) {
+ struct rxe_qp *qp = mw->qp;
+
+ mw->qp = NULL;
+ rxe_put(qp);
+ }
- rxe_drop_index(mw);
+ mw->access = 0;
+ mw->addr = 0;
+ mw->length = 0;
+ mw->state = RXE_MW_STATE_INVALID;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 2cb810cb890a..35f327b9d4b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -20,28 +20,6 @@
static struct rxe_recv_sockets recv_sockets;
-int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
-{
- int err;
- unsigned char ll_addr[ETH_ALEN];
-
- ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
- err = dev_mc_add(rxe->ndev, ll_addr);
-
- return err;
-}
-
-int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
-{
- int err;
- unsigned char ll_addr[ETH_ALEN];
-
- ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
- err = dev_mc_del(rxe->ndev, ll_addr);
-
- return err;
-}
-
static struct dst_entry *rxe_find_route4(struct net_device *ndev,
struct in_addr *saddr,
struct in_addr *daddr)
@@ -167,7 +145,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
goto drop;
if (skb_linearize(skb)) {
- pr_err("skb_linearize failed\n");
ib_device_put(&rxe->ib_dev);
goto drop;
}
@@ -293,13 +270,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}
-static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
bool xnet = false;
__be16 df = htons(IP_DF);
- struct rxe_av *av = rxe_get_av(pkt);
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
@@ -319,11 +296,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
-static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
- struct rxe_av *av = rxe_get_av(pkt);
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
@@ -344,16 +321,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
-int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
int err = 0;
if (skb->protocol == htons(ETH_P_IP))
- err = prepare4(pkt, skb);
+ err = prepare4(av, pkt, skb);
else if (skb->protocol == htons(ETH_P_IPV6))
- err = prepare6(pkt, skb);
+ err = prepare6(av, pkt, skb);
- if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac))
+ if (ether_addr_equal(skb->dev->dev_addr, av->dmac))
pkt->mask |= RXE_LOOPBACK_MASK;
return err;
@@ -369,7 +347,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
rxe_run_task(&qp->req.task, 1);
- rxe_drop_ref(qp);
+ rxe_put(qp);
}
static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
@@ -379,7 +357,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
skb->destructor = rxe_skb_tx_dtor;
skb->sk = pkt->qp->sk->sk;
- rxe_add_ref(pkt->qp);
+ rxe_get(pkt->qp);
atomic_inc(&pkt->qp->skb_out);
if (skb->protocol == htons(ETH_P_IP)) {
@@ -389,7 +367,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
} else {
pr_err("Unknown layer 3 protocol: %d\n", skb->protocol);
atomic_dec(&pkt->qp->skb_out);
- rxe_drop_ref(pkt->qp);
+ rxe_put(pkt->qp);
kfree_skb(skb);
return -EINVAL;
}
@@ -444,7 +422,6 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
else
err = rxe_send(skb, pkt);
if (err) {
- rxe->xmit_errors++;
rxe_counter_inc(rxe, RXE_CNT_SEND_ERR);
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 3ef5a10a6efd..d4ba4d506f17 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -29,7 +29,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_SEND] = {
.name = "IB_WR_SEND",
.mask = {
- [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
@@ -39,7 +38,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_SEND_WITH_IMM] = {
.name = "IB_WR_SEND_WITH_IMM",
.mask = {
- [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
@@ -108,8 +106,8 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
[IB_OPCODE_RC_SEND_FIRST] = {
.name = "IB_OPCODE_RC_SEND_FIRST",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
- | RXE_SEND_MASK | RXE_START_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK |
+ RXE_SEND_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -117,9 +115,9 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
}
},
[IB_OPCODE_RC_SEND_MIDDLE] = {
- .name = "IB_OPCODE_RC_SEND_MIDDLE]",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
- | RXE_MIDDLE_MASK,
+ .name = "IB_OPCODE_RC_SEND_MIDDLE",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK |
+ RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -128,8 +126,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_RC_SEND_LAST] = {
.name = "IB_OPCODE_RC_SEND_LAST",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
- | RXE_SEND_MASK | RXE_END_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK |
+ RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -138,21 +136,21 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE",
- .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_SEND_ONLY] = {
.name = "IB_OPCODE_RC_SEND_ONLY",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
- | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK |
+ RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -161,33 +159,33 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE",
- .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_FIRST] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_FIRST",
- .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_START_MASK,
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_MIDDLE_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -196,8 +194,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_RC_RDMA_WRITE_LAST] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_LAST",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_END_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -206,69 +204,69 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
- .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_END_MASK,
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_ONLY] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_ONLY",
- .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_START_MASK
- | RXE_END_MASK,
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_START_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
- .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
- [RXE_IMMDT] = RXE_BTH_BYTES
- + RXE_RETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_REQUEST] = {
.name = "IB_OPCODE_RC_RDMA_READ_REQUEST",
- .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = {
.name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST",
- .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
- | RXE_START_MASK,
+ .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK |
+ RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = {
@@ -282,109 +280,110 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = {
.name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST",
- .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
- | RXE_END_MASK,
+ .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = {
.name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY",
- .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_ACKNOWLEDGE] = {
.name = "IB_OPCODE_RC_ACKNOWLEDGE",
- .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK
- | RXE_END_MASK,
+ .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = {
.name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE",
- .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_AETH] = RXE_BTH_BYTES,
- [RXE_ATMACK] = RXE_BTH_BYTES
- + RXE_AETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
+ [RXE_ATMACK] = RXE_BTH_BYTES +
+ RXE_AETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_ATMACK_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RC_COMPARE_SWAP] = {
.name = "IB_OPCODE_RC_COMPARE_SWAP",
- .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_ATMETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_ATMETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_ATMETH_BYTES,
}
},
[IB_OPCODE_RC_FETCH_ADD] = {
.name = "IB_OPCODE_RC_FETCH_ADD",
- .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_ATMETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_ATMETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_ATMETH_BYTES,
}
},
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = {
.name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE",
- .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
+ .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IETH_BYTES,
}
},
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = {
.name = "IB_OPCODE_RC_SEND_ONLY_INV",
- .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_END_MASK | RXE_START_MASK,
+ .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_END_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_IETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IETH_BYTES,
}
},
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = {
.name = "IB_OPCODE_UC_SEND_FIRST",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
- | RXE_SEND_MASK | RXE_START_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK |
+ RXE_SEND_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -393,8 +392,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_UC_SEND_MIDDLE] = {
.name = "IB_OPCODE_UC_SEND_MIDDLE",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
- | RXE_MIDDLE_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK |
+ RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -403,8 +402,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_UC_SEND_LAST] = {
.name = "IB_OPCODE_UC_SEND_LAST",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
- | RXE_SEND_MASK | RXE_END_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK |
+ RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -413,21 +412,21 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE",
- .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_UC_SEND_ONLY] = {
.name = "IB_OPCODE_UC_SEND_ONLY",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
- | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK |
+ RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -436,33 +435,33 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE",
- .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_FIRST] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_FIRST",
- .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_START_MASK,
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_MIDDLE_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -471,8 +470,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_UC_RDMA_WRITE_LAST] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_LAST",
- .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_END_MASK,
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES,
.offset = {
[RXE_BTH] = 0,
@@ -481,460 +480,460 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
},
[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
- .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_END_MASK,
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_IMMDT] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_ONLY] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_ONLY",
- .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_START_MASK
- | RXE_END_MASK,
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_START_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
- .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RETH] = RXE_BTH_BYTES,
- [RXE_IMMDT] = RXE_BTH_BYTES
- + RXE_RETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
/* RD */
[IB_OPCODE_RD_SEND_FIRST] = {
.name = "IB_OPCODE_RD_SEND_FIRST",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_START_MASK,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_MIDDLE] = {
.name = "IB_OPCODE_RD_SEND_MIDDLE",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_SEND_MASK
- | RXE_MIDDLE_MASK,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_SEND_MASK |
+ RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_LAST] = {
.name = "IB_OPCODE_RD_SEND_LAST",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK
- | RXE_END_MASK,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
- | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_SEND_MASK
- | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK |
+ RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_SEND_MASK |
+ RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_IMMDT] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_SEND_ONLY] = {
.name = "IB_OPCODE_RD_SEND_ONLY",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
- | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_START_MASK | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK |
+ RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_IMMDT] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_FIRST] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_FIRST",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
- | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_START_MASK,
- .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK |
+ RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_START_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_RETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES
- + RXE_RETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_MIDDLE_MASK,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_LAST] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_LAST",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_END_MASK,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
- | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK |
+ RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_IMMDT] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_ONLY] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_ONLY",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
- | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_WRITE_MASK | RXE_START_MASK
- | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK |
+ RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_WRITE_MASK | RXE_START_MASK |
+ RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_RETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES
- + RXE_RETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_RETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
- | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_WRITE_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_START_MASK | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES
- + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK |
+ RXE_IMMDT_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_WRITE_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES +
+ RXE_DETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_RETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
- [RXE_IMMDT] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES
- + RXE_RETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES
- + RXE_RETH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_RETH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_REQUEST] = {
.name = "IB_OPCODE_RD_RDMA_READ_REQUEST",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
- | RXE_REQ_MASK | RXE_READ_MASK
- | RXE_START_MASK | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK |
+ RXE_REQ_MASK | RXE_READ_MASK |
+ RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_RETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RETH_BYTES
- + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST",
- .mask = RXE_RDETH_MASK | RXE_AETH_MASK
- | RXE_PAYLOAD_MASK | RXE_ACK_MASK
- | RXE_START_MASK,
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK |
+ RXE_PAYLOAD_MASK | RXE_ACK_MASK |
+ RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_AETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE",
- .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
- | RXE_MIDDLE_MASK,
+ .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK |
+ RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST",
- .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
- | RXE_ACK_MASK | RXE_END_MASK,
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_ACK_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_AETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = {
.name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY",
- .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
- | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK |
+ RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_AETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RD_ACKNOWLEDGE] = {
.name = "IB_OPCODE_RD_ACKNOWLEDGE",
- .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_AETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
}
},
[IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = {
.name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE",
- .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK
- | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK |
+ RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_AETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_ATMACK] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_AETH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_ATMACK] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_AETH_BYTES,
}
},
[IB_OPCODE_RD_COMPARE_SWAP] = {
.name = "RD_COMPARE_SWAP",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
- | RXE_REQ_MASK | RXE_ATOMIC_MASK
- | RXE_START_MASK | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK |
+ RXE_REQ_MASK | RXE_ATOMIC_MASK |
+ RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_ATMETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_ATMETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES +
- + RXE_ATMETH_BYTES
- + RXE_DETH_BYTES +
- + RXE_RDETH_BYTES,
+ RXE_ATMETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
}
},
[IB_OPCODE_RD_FETCH_ADD] = {
.name = "IB_OPCODE_RD_FETCH_ADD",
- .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
- | RXE_REQ_MASK | RXE_ATOMIC_MASK
- | RXE_START_MASK | RXE_END_MASK,
- .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
- + RXE_RDETH_BYTES,
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK |
+ RXE_REQ_MASK | RXE_ATOMIC_MASK |
+ RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_RDETH] = RXE_BTH_BYTES,
- [RXE_DETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES,
- [RXE_ATMETH] = RXE_BTH_BYTES
- + RXE_RDETH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES,
+ [RXE_ATMETH] = RXE_BTH_BYTES +
+ RXE_RDETH_BYTES +
+ RXE_DETH_BYTES,
[RXE_PAYLOAD] = RXE_BTH_BYTES +
- + RXE_ATMETH_BYTES
- + RXE_DETH_BYTES +
- + RXE_RDETH_BYTES,
+ RXE_ATMETH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_RDETH_BYTES,
}
},
/* UD */
[IB_OPCODE_UD_SEND_ONLY] = {
.name = "IB_OPCODE_UD_SEND_ONLY",
- .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
- | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK |
+ RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_DETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_DETH] = RXE_BTH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_DETH_BYTES,
}
},
[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = {
.name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE",
- .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
- | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
- | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
+ .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK |
+ RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK |
+ RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
.length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES,
.offset = {
[RXE_BTH] = 0,
[RXE_DETH] = RXE_BTH_BYTES,
- [RXE_IMMDT] = RXE_BTH_BYTES
- + RXE_DETH_BYTES,
- [RXE_PAYLOAD] = RXE_BTH_BYTES
- + RXE_DETH_BYTES
- + RXE_IMMDT_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES +
+ RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ RXE_DETH_BYTES +
+ RXE_IMMDT_BYTES,
}
},
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 918270e34a35..86c7a8bf3cbb 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -50,9 +50,7 @@ enum rxe_device_param {
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS
- | IB_DEVICE_ALLOW_USER_UNREG
| IB_DEVICE_MEM_WINDOW
- | IB_DEVICE_MEM_WINDOW_TYPE_2A
| IB_DEVICE_MEM_WINDOW_TYPE_2B,
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
@@ -107,6 +105,12 @@ enum rxe_device_param {
RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
+ /* Max number of interations of each tasklet
+ * before yielding the cpu to let other
+ * work make progress
+ */
+ RXE_MAX_ITERATIONS = 1024,
+
/* Delay before calling arbiter timer */
RXE_NSEC_ARB_TIMER_DELAY = 200,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 2e80bb6aa957..f50620f5a0a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -5,499 +5,298 @@
*/
#include "rxe.h"
-#include "rxe_loc.h"
+
+#define RXE_POOL_TIMEOUT (200)
+#define RXE_POOL_ALIGN (16)
static const struct rxe_type_info {
const char *name;
size_t size;
size_t elem_offset;
- void (*cleanup)(struct rxe_pool_entry *obj);
- enum rxe_pool_flags flags;
+ void (*cleanup)(struct rxe_pool_elem *elem);
u32 min_index;
u32 max_index;
- size_t key_offset;
- size_t key_size;
+ u32 max_elem;
} rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
- .name = "rxe-uc",
+ .name = "uc",
.size = sizeof(struct rxe_ucontext),
- .elem_offset = offsetof(struct rxe_ucontext, pelem),
- .flags = RXE_POOL_NO_ALLOC,
+ .elem_offset = offsetof(struct rxe_ucontext, elem),
+ .min_index = 1,
+ .max_index = UINT_MAX,
+ .max_elem = UINT_MAX,
},
[RXE_TYPE_PD] = {
- .name = "rxe-pd",
+ .name = "pd",
.size = sizeof(struct rxe_pd),
- .elem_offset = offsetof(struct rxe_pd, pelem),
- .flags = RXE_POOL_NO_ALLOC,
+ .elem_offset = offsetof(struct rxe_pd, elem),
+ .min_index = 1,
+ .max_index = UINT_MAX,
+ .max_elem = UINT_MAX,
},
[RXE_TYPE_AH] = {
- .name = "rxe-ah",
+ .name = "ah",
.size = sizeof(struct rxe_ah),
- .elem_offset = offsetof(struct rxe_ah, pelem),
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
+ .elem_offset = offsetof(struct rxe_ah, elem),
.min_index = RXE_MIN_AH_INDEX,
.max_index = RXE_MAX_AH_INDEX,
+ .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1,
},
[RXE_TYPE_SRQ] = {
- .name = "rxe-srq",
+ .name = "srq",
.size = sizeof(struct rxe_srq),
- .elem_offset = offsetof(struct rxe_srq, pelem),
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
+ .elem_offset = offsetof(struct rxe_srq, elem),
+ .cleanup = rxe_srq_cleanup,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
+ .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1,
},
[RXE_TYPE_QP] = {
- .name = "rxe-qp",
+ .name = "qp",
.size = sizeof(struct rxe_qp),
- .elem_offset = offsetof(struct rxe_qp, pelem),
+ .elem_offset = offsetof(struct rxe_qp, elem),
.cleanup = rxe_qp_cleanup,
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_QP_INDEX,
.max_index = RXE_MAX_QP_INDEX,
+ .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1,
},
[RXE_TYPE_CQ] = {
- .name = "rxe-cq",
+ .name = "cq",
.size = sizeof(struct rxe_cq),
- .elem_offset = offsetof(struct rxe_cq, pelem),
- .flags = RXE_POOL_NO_ALLOC,
+ .elem_offset = offsetof(struct rxe_cq, elem),
.cleanup = rxe_cq_cleanup,
+ .min_index = 1,
+ .max_index = UINT_MAX,
+ .max_elem = UINT_MAX,
},
[RXE_TYPE_MR] = {
- .name = "rxe-mr",
+ .name = "mr",
.size = sizeof(struct rxe_mr),
- .elem_offset = offsetof(struct rxe_mr, pelem),
+ .elem_offset = offsetof(struct rxe_mr, elem),
.cleanup = rxe_mr_cleanup,
- .flags = RXE_POOL_INDEX,
.min_index = RXE_MIN_MR_INDEX,
.max_index = RXE_MAX_MR_INDEX,
+ .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1,
},
[RXE_TYPE_MW] = {
- .name = "rxe-mw",
+ .name = "mw",
.size = sizeof(struct rxe_mw),
- .elem_offset = offsetof(struct rxe_mw, pelem),
+ .elem_offset = offsetof(struct rxe_mw, elem),
.cleanup = rxe_mw_cleanup,
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_MW_INDEX,
.max_index = RXE_MAX_MW_INDEX,
- },
- [RXE_TYPE_MC_GRP] = {
- .name = "rxe-mc_grp",
- .size = sizeof(struct rxe_mc_grp),
- .elem_offset = offsetof(struct rxe_mc_grp, pelem),
- .cleanup = rxe_mc_cleanup,
- .flags = RXE_POOL_KEY,
- .key_offset = offsetof(struct rxe_mc_grp, mgid),
- .key_size = sizeof(union ib_gid),
- },
- [RXE_TYPE_MC_ELEM] = {
- .name = "rxe-mc_elem",
- .size = sizeof(struct rxe_mc_elem),
- .elem_offset = offsetof(struct rxe_mc_elem, pelem),
+ .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1,
},
};
-static inline const char *pool_name(struct rxe_pool *pool)
-{
- return rxe_type_info[pool->type].name;
-}
-
-static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
-{
- int err = 0;
-
- if ((max - min + 1) < pool->max_elem) {
- pr_warn("not enough indices for max_elem\n");
- err = -EINVAL;
- goto out;
- }
-
- pool->index.max_index = max;
- pool->index.min_index = min;
-
- pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL);
- if (!pool->index.table) {
- err = -ENOMEM;
- goto out;
- }
-
-out:
- return err;
-}
-
-int rxe_pool_init(
- struct rxe_dev *rxe,
- struct rxe_pool *pool,
- enum rxe_elem_type type,
- unsigned int max_elem)
+void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
+ enum rxe_elem_type type)
{
- int err = 0;
- size_t size = rxe_type_info[type].size;
+ const struct rxe_type_info *info = &rxe_type_info[type];
memset(pool, 0, sizeof(*pool));
pool->rxe = rxe;
+ pool->name = info->name;
pool->type = type;
- pool->max_elem = max_elem;
- pool->elem_size = ALIGN(size, RXE_POOL_ALIGN);
- pool->flags = rxe_type_info[type].flags;
- pool->index.tree = RB_ROOT;
- pool->key.tree = RB_ROOT;
- pool->cleanup = rxe_type_info[type].cleanup;
+ pool->max_elem = info->max_elem;
+ pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN);
+ pool->elem_offset = info->elem_offset;
+ pool->cleanup = info->cleanup;
atomic_set(&pool->num_elem, 0);
- rwlock_init(&pool->pool_lock);
-
- if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
- err = rxe_pool_init_index(pool,
- rxe_type_info[type].max_index,
- rxe_type_info[type].min_index);
- if (err)
- goto out;
- }
-
- if (rxe_type_info[type].flags & RXE_POOL_KEY) {
- pool->key.key_offset = rxe_type_info[type].key_offset;
- pool->key.key_size = rxe_type_info[type].key_size;
- }
-
-out:
- return err;
+ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
+ pool->limit.min = info->min_index;
+ pool->limit.max = info->max_index;
}
void rxe_pool_cleanup(struct rxe_pool *pool)
{
- if (atomic_read(&pool->num_elem) > 0)
- pr_warn("%s pool destroyed with unfree'd elem\n",
- pool_name(pool));
-
- bitmap_free(pool->index.table);
-}
-
-static u32 alloc_index(struct rxe_pool *pool)
-{
- u32 index;
- u32 range = pool->index.max_index - pool->index.min_index + 1;
-
- index = find_next_zero_bit(pool->index.table, range, pool->index.last);
- if (index >= range)
- index = find_first_zero_bit(pool->index.table, range);
-
- WARN_ON_ONCE(index >= range);
- set_bit(index, pool->index.table);
- pool->index.last = index;
- return index + pool->index.min_index;
-}
-
-static int rxe_insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
-{
- struct rb_node **link = &pool->index.tree.rb_node;
- struct rb_node *parent = NULL;
- struct rxe_pool_entry *elem;
-
- while (*link) {
- parent = *link;
- elem = rb_entry(parent, struct rxe_pool_entry, index_node);
-
- if (elem->index == new->index) {
- pr_warn("element already exists!\n");
- return -EINVAL;
- }
-
- if (elem->index > new->index)
- link = &(*link)->rb_left;
- else
- link = &(*link)->rb_right;
- }
-
- rb_link_node(&new->index_node, parent, link);
- rb_insert_color(&new->index_node, &pool->index.tree);
-
- return 0;
-}
-
-static int rxe_insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
-{
- struct rb_node **link = &pool->key.tree.rb_node;
- struct rb_node *parent = NULL;
- struct rxe_pool_entry *elem;
- int cmp;
-
- while (*link) {
- parent = *link;
- elem = rb_entry(parent, struct rxe_pool_entry, key_node);
-
- cmp = memcmp((u8 *)elem + pool->key.key_offset,
- (u8 *)new + pool->key.key_offset, pool->key.key_size);
-
- if (cmp == 0) {
- pr_warn("key already exists!\n");
- return -EINVAL;
- }
-
- if (cmp > 0)
- link = &(*link)->rb_left;
- else
- link = &(*link)->rb_right;
- }
-
- rb_link_node(&new->key_node, parent, link);
- rb_insert_color(&new->key_node, &pool->key.tree);
-
- return 0;
-}
-
-int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key)
-{
- struct rxe_pool *pool = elem->pool;
- int err;
-
- memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
- err = rxe_insert_key(pool, elem);
-
- return err;
-}
-
-int __rxe_add_key(struct rxe_pool_entry *elem, void *key)
-{
- struct rxe_pool *pool = elem->pool;
- unsigned long flags;
- int err;
-
- write_lock_irqsave(&pool->pool_lock, flags);
- err = __rxe_add_key_locked(elem, key);
- write_unlock_irqrestore(&pool->pool_lock, flags);
-
- return err;
-}
-
-void __rxe_drop_key_locked(struct rxe_pool_entry *elem)
-{
- struct rxe_pool *pool = elem->pool;
-
- rb_erase(&elem->key_node, &pool->key.tree);
-}
-
-void __rxe_drop_key(struct rxe_pool_entry *elem)
-{
- struct rxe_pool *pool = elem->pool;
- unsigned long flags;
-
- write_lock_irqsave(&pool->pool_lock, flags);
- __rxe_drop_key_locked(elem);
- write_unlock_irqrestore(&pool->pool_lock, flags);
+ WARN_ON(!xa_empty(&pool->xa));
}
-int __rxe_add_index_locked(struct rxe_pool_entry *elem)
-{
- struct rxe_pool *pool = elem->pool;
- int err;
-
- elem->index = alloc_index(pool);
- err = rxe_insert_index(pool, elem);
-
- return err;
-}
-
-int __rxe_add_index(struct rxe_pool_entry *elem)
+void *rxe_alloc(struct rxe_pool *pool)
{
- struct rxe_pool *pool = elem->pool;
- unsigned long flags;
+ struct rxe_pool_elem *elem;
+ void *obj;
int err;
- write_lock_irqsave(&pool->pool_lock, flags);
- err = __rxe_add_index_locked(elem);
- write_unlock_irqrestore(&pool->pool_lock, flags);
-
- return err;
-}
-
-void __rxe_drop_index_locked(struct rxe_pool_entry *elem)
-{
- struct rxe_pool *pool = elem->pool;
-
- clear_bit(elem->index - pool->index.min_index, pool->index.table);
- rb_erase(&elem->index_node, &pool->index.tree);
-}
-
-void __rxe_drop_index(struct rxe_pool_entry *elem)
-{
- struct rxe_pool *pool = elem->pool;
- unsigned long flags;
-
- write_lock_irqsave(&pool->pool_lock, flags);
- __rxe_drop_index_locked(elem);
- write_unlock_irqrestore(&pool->pool_lock, flags);
-}
-
-void *rxe_alloc_locked(struct rxe_pool *pool)
-{
- const struct rxe_type_info *info = &rxe_type_info[pool->type];
- struct rxe_pool_entry *elem;
- u8 *obj;
+ if (WARN_ON(!(pool->type == RXE_TYPE_MR)))
+ return NULL;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
- goto out_cnt;
+ goto err_cnt;
- obj = kzalloc(info->size, GFP_ATOMIC);
+ obj = kzalloc(pool->elem_size, GFP_KERNEL);
if (!obj)
- goto out_cnt;
+ goto err_cnt;
- elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
+ elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset);
elem->pool = pool;
+ elem->obj = obj;
kref_init(&elem->ref_cnt);
+ init_completion(&elem->complete);
+
+ /* allocate index in array but leave pointer as NULL so it
+ * can't be looked up until rxe_finalize() is called
+ */
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
+ &pool->next, GFP_KERNEL);
+ if (err < 0)
+ goto err_free;
return obj;
-out_cnt:
+err_free:
+ kfree(obj);
+err_cnt:
atomic_dec(&pool->num_elem);
return NULL;
}
-void *rxe_alloc(struct rxe_pool *pool)
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
+ bool sleepable)
{
- const struct rxe_type_info *info = &rxe_type_info[pool->type];
- struct rxe_pool_entry *elem;
- u8 *obj;
-
- if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
- goto out_cnt;
+ int err;
+ gfp_t gfp_flags;
- obj = kzalloc(info->size, GFP_KERNEL);
- if (!obj)
- goto out_cnt;
+ if (WARN_ON(pool->type == RXE_TYPE_MR))
+ return -EINVAL;
- elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
+ if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
+ goto err_cnt;
elem->pool = pool;
+ elem->obj = (u8 *)elem - pool->elem_offset;
kref_init(&elem->ref_cnt);
+ init_completion(&elem->complete);
- return obj;
-
-out_cnt:
- atomic_dec(&pool->num_elem);
- return NULL;
-}
-
-int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
-{
- if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
- goto out_cnt;
+ /* AH objects are unique in that the create_ah verb
+ * can be called in atomic context. If the create_ah
+ * call is not sleepable use GFP_ATOMIC.
+ */
+ gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC;
- elem->pool = pool;
- kref_init(&elem->ref_cnt);
+ if (sleepable)
+ might_sleep();
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
+ &pool->next, gfp_flags);
+ if (err < 0)
+ goto err_cnt;
return 0;
-out_cnt:
+err_cnt:
atomic_dec(&pool->num_elem);
return -EINVAL;
}
-void rxe_elem_release(struct kref *kref)
+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
- struct rxe_pool_entry *elem =
- container_of(kref, struct rxe_pool_entry, ref_cnt);
- struct rxe_pool *pool = elem->pool;
- const struct rxe_type_info *info = &rxe_type_info[pool->type];
- u8 *obj;
+ struct rxe_pool_elem *elem;
+ struct xarray *xa = &pool->xa;
+ void *obj;
+
+ rcu_read_lock();
+ elem = xa_load(xa, index);
+ if (elem && kref_get_unless_zero(&elem->ref_cnt))
+ obj = elem->obj;
+ else
+ obj = NULL;
+ rcu_read_unlock();
- if (pool->cleanup)
- pool->cleanup(elem);
+ return obj;
+}
- if (!(pool->flags & RXE_POOL_NO_ALLOC)) {
- obj = (u8 *)elem - info->elem_offset;
- kfree(obj);
- }
+static void rxe_elem_release(struct kref *kref)
+{
+ struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
- atomic_dec(&pool->num_elem);
+ complete(&elem->complete);
}
-void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index)
+int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
{
- const struct rxe_type_info *info = &rxe_type_info[pool->type];
- struct rb_node *node;
- struct rxe_pool_entry *elem;
- u8 *obj;
-
- node = pool->index.tree.rb_node;
-
- while (node) {
- elem = rb_entry(node, struct rxe_pool_entry, index_node);
-
- if (elem->index > index)
- node = node->rb_left;
- else if (elem->index < index)
- node = node->rb_right;
- else
- break;
- }
-
- if (node) {
- kref_get(&elem->ref_cnt);
- obj = (u8 *)elem - info->elem_offset;
+ struct rxe_pool *pool = elem->pool;
+ struct xarray *xa = &pool->xa;
+ static int timeout = RXE_POOL_TIMEOUT;
+ int ret, err = 0;
+ void *xa_ret;
+
+ if (sleepable)
+ might_sleep();
+
+ /* erase xarray entry to prevent looking up
+ * the pool elem from its index
+ */
+ xa_ret = xa_erase(xa, elem->index);
+ WARN_ON(xa_err(xa_ret));
+
+ /* if this is the last call to rxe_put complete the
+ * object. It is safe to touch obj->elem after this since
+ * it is freed below
+ */
+ __rxe_put(elem);
+
+ /* wait until all references to the object have been
+ * dropped before final object specific cleanup and
+ * return to rdma-core
+ */
+ if (sleepable) {
+ if (!completion_done(&elem->complete) && timeout) {
+ ret = wait_for_completion_timeout(&elem->complete,
+ timeout);
+
+ /* Shouldn't happen. There are still references to
+ * the object but, rather than deadlock, free the
+ * object or pass back to rdma-core.
+ */
+ if (WARN_ON(!ret))
+ err = -EINVAL;
+ }
} else {
- obj = NULL;
+ unsigned long until = jiffies + timeout;
+
+ /* AH objects are unique in that the destroy_ah verb
+ * can be called in atomic context. This delay
+ * replaces the wait_for_completion call above
+ * when the destroy_ah call is not sleepable
+ */
+ while (!completion_done(&elem->complete) &&
+ time_before(jiffies, until))
+ mdelay(1);
+
+ if (WARN_ON(!completion_done(&elem->complete)))
+ err = -EINVAL;
}
- return obj;
-}
+ if (pool->cleanup)
+ pool->cleanup(elem);
-void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
-{
- u8 *obj;
- unsigned long flags;
+ if (pool->type == RXE_TYPE_MR)
+ kfree_rcu(elem->obj);
- read_lock_irqsave(&pool->pool_lock, flags);
- obj = rxe_pool_get_index_locked(pool, index);
- read_unlock_irqrestore(&pool->pool_lock, flags);
+ atomic_dec(&pool->num_elem);
- return obj;
+ return err;
}
-void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key)
+int __rxe_get(struct rxe_pool_elem *elem)
{
- const struct rxe_type_info *info = &rxe_type_info[pool->type];
- struct rb_node *node;
- struct rxe_pool_entry *elem;
- u8 *obj;
- int cmp;
-
- node = pool->key.tree.rb_node;
-
- while (node) {
- elem = rb_entry(node, struct rxe_pool_entry, key_node);
-
- cmp = memcmp((u8 *)elem + pool->key.key_offset,
- key, pool->key.key_size);
-
- if (cmp > 0)
- node = node->rb_left;
- else if (cmp < 0)
- node = node->rb_right;
- else
- break;
- }
-
- if (node) {
- kref_get(&elem->ref_cnt);
- obj = (u8 *)elem - info->elem_offset;
- } else {
- obj = NULL;
- }
-
- return obj;
+ return kref_get_unless_zero(&elem->ref_cnt);
}
-void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+int __rxe_put(struct rxe_pool_elem *elem)
{
- u8 *obj;
- unsigned long flags;
+ return kref_put(&elem->ref_cnt, rxe_elem_release);
+}
- read_lock_irqsave(&pool->pool_lock, flags);
- obj = rxe_pool_get_key_locked(pool, key);
- read_unlock_irqrestore(&pool->pool_lock, flags);
+void __rxe_finalize(struct rxe_pool_elem *elem)
+{
+ void *xa_ret;
- return obj;
+ xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL);
+ WARN_ON(xa_err(xa_ret));
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 8ecd9f870aea..9d83cb32092f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -7,15 +7,6 @@
#ifndef RXE_POOL_H
#define RXE_POOL_H
-#define RXE_POOL_ALIGN (16)
-#define RXE_POOL_CACHE_FLAGS (0)
-
-enum rxe_pool_flags {
- RXE_POOL_INDEX = BIT(1),
- RXE_POOL_KEY = BIT(2),
- RXE_POOL_NO_ALLOC = BIT(4),
-};
-
enum rxe_elem_type {
RXE_TYPE_UC,
RXE_TYPE_PD,
@@ -25,137 +16,70 @@ enum rxe_elem_type {
RXE_TYPE_CQ,
RXE_TYPE_MR,
RXE_TYPE_MW,
- RXE_TYPE_MC_GRP,
- RXE_TYPE_MC_ELEM,
RXE_NUM_TYPES, /* keep me last */
};
-struct rxe_pool_entry;
-
-struct rxe_pool_entry {
+struct rxe_pool_elem {
struct rxe_pool *pool;
+ void *obj;
struct kref ref_cnt;
struct list_head list;
-
- /* only used if keyed */
- struct rb_node key_node;
-
- /* only used if indexed */
- struct rb_node index_node;
+ struct completion complete;
u32 index;
};
struct rxe_pool {
struct rxe_dev *rxe;
- rwlock_t pool_lock; /* protects pool add/del/search */
- size_t elem_size;
- void (*cleanup)(struct rxe_pool_entry *obj);
- enum rxe_pool_flags flags;
+ const char *name;
+ void (*cleanup)(struct rxe_pool_elem *elem);
enum rxe_elem_type type;
unsigned int max_elem;
atomic_t num_elem;
+ size_t elem_size;
+ size_t elem_offset;
- /* only used if indexed */
- struct {
- struct rb_root tree;
- unsigned long *table;
- u32 last;
- u32 max_index;
- u32 min_index;
- } index;
-
- /* only used if keyed */
- struct {
- struct rb_root tree;
- size_t key_offset;
- size_t key_size;
- } key;
+ struct xarray xa;
+ struct xa_limit limit;
+ u32 next;
};
/* initialize a pool of objects with given limit on
* number of elements. gets parameters from rxe_type_info
* pool elements will be allocated out of a slab cache
*/
-int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
- enum rxe_elem_type type, u32 max_elem);
+void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
+ enum rxe_elem_type type);
/* free resources from object pool */
void rxe_pool_cleanup(struct rxe_pool *pool);
-/* allocate an object from pool holding and not holding the pool lock */
-void *rxe_alloc_locked(struct rxe_pool *pool);
-
+/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);
/* connect already allocated object to pool */
-int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
-
-#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem)
-
-/* assign an index to an indexed object and insert object into
- * pool's rb tree holding and not holding the pool_lock
- */
-int __rxe_add_index_locked(struct rxe_pool_entry *elem);
-
-#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem)
-
-int __rxe_add_index(struct rxe_pool_entry *elem);
-
-#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem)
-
-/* drop an index and remove object from rb tree
- * holding and not holding the pool_lock
- */
-void __rxe_drop_index_locked(struct rxe_pool_entry *elem);
-
-#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->pelem)
-
-void __rxe_drop_index(struct rxe_pool_entry *elem);
-
-#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem)
-
-/* assign a key to a keyed object and insert object into
- * pool's rb tree holding and not holding pool_lock
- */
-int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key);
-
-#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key)
-
-int __rxe_add_key(struct rxe_pool_entry *elem, void *key);
-
-#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key)
-
-/* remove elem from rb tree holding and not holding the pool_lock */
-void __rxe_drop_key_locked(struct rxe_pool_entry *elem);
-
-#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->pelem)
-
-void __rxe_drop_key(struct rxe_pool_entry *elem);
-
-#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem)
-
-/* lookup an indexed object from index holding and not holding the pool_lock.
- * takes a reference on object
- */
-void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index);
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
+ bool sleepable);
+#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem, true)
+#define rxe_add_to_pool_ah(pool, obj, sleepable) __rxe_add_to_pool(pool, \
+ &(obj)->elem, sleepable)
+/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
-/* lookup keyed object from key holding and not holding the pool_lock.
- * takes a reference on the objecti
- */
-void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key);
+int __rxe_get(struct rxe_pool_elem *elem);
+#define rxe_get(obj) __rxe_get(&(obj)->elem)
-void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
+int __rxe_put(struct rxe_pool_elem *elem);
+#define rxe_put(obj) __rxe_put(&(obj)->elem)
-/* cleanup an object when all references are dropped */
-void rxe_elem_release(struct kref *kref);
+int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable);
+#define rxe_cleanup(obj) __rxe_cleanup(&(obj)->elem, true)
+#define rxe_cleanup_ah(obj, sleepable) __rxe_cleanup(&(obj)->elem, sleepable)
-/* take a reference on an object */
-#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt)
+#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt)
-/* drop a reference on an object */
-#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release)
+void __rxe_finalize(struct rxe_pool_elem *elem);
+#define rxe_finalize(obj) __rxe_finalize(&(obj)->elem)
#endif /* RXE_POOL_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 54b8711321c1..a62bab88415c 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -19,34 +19,34 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
if (cap->max_send_wr > rxe->attr.max_qp_wr) {
- pr_warn("invalid send wr = %d > %d\n",
- cap->max_send_wr, rxe->attr.max_qp_wr);
+ pr_debug("invalid send wr = %u > %d\n",
+ cap->max_send_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_send_sge > rxe->attr.max_send_sge) {
- pr_warn("invalid send sge = %d > %d\n",
- cap->max_send_sge, rxe->attr.max_send_sge);
+ pr_debug("invalid send sge = %u > %d\n",
+ cap->max_send_sge, rxe->attr.max_send_sge);
goto err1;
}
if (!has_srq) {
if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
- pr_warn("invalid recv wr = %d > %d\n",
- cap->max_recv_wr, rxe->attr.max_qp_wr);
+ pr_debug("invalid recv wr = %u > %d\n",
+ cap->max_recv_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
- pr_warn("invalid recv sge = %d > %d\n",
- cap->max_recv_sge, rxe->attr.max_recv_sge);
+ pr_debug("invalid recv sge = %u > %d\n",
+ cap->max_recv_sge, rxe->attr.max_recv_sge);
goto err1;
}
}
if (cap->max_inline_data > rxe->max_inline_data) {
- pr_warn("invalid max inline data = %d > %d\n",
- cap->max_inline_data, rxe->max_inline_data);
+ pr_debug("invalid max inline data = %u > %d\n",
+ cap->max_inline_data, rxe->max_inline_data);
goto err1;
}
@@ -63,7 +63,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
int port_num = init->port_num;
switch (init->qp_type) {
- case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_RC:
case IB_QPT_UC:
@@ -74,28 +73,23 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
}
if (!init->recv_cq || !init->send_cq) {
- pr_warn("missing cq\n");
+ pr_debug("missing cq\n");
goto err1;
}
if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
goto err1;
- if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
+ if (init->qp_type == IB_QPT_GSI) {
if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
- pr_warn("invalid port = %d\n", port_num);
+ pr_debug("invalid port = %d\n", port_num);
goto err1;
}
port = &rxe->port;
- if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
- pr_warn("SMI QP exists for port %d\n", port_num);
- goto err1;
- }
-
if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
- pr_warn("GSI QP exists for port %d\n", port_num);
+ pr_debug("GSI QP exists for port %d\n", port_num);
goto err1;
}
}
@@ -126,21 +120,15 @@ static void free_rd_atomic_resources(struct rxe_qp *qp)
for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
struct resp_res *res = &qp->resp.resources[i];
- free_rd_atomic_resource(qp, res);
+ free_rd_atomic_resource(res);
}
kfree(qp->resp.resources);
qp->resp.resources = NULL;
}
}
-void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
+void free_rd_atomic_resource(struct resp_res *res)
{
- if (res->type == RXE_ATOMIC_MASK) {
- kfree_skb(res->atomic.skb);
- } else if (res->type == RXE_READ_MASK) {
- if (res->read.mr)
- rxe_drop_ref(res->read.mr);
- }
res->type = 0;
}
@@ -152,7 +140,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
if (qp->resp.resources) {
for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
res = &qp->resp.resources[i];
- free_rd_atomic_resource(qp, res);
+ free_rd_atomic_resource(res);
}
}
}
@@ -167,16 +155,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->attr.path_mtu = 1;
qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu);
- qpn = qp->pelem.index;
+ qpn = qp->elem.index;
port = &rxe->port;
switch (init->qp_type) {
- case IB_QPT_SMI:
- qp->ibqp.qp_num = 0;
- port->qp_smi_index = qpn;
- qp->attr.port_num = init->port_num;
- break;
-
case IB_QPT_GSI:
qp->ibqp.qp_num = 1;
port->qp_gsi_index = qpn;
@@ -188,11 +170,16 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
break;
}
- INIT_LIST_HEAD(&qp->grp_list);
-
- spin_lock_init(&qp->grp_lock);
spin_lock_init(&qp->state_lock);
+ spin_lock_init(&qp->req.task.state_lock);
+ spin_lock_init(&qp->resp.task.state_lock);
+ spin_lock_init(&qp->comp.task.state_lock);
+
+ spin_lock_init(&qp->sq.sq_lock);
+ spin_lock_init(&qp->rq.producer_lock);
+ spin_lock_init(&qp->rq.consumer_lock);
+
atomic_set(&qp->ssn, 0);
atomic_set(&qp->skb_out, 0);
}
@@ -217,8 +204,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
* the port number must be in the Dynamic Ports range
* (0xc000 - 0xffff).
*/
- qp->src_port = RXE_ROCE_V2_SPORT +
- (hash_32_generic(qp_num(qp), 14) & 0x3fff);
+ qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff);
qp->sq.max_wr = init->cap.max_send_wr;
/* These caps are limited by rxe_qp_chk_cap() done by the caller */
@@ -250,15 +236,15 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
QUEUE_TYPE_FROM_CLIENT);
qp->req.state = QP_STATE_RESET;
+ qp->comp.state = QP_STATE_RESET;
qp->req.opcode = -1;
qp->comp.opcode = -1;
- spin_lock_init(&qp->sq.sq_lock);
skb_queue_head_init(&qp->req_pkts);
- rxe_init_task(rxe, &qp->req.task, qp,
+ rxe_init_task(&qp->req.task, qp,
rxe_requester, "req");
- rxe_init_task(rxe, &qp->comp.task, qp,
+ rxe_init_task(&qp->comp.task, qp,
rxe_completer, "comp");
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
@@ -304,12 +290,9 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
}
}
- spin_lock_init(&qp->rq.producer_lock);
- spin_lock_init(&qp->rq.consumer_lock);
-
skb_queue_head_init(&qp->resp_pkts);
- rxe_init_task(rxe, &qp->resp.task, qp,
+ rxe_init_task(&qp->resp.task, qp,
rxe_responder, "resp");
qp->resp.opcode = OPCODE_NONE;
@@ -331,17 +314,20 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
- rxe_add_ref(pd);
- rxe_add_ref(rcq);
- rxe_add_ref(scq);
+ rxe_get(pd);
+ rxe_get(rcq);
+ rxe_get(scq);
if (srq)
- rxe_add_ref(srq);
+ rxe_get(srq);
qp->pd = pd;
qp->rcq = rcq;
qp->scq = scq;
qp->srq = srq;
+ atomic_inc(&rcq->num_wq);
+ atomic_inc(&scq->num_wq);
+
rxe_qp_init_misc(rxe, qp, init);
err = rxe_qp_init_req(rxe, qp, init, udata, uresp);
@@ -361,16 +347,19 @@ err2:
rxe_queue_cleanup(qp->sq.queue);
qp->sq.queue = NULL;
err1:
+ atomic_dec(&rcq->num_wq);
+ atomic_dec(&scq->num_wq);
+
qp->pd = NULL;
qp->rcq = NULL;
qp->scq = NULL;
qp->srq = NULL;
if (srq)
- rxe_drop_ref(srq);
- rxe_drop_ref(scq);
- rxe_drop_ref(rcq);
- rxe_drop_ref(pd);
+ rxe_put(srq);
+ rxe_put(scq);
+ rxe_put(rcq);
+ rxe_put(pd);
return err;
}
@@ -413,7 +402,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
- pr_warn("invalid mask or state for qp\n");
+ pr_debug("invalid mask or state for qp\n");
goto err1;
}
@@ -427,7 +416,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_PORT) {
if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
- pr_warn("invalid port %d\n", attr->port_num);
+ pr_debug("invalid port %d\n", attr->port_num);
goto err1;
}
}
@@ -442,12 +431,12 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
goto err1;
if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) {
- pr_warn("invalid alt port %d\n", attr->alt_port_num);
+ pr_debug("invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
if (attr->alt_timeout > 31) {
- pr_warn("invalid QP alt timeout %d > 31\n",
- attr->alt_timeout);
+ pr_debug("invalid QP alt timeout %d > 31\n",
+ attr->alt_timeout);
goto err1;
}
}
@@ -468,17 +457,16 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
- pr_warn("invalid max_rd_atomic %d > %d\n",
- attr->max_rd_atomic,
- rxe->attr.max_qp_rd_atom);
+ pr_debug("invalid max_rd_atomic %d > %d\n",
+ attr->max_rd_atomic,
+ rxe->attr.max_qp_rd_atom);
goto err1;
}
}
if (mask & IB_QP_TIMEOUT) {
if (attr->timeout > 31) {
- pr_warn("invalid QP timeout %d > 31\n",
- attr->timeout);
+ pr_debug("invalid QP timeout %d > 31\n", attr->timeout);
goto err1;
}
}
@@ -504,6 +492,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
/* move qp to the reset state */
qp->req.state = QP_STATE_RESET;
+ qp->comp.state = QP_STATE_RESET;
qp->resp.state = QP_STATE_RESET;
/* let state machines reset themselves drain work and packet queues
@@ -521,6 +510,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
atomic_set(&qp->ssn, 0);
qp->req.opcode = -1;
qp->req.need_retry = 0;
+ qp->req.wait_for_rnr_timer = 0;
qp->req.noack_pkts = 0;
qp->resp.msn = 0;
qp->resp.opcode = -1;
@@ -529,7 +519,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
qp->resp.sent_psn_nak = 0;
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -566,6 +556,7 @@ void rxe_qp_error(struct rxe_qp *qp)
{
qp->req.state = QP_STATE_ERROR;
qp->resp.state = QP_STATE_ERROR;
+ qp->comp.state = QP_STATE_ERROR;
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
@@ -703,6 +694,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
pr_debug("qp#%d state -> INIT\n", qp_num(qp));
qp->req.state = QP_STATE_INIT;
qp->resp.state = QP_STATE_INIT;
+ qp->comp.state = QP_STATE_INIT;
break;
case IB_QPS_RTR:
@@ -713,6 +705,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
case IB_QPS_RTS:
pr_debug("qp#%d state -> RTS\n", qp_num(qp));
qp->req.state = QP_STATE_READY;
+ qp->comp.state = QP_STATE_READY;
break;
case IB_QPS_SQD:
@@ -771,9 +764,25 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
return 0;
}
-/* called by the destroy qp verb */
-void rxe_qp_destroy(struct rxe_qp *qp)
+int rxe_qp_chk_destroy(struct rxe_qp *qp)
{
+ /* See IBA o10-2.2.3
+ * An attempt to destroy a QP while attached to a mcast group
+ * will fail immediately.
+ */
+ if (atomic_read(&qp->mcg_num)) {
+ pr_debug("Attempt to destroy QP while attached to multicast group\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/* called when the last reference to the qp is dropped */
+static void rxe_qp_do_cleanup(struct work_struct *work)
+{
+ struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
+
qp->valid = 0;
qp->qp_timeout_jiffies = 0;
rxe_cleanup_task(&qp->resp.task);
@@ -787,54 +796,54 @@ void rxe_qp_destroy(struct rxe_qp *qp)
rxe_cleanup_task(&qp->comp.task);
/* flush out any receive wr's or pending requests */
- __rxe_do_task(&qp->req.task);
+ if (qp->req.task.func)
+ __rxe_do_task(&qp->req.task);
+
if (qp->sq.queue) {
__rxe_do_task(&qp->comp.task);
__rxe_do_task(&qp->req.task);
}
-}
-
-/* called when the last reference to the qp is dropped */
-static void rxe_qp_do_cleanup(struct work_struct *work)
-{
- struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
-
- rxe_drop_all_mcast_groups(qp);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
if (qp->srq)
- rxe_drop_ref(qp->srq);
+ rxe_put(qp->srq);
if (qp->rq.queue)
rxe_queue_cleanup(qp->rq.queue);
- if (qp->scq)
- rxe_drop_ref(qp->scq);
- if (qp->rcq)
- rxe_drop_ref(qp->rcq);
- if (qp->pd)
- rxe_drop_ref(qp->pd);
+ if (qp->scq) {
+ atomic_dec(&qp->scq->num_wq);
+ rxe_put(qp->scq);
+ }
- if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
- qp->resp.mr = NULL;
+ if (qp->rcq) {
+ atomic_dec(&qp->rcq->num_wq);
+ rxe_put(qp->rcq);
}
+ if (qp->pd)
+ rxe_put(qp->pd);
+
+ if (qp->resp.mr)
+ rxe_put(qp->resp.mr);
+
if (qp_type(qp) == IB_QPT_RC)
sk_dst_reset(qp->sk->sk);
free_rd_atomic_resources(qp);
- kernel_sock_shutdown(qp->sk, SHUT_RDWR);
- sock_release(qp->sk);
+ if (qp->sk) {
+ kernel_sock_shutdown(qp->sk, SHUT_RDWR);
+ sock_release(qp->sk);
+ }
}
/* called when the last reference to the qp is dropped */
-void rxe_qp_cleanup(struct rxe_pool_entry *arg)
+void rxe_qp_cleanup(struct rxe_pool_elem *elem)
{
- struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem);
+ struct rxe_qp *qp = container_of(elem, typeof(*qp), elem);
execute_in_process_context(rxe_qp_do_cleanup, &qp->cleanup_work);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index 6e6e023c1b45..d6dbf5a0058d 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -112,23 +112,25 @@ static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
unsigned int num_elem)
{
enum queue_type type = q->type;
+ u32 new_prod;
u32 prod;
u32 cons;
if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type)))
return -EINVAL;
- prod = queue_get_producer(new_q, type);
+ new_prod = queue_get_producer(new_q, type);
+ prod = queue_get_producer(q, type);
cons = queue_get_consumer(q, type);
- while (!queue_empty(q, type)) {
- memcpy(queue_addr_from_index(new_q, prod),
+ while ((prod - cons) & q->index_mask) {
+ memcpy(queue_addr_from_index(new_q, new_prod),
queue_addr_from_index(q, cons), new_q->elem_size);
- prod = queue_next_index(new_q, prod);
+ new_prod = queue_next_index(new_q, new_prod);
cons = queue_next_index(q, cons);
}
- new_q->buf->producer_index = prod;
+ new_q->buf->producer_index = new_prod;
q->buf->consumer_index = cons;
/* update private index copies */
@@ -151,7 +153,8 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
struct rxe_queue *new_q;
unsigned int num_elem = *num_elem_p;
int err;
- unsigned long flags = 0, flags1;
+ unsigned long producer_flags;
+ unsigned long consumer_flags;
new_q = rxe_queue_init(q->rxe, &num_elem, elem_size, q->type);
if (!new_q)
@@ -165,17 +168,17 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
goto err1;
}
- spin_lock_irqsave(consumer_lock, flags1);
+ spin_lock_irqsave(consumer_lock, consumer_flags);
if (producer_lock) {
- spin_lock_irqsave(producer_lock, flags);
+ spin_lock_irqsave(producer_lock, producer_flags);
err = resize_finish(q, new_q, num_elem);
- spin_unlock_irqrestore(producer_lock, flags);
+ spin_unlock_irqrestore(producer_lock, producer_flags);
} else {
err = resize_finish(q, new_q, num_elem);
}
- spin_unlock_irqrestore(consumer_lock, flags1);
+ spin_unlock_irqrestore(consumer_lock, consumer_flags);
rxe_queue_cleanup(new_q); /* new/old dep on err */
if (err)
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 6227112ef7a2..ed44042782fa 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -7,9 +7,6 @@
#ifndef RXE_QUEUE_H
#define RXE_QUEUE_H
-/* for definition of shared struct rxe_queue_buf */
-#include <uapi/rdma/rdma_user_rxe.h>
-
/* Implements a simple circular buffer that is shared between user
* and the driver and can be resized. The requested element size is
* rounded up to a power of 2 and the number of elements in the buffer
@@ -53,6 +50,8 @@ enum queue_type {
QUEUE_TYPE_FROM_DRIVER,
};
+struct rxe_queue_buf;
+
struct rxe_queue {
struct rxe_dev *rxe;
struct rxe_queue_buf *buf;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 6a6cc1fa90e4..434a693cd4a5 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -16,48 +16,36 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
unsigned int pkt_type;
if (unlikely(!qp->valid))
- goto err1;
+ return -EINVAL;
pkt_type = pkt->opcode & 0xe0;
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (unlikely(pkt_type != IB_OPCODE_RC)) {
- pr_warn_ratelimited("bad qp type\n");
- goto err1;
- }
+ if (unlikely(pkt_type != IB_OPCODE_RC))
+ return -EINVAL;
break;
case IB_QPT_UC:
- if (unlikely(pkt_type != IB_OPCODE_UC)) {
- pr_warn_ratelimited("bad qp type\n");
- goto err1;
- }
+ if (unlikely(pkt_type != IB_OPCODE_UC))
+ return -EINVAL;
break;
case IB_QPT_UD:
- case IB_QPT_SMI:
case IB_QPT_GSI:
- if (unlikely(pkt_type != IB_OPCODE_UD)) {
- pr_warn_ratelimited("bad qp type\n");
- goto err1;
- }
+ if (unlikely(pkt_type != IB_OPCODE_UD))
+ return -EINVAL;
break;
default:
- pr_warn_ratelimited("unsupported qp type\n");
- goto err1;
+ return -EINVAL;
}
if (pkt->mask & RXE_REQ_MASK) {
if (unlikely(qp->resp.state != QP_STATE_READY))
- goto err1;
+ return -EINVAL;
} else if (unlikely(qp->req.state < QP_STATE_READY ||
- qp->req.state > QP_STATE_DRAINED)) {
- goto err1;
- }
+ qp->req.state > QP_STATE_DRAINED))
+ return -EINVAL;
return 0;
-
-err1:
- return -EINVAL;
}
static void set_bad_pkey_cntr(struct rxe_port *port)
@@ -85,26 +73,20 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
pkt->pkey_index = 0;
if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) {
- pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);
set_bad_pkey_cntr(port);
- goto err1;
+ return -EINVAL;
}
if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) {
u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
if (unlikely(deth_qkey(pkt) != qkey)) {
- pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n",
- deth_qkey(pkt), qkey, qpn);
set_qkey_viol_cntr(port);
- goto err1;
+ return -EINVAL;
}
}
return 0;
-
-err1:
- return -EINVAL;
}
static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
@@ -113,13 +95,10 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb = PKT_TO_SKB(pkt);
if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC)
- goto done;
+ return 0;
- if (unlikely(pkt->port_num != qp->attr.port_num)) {
- pr_warn_ratelimited("port %d != qp port %d\n",
- pkt->port_num, qp->attr.port_num);
- goto err1;
- }
+ if (unlikely(pkt->port_num != qp->attr.port_num))
+ return -EINVAL;
if (skb->protocol == htons(ETH_P_IP)) {
struct in_addr *saddr =
@@ -127,19 +106,9 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct in_addr *daddr =
&qp->pri_av.dgid_addr._sockaddr_in.sin_addr;
- if (ip_hdr(skb)->daddr != saddr->s_addr) {
- pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n",
- &ip_hdr(skb)->daddr,
- &saddr->s_addr);
- goto err1;
- }
-
- if (ip_hdr(skb)->saddr != daddr->s_addr) {
- pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n",
- &ip_hdr(skb)->saddr,
- &daddr->s_addr);
- goto err1;
- }
+ if ((ip_hdr(skb)->daddr != saddr->s_addr) ||
+ (ip_hdr(skb)->saddr != daddr->s_addr))
+ return -EINVAL;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
struct in6_addr *saddr =
@@ -147,24 +116,12 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct in6_addr *daddr =
&qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr;
- if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) {
- pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n",
- &ipv6_hdr(skb)->daddr, saddr);
- goto err1;
- }
-
- if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) {
- pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n",
- &ipv6_hdr(skb)->saddr, daddr);
- goto err1;
- }
+ if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr)) ||
+ memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr)))
+ return -EINVAL;
}
-done:
return 0;
-
-err1:
- return -EINVAL;
}
static int hdr_check(struct rxe_pkt_info *pkt)
@@ -176,24 +133,18 @@ static int hdr_check(struct rxe_pkt_info *pkt)
int index;
int err;
- if (unlikely(bth_tver(pkt) != BTH_TVER)) {
- pr_warn_ratelimited("bad tver\n");
+ if (unlikely(bth_tver(pkt) != BTH_TVER))
goto err1;
- }
- if (unlikely(qpn == 0)) {
- pr_warn_once("QP 0 not supported");
+ if (unlikely(qpn == 0))
goto err1;
- }
if (qpn != IB_MULTICAST_QPN) {
index = (qpn == 1) ? port->qp_gsi_index : qpn;
qp = rxe_pool_get_index(&rxe->qp_pool, index);
- if (unlikely(!qp)) {
- pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn);
+ if (unlikely(!qp))
goto err1;
- }
err = check_type_state(rxe, pkt, qp);
if (unlikely(err))
@@ -207,17 +158,15 @@ static int hdr_check(struct rxe_pkt_info *pkt)
if (unlikely(err))
goto err2;
} else {
- if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) {
- pr_warn_ratelimited("no grh for mcast qpn\n");
+ if (unlikely((pkt->mask & RXE_GRH_MASK) == 0))
goto err1;
- }
}
pkt->qp = qp;
return 0;
err2:
- rxe_drop_ref(qp);
+ rxe_put(qp);
err1:
return -EINVAL;
}
@@ -233,8 +182,8 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
{
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
- struct rxe_mc_grp *mcg;
- struct rxe_mc_elem *mce;
+ struct rxe_mcg *mcg;
+ struct rxe_mca *mca;
struct rxe_qp *qp;
union ib_gid dgid;
int err;
@@ -246,19 +195,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));
/* lookup mcast group corresponding to mgid, takes a ref */
- mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
+ mcg = rxe_lookup_mcg(rxe, &dgid);
if (!mcg)
goto drop; /* mcast group not registered */
- spin_lock_bh(&mcg->mcg_lock);
+ spin_lock_bh(&rxe->mcg_lock);
/* this is unreliable datagram service so we let
* failures to deliver a multicast packet to a
* single QP happen and just move on and try
* the rest of them on the list
*/
- list_for_each_entry(mce, &mcg->qp_list, qp_list) {
- qp = mce->qp;
+ list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+ qp = mca->qp;
/* validate qp for incoming packet */
err = check_type_state(rxe, pkt, qp);
@@ -273,7 +222,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
* skb and pass to the QP. Pass the original skb to
* the last QP in the list.
*/
- if (mce->qp_list.next != &mcg->qp_list) {
+ if (mca->qp_list.next != &mcg->qp_list) {
struct sk_buff *cskb;
struct rxe_pkt_info *cpkt;
@@ -288,19 +237,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
cpkt = SKB_TO_PKT(cskb);
cpkt->qp = qp;
- rxe_add_ref(qp);
+ rxe_get(qp);
rxe_rcv_pkt(cpkt, cskb);
} else {
pkt->qp = qp;
- rxe_add_ref(qp);
+ rxe_get(qp);
rxe_rcv_pkt(pkt, skb);
skb = NULL; /* mark consumed */
}
}
- spin_unlock_bh(&mcg->mcg_lock);
+ spin_unlock_bh(&rxe->mcg_lock);
- rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
if (likely(!skb))
return;
@@ -365,10 +314,8 @@ void rxe_rcv(struct sk_buff *skb)
if (unlikely(skb->len < RXE_BTH_BYTES))
goto drop;
- if (rxe_chk_dgid(rxe, skb) < 0) {
- pr_warn_ratelimited("failed checking dgid\n");
+ if (rxe_chk_dgid(rxe, skb) < 0)
goto drop;
- }
pkt->opcode = bth_opcode(pkt);
pkt->psn = bth_psn(pkt);
@@ -397,7 +344,7 @@ void rxe_rcv(struct sk_buff *skb)
drop:
if (pkt->qp)
- rxe_drop_ref(pkt->qp);
+ rxe_put(pkt->qp);
kfree_skb(skb);
ib_device_put(&rxe->ib_dev);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 0c9d2af15f3d..f63771207970 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -15,8 +15,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 opcode);
static inline void retry_first_write_send(struct rxe_qp *qp,
- struct rxe_send_wqe *wqe,
- unsigned int mask, int npsn)
+ struct rxe_send_wqe *wqe, int npsn)
{
int i;
@@ -33,8 +32,6 @@ static inline void retry_first_write_send(struct rxe_qp *qp,
} else {
advance_dma_data(&wqe->dma, to_send);
}
- if (mask & WR_WRITE_MASK)
- wqe->iova += qp->mtu;
}
}
@@ -85,7 +82,7 @@ static void req_retry(struct rxe_qp *qp)
if (mask & WR_WRITE_OR_SEND_MASK) {
npsn = (qp->comp.psn - wqe->first_psn) &
BTH_PSN_MASK;
- retry_first_write_send(qp, wqe, mask, npsn);
+ retry_first_write_send(qp, wqe, npsn);
}
if (mask & WR_READ_MASK) {
@@ -103,14 +100,17 @@ void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
- pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
+ pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+
+ /* request a send queue retry */
+ qp->req.need_retry = 1;
+ qp->req.wait_for_rnr_timer = 0;
rxe_run_task(&qp->req.task, 1);
}
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
- unsigned long flags;
struct rxe_queue *q = qp->sq.queue;
unsigned int index = qp->req.wqe_index;
unsigned int cons;
@@ -124,25 +124,23 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
/* check to see if we are drained;
* state_lock used by requester and completer
*/
- spin_lock_irqsave(&qp->state_lock, flags);
+ spin_lock_bh(&qp->state_lock);
do {
if (qp->req.state != QP_STATE_DRAIN) {
/* comp just finished */
- spin_unlock_irqrestore(&qp->state_lock,
- flags);
+ spin_unlock_bh(&qp->state_lock);
break;
}
if (wqe && ((index != cons) ||
(wqe->state != wqe_state_posted))) {
/* comp not done yet */
- spin_unlock_irqrestore(&qp->state_lock,
- flags);
+ spin_unlock_bh(&qp->state_lock);
break;
}
qp->req.state = QP_STATE_DRAINED;
- spin_unlock_irqrestore(&qp->state_lock, flags);
+ spin_unlock_bh(&qp->state_lock);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -166,16 +164,36 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
(wqe->state != wqe_state_processing)))
return NULL;
- if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
- (index != cons))) {
- qp->req.wait_fence = 1;
- return NULL;
- }
-
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
}
+/**
+ * rxe_wqe_is_fenced - check if next wqe is fenced
+ * @qp: the queue pair
+ * @wqe: the next wqe
+ *
+ * Returns: 1 if wqe needs to wait
+ * 0 if wqe is ready to go
+ */
+static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ /* Local invalidate fence (LIF) see IBA 10.6.5.1
+ * Requires ALL previous operations on the send queue
+ * are complete. Make mandatory for the rxe driver.
+ */
+ if (wqe->wr.opcode == IB_WR_LOCAL_INV)
+ return qp->req.wqe_index != queue_get_consumer(qp->sq.queue,
+ QUEUE_TYPE_FROM_CLIENT);
+
+ /* Fence see IBA 10.8.3.3
+ * Requires that all previous read and atomic operations
+ * are complete.
+ */
+ return (wqe->wr.send_flags & IB_SEND_FENCE) &&
+ atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic;
+}
+
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
switch (opcode) {
@@ -311,7 +329,6 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
case IB_QPT_UC:
return next_opcode_uc(qp, opcode, fits);
- case IB_QPT_SMI:
case IB_QPT_UD:
case IB_QPT_GSI:
switch (opcode) {
@@ -361,38 +378,25 @@ static inline int get_mtu(struct rxe_qp *qp)
}
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
+ struct rxe_av *av,
struct rxe_send_wqe *wqe,
- int opcode, int payload,
+ int opcode, u32 payload,
struct rxe_pkt_info *pkt)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct sk_buff *skb;
struct rxe_send_wr *ibwr = &wqe->wr;
- struct rxe_av *av;
int pad = (-payload) & 0x3;
int paylen;
int solicited;
- u16 pkey;
u32 qp_num;
int ack_req;
/* length from start of bth to end of icrc */
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
-
- /* pkt->hdr, port_num and mask are initialized in ifc layer */
- pkt->rxe = rxe;
- pkt->opcode = opcode;
- pkt->qp = qp;
- pkt->psn = qp->req.psn;
- pkt->mask = rxe_opcode[opcode].mask;
- pkt->paylen = paylen;
- pkt->wqe = wqe;
+ pkt->paylen = paylen;
/* init skb */
- av = rxe_get_av(pkt);
- if (!av)
- return NULL;
-
skb = rxe_init_packet(rxe, av, paylen, pkt);
if (unlikely(!skb))
return NULL;
@@ -404,8 +408,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
(RXE_WRITE_MASK | RXE_IMMDT_MASK));
- pkey = IB_DEFAULT_PKEY_FULL;
-
qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
qp->attr.dest_qp_num;
@@ -414,7 +416,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (ack_req)
qp->req.noack_pkts = 0;
- bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
+ bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num,
ack_req, pkt->psn);
/* init optional headers */
@@ -432,8 +434,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (pkt->mask & RXE_ATMETH_MASK) {
atmeth_set_va(pkt, wqe->iova);
- if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
- opcode == IB_OPCODE_RD_COMPARE_SWAP) {
+ if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
} else {
@@ -453,13 +454,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
return skb;
}
-static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, struct sk_buff *skb,
- int paylen)
+static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
+ struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 payload)
{
int err;
- err = rxe_prepare(pkt, skb);
+ err = rxe_prepare(av, pkt, skb);
if (err)
return err;
@@ -467,19 +468,19 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
- memcpy(payload_addr(pkt), tmp, paylen);
+ memcpy(payload_addr(pkt), tmp, payload);
- wqe->dma.resid -= paylen;
- wqe->dma.sge_offset += paylen;
+ wqe->dma.resid -= payload;
+ wqe->dma.sge_offset += payload;
} else {
err = copy_data(qp->pd, 0, &wqe->dma,
- payload_addr(pkt), paylen,
+ payload_addr(pkt), payload,
RXE_FROM_MR_OBJ);
if (err)
return err;
}
if (bth_pad(pkt)) {
- u8 *pad = payload_addr(pkt) + paylen;
+ u8 *pad = payload_addr(pkt) + payload;
memset(pad, 0, bth_pad(pkt));
}
@@ -503,7 +504,7 @@ static void update_wqe_state(struct rxe_qp *qp,
static void update_wqe_psn(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt,
- int payload)
+ u32 payload)
{
/* number of packets left to send including current one */
int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -545,8 +546,7 @@ static void rollback_state(struct rxe_send_wqe *wqe,
qp->req.psn = rollback_psn;
}
-static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, int payload)
+static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
qp->req.opcode = pkt->opcode;
@@ -604,9 +604,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
wqe->status = IB_WC_SUCCESS;
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
- if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- qp->sq_sig_type == IB_SIGNAL_ALL_WR)
- rxe_run_task(&qp->comp.task, 1);
+ /* There is no ack coming for local work requests
+ * which can lead to a deadlock. So go ahead and complete
+ * it now.
+ */
+ rxe_run_task(&qp->comp.task, 1);
return 0;
}
@@ -614,24 +616,39 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
int rxe_requester(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_pkt_info pkt;
struct sk_buff *skb;
struct rxe_send_wqe *wqe;
enum rxe_hdr_mask mask;
- int payload;
+ u32 payload;
int mtu;
int opcode;
+ int err;
int ret;
struct rxe_send_wqe rollback_wqe;
u32 rollback_psn;
struct rxe_queue *q = qp->sq.queue;
+ struct rxe_ah *ah;
+ struct rxe_av *av;
- rxe_add_ref(qp);
+ if (!rxe_get(qp))
+ return -EAGAIN;
-next_wqe:
- if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
+ if (unlikely(!qp->valid))
goto exit;
+ if (unlikely(qp->req.state == QP_STATE_ERROR)) {
+ wqe = req_next_wqe(qp);
+ if (wqe)
+ /*
+ * Generate an error completion for error qp state
+ */
+ goto err;
+ else
+ goto exit;
+ }
+
if (unlikely(qp->req.state == QP_STATE_RESET)) {
qp->req.wqe_index = queue_get_consumer(q,
QUEUE_TYPE_FROM_CLIENT);
@@ -639,10 +656,17 @@ next_wqe:
qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
+ qp->req.wait_for_rnr_timer = 0;
goto exit;
}
- if (unlikely(qp->req.need_retry)) {
+ /* we come here if the retransmit timer has fired
+ * or if the rnr timer has fired. If the retransmit
+ * timer fires while we are processing an RNR NAK wait
+ * until the rnr timer has fired before starting the
+ * retry flow
+ */
+ if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
req_retry(qp);
qp->req.need_retry = 0;
}
@@ -651,12 +675,17 @@ next_wqe:
if (unlikely(!wqe))
goto exit;
+ if (rxe_wqe_is_fenced(qp, wqe)) {
+ qp->req.wait_fence = 1;
+ goto exit;
+ }
+
if (wqe->mask & WR_LOCAL_OP_MASK) {
- ret = rxe_do_local_ops(qp, wqe);
- if (unlikely(ret))
+ err = rxe_do_local_ops(qp, wqe);
+ if (unlikely(err))
goto err;
else
- goto next_wqe;
+ goto done;
}
if (unlikely(qp_type(qp) == IB_QPT_RC &&
@@ -676,7 +705,7 @@ next_wqe:
opcode = next_opcode(qp, wqe, wqe->wr.opcode);
if (unlikely(opcode < 0)) {
wqe->status = IB_WC_LOC_QP_OP_ERR;
- goto exit;
+ goto err;
}
mask = rxe_opcode[opcode].mask;
@@ -704,31 +733,51 @@ next_wqe:
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- __rxe_do_task(&qp->comp.task);
- rxe_drop_ref(qp);
- return 0;
+ rxe_run_task(&qp->comp.task, 0);
+ goto done;
}
payload = mtu;
}
- skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
+ pkt.rxe = rxe;
+ pkt.opcode = opcode;
+ pkt.qp = qp;
+ pkt.psn = qp->req.psn;
+ pkt.mask = rxe_opcode[opcode].mask;
+ pkt.wqe = wqe;
+
+ av = rxe_get_av(&pkt, &ah);
+ if (unlikely(!av)) {
+ pr_err("qp#%d Failed no address vector\n", qp_num(qp));
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+ }
+
+ skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
wqe->status = IB_WC_LOC_QP_OP_ERR;
+ if (ah)
+ rxe_put(ah);
goto err;
}
- ret = finish_packet(qp, wqe, &pkt, skb, payload);
- if (unlikely(ret)) {
+ err = finish_packet(qp, av, wqe, &pkt, skb, payload);
+ if (unlikely(err)) {
pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
- if (ret == -EFAULT)
+ if (err == -EFAULT)
wqe->status = IB_WC_LOC_PROT_ERR;
else
wqe->status = IB_WC_LOC_QP_OP_ERR;
kfree_skb(skb);
+ if (ah)
+ rxe_put(ah);
goto err;
}
+ if (ah)
+ rxe_put(ah);
+
/*
* To prevent a race on wqe access between requester and completer,
* wqe members state and psn need to be set before calling
@@ -738,13 +787,14 @@ next_wqe:
save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
- ret = rxe_xmit_packet(qp, &pkt, skb);
- if (ret) {
+
+ err = rxe_xmit_packet(qp, &pkt, skb);
+ if (err) {
qp->need_req_skb = 1;
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
- if (ret == -EAGAIN) {
+ if (err == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
goto exit;
}
@@ -753,15 +803,25 @@ next_wqe:
goto err;
}
- update_state(qp, wqe, &pkt, payload);
-
- goto next_wqe;
+ update_state(qp, &pkt);
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the tasklet. A zero return
+ * will continue looping and return to rxe_requester
+ */
+done:
+ ret = 0;
+ goto out;
err:
+ /* update wqe_index for each wqe completion */
+ qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
wqe->state = wqe_state_error;
- __rxe_do_task(&qp->comp.task);
-
+ qp->req.state = QP_STATE_ERROR;
+ rxe_run_task(&qp->comp.task, 0);
exit:
- rxe_drop_ref(qp);
- return -EAGAIN;
+ ret = -EAGAIN;
+out:
+ rxe_put(qp);
+
+ return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index e8f435fa6e4d..693081e813ec 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -21,6 +21,7 @@ enum resp_states {
RESPST_CHK_RKEY,
RESPST_EXECUTE,
RESPST_READ_REPLY,
+ RESPST_ATOMIC_REPLY,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@@ -55,6 +56,7 @@ static char *resp_state_name[] = {
[RESPST_CHK_RKEY] = "CHK_RKEY",
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
+ [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -99,7 +101,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
if (qp->resp.state == QP_STATE_ERROR) {
while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
@@ -277,7 +279,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
break;
case IB_QPT_UD:
- case IB_QPT_SMI:
case IB_QPT_GSI:
break;
@@ -297,21 +298,22 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
struct ib_event ev;
unsigned int count;
size_t size;
+ unsigned long flags;
if (srq->error)
return RESPST_ERR_RNR;
- spin_lock_bh(&srq->rq.consumer_lock);
+ spin_lock_irqsave(&srq->rq.consumer_lock, flags);
wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
if (!wqe) {
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
return RESPST_ERR_RNR;
}
/* don't trust user space data */
if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
return RESPST_ERR_MALFORMED_WQE;
}
@@ -327,11 +329,11 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
goto event;
}
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
return RESPST_CHK_LENGTH;
event:
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
@@ -448,7 +450,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
- pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
+ pr_debug("%s: no MW matches rkey %#x\n",
+ __func__, rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -463,12 +466,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (mw->access & IB_ZERO_BASED)
qp->resp.offset = mw->addr;
- rxe_drop_ref(mw);
- rxe_add_ref(mr);
+ rxe_put(mw);
+ rxe_get(mr);
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
- pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
+ pr_debug("%s: no MR matches rkey %#x\n",
+ __func__, rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -507,9 +511,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
err:
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
if (mw)
- rxe_drop_ref(mw);
+ rxe_put(mw);
return state;
}
@@ -549,50 +553,106 @@ out:
return rc;
}
+static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ int type)
+{
+ struct resp_res *res;
+ u32 pkts;
+
+ res = &qp->resp.resources[qp->resp.res_head];
+ rxe_advance_resp_resource(qp);
+ free_rd_atomic_resource(res);
+
+ res->type = type;
+ res->replay = 0;
+
+ switch (type) {
+ case RXE_READ_MASK:
+ res->read.va = qp->resp.va + qp->resp.offset;
+ res->read.va_org = qp->resp.va + qp->resp.offset;
+ res->read.resid = qp->resp.resid;
+ res->read.length = qp->resp.resid;
+ res->read.rkey = qp->resp.rkey;
+
+ pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
+ res->first_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
+
+ res->state = rdatm_res_state_new;
+ break;
+ case RXE_ATOMIC_MASK:
+ res->first_psn = pkt->psn;
+ res->last_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ break;
+ }
+
+ return res;
+}
+
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
-static enum resp_states process_atomic(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
+static enum resp_states atomic_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
{
u64 *vaddr;
enum resp_states ret;
struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
+ u64 value;
- if (mr->state != RXE_MR_STATE_VALID) {
- ret = RESPST_ERR_RKEY_VIOLATION;
- goto out;
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
+ qp->resp.res = res;
}
- vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
+ if (!res->replay) {
+ if (mr->state != RXE_MR_STATE_VALID) {
+ ret = RESPST_ERR_RKEY_VIOLATION;
+ goto out;
+ }
- /* check vaddr is 8 bytes aligned. */
- if (!vaddr || (uintptr_t)vaddr & 7) {
- ret = RESPST_ERR_MISALIGNED_ATOMIC;
- goto out;
- }
+ vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset,
+ sizeof(u64));
- spin_lock_bh(&atomic_ops_lock);
+ /* check vaddr is 8 bytes aligned. */
+ if (!vaddr || (uintptr_t)vaddr & 7) {
+ ret = RESPST_ERR_MISALIGNED_ATOMIC;
+ goto out;
+ }
- qp->resp.atomic_orig = *vaddr;
+ spin_lock_bh(&atomic_ops_lock);
+ res->atomic.orig_val = value = *vaddr;
- if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
- pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
- if (*vaddr == atmeth_comp(pkt))
- *vaddr = atmeth_swap_add(pkt);
- } else {
- *vaddr += atmeth_swap_add(pkt);
- }
+ if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
+ if (value == atmeth_comp(pkt))
+ value = atmeth_swap_add(pkt);
+ } else {
+ value += atmeth_swap_add(pkt);
+ }
- spin_unlock_bh(&atomic_ops_lock);
+ *vaddr = value;
+ spin_unlock_bh(&atomic_ops_lock);
- ret = RESPST_NONE;
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+ }
+
+ ret = RESPST_ACKNOWLEDGE;
out:
return ret;
}
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt,
struct rxe_pkt_info *ack,
int opcode,
int payload,
@@ -630,9 +690,9 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
}
if (ack->mask & RXE_ATMACK_MASK)
- atmack_set_orig(ack, qp->resp.atomic_orig);
+ atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
- err = rxe_prepare(ack, skb);
+ err = rxe_prepare(&qp->pri_av, ack, skb);
if (err) {
kfree_skb(skb);
return NULL;
@@ -641,6 +701,59 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
return skb;
}
+/**
+ * rxe_recheck_mr - revalidate MR from rkey and get a reference
+ * @qp: the qp
+ * @rkey: the rkey
+ *
+ * This code allows the MR to be invalidated or deregistered or
+ * the MW if one was used to be invalidated or deallocated.
+ * It is assumed that the access permissions if originally good
+ * are OK and the mappings to be unchanged.
+ *
+ * TODO: If someone reregisters an MR to change its size or
+ * access permissions during the processing of an RDMA read
+ * we should kill the responder resource and complete the
+ * operation with an error.
+ *
+ * Return: mr on success else NULL
+ */
+static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mr *mr;
+ struct rxe_mw *mw;
+
+ if (rkey_is_mw(rkey)) {
+ mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
+ if (!mw)
+ return NULL;
+
+ mr = mw->mr;
+ if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
+ !mr || mr->state != RXE_MR_STATE_VALID) {
+ rxe_put(mw);
+ return NULL;
+ }
+
+ rxe_get(mr);
+ rxe_put(mw);
+
+ return mr;
+ }
+
+ mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+ if (!mr)
+ return NULL;
+
+ if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
+ rxe_put(mr);
+ return NULL;
+ }
+
+ return mr;
+}
+
/* RDMA read response. If res is not NULL, then we have a current RDMA request
* being processed or replayed.
*/
@@ -655,53 +768,32 @@ static enum resp_states read_reply(struct rxe_qp *qp,
int opcode;
int err;
struct resp_res *res = qp->resp.res;
+ struct rxe_mr *mr;
if (!res) {
- /* This is the first time we process that request. Get a
- * resource
- */
- res = &qp->resp.resources[qp->resp.res_head];
-
- free_rd_atomic_resource(qp, res);
- rxe_advance_resp_resource(qp);
-
- res->type = RXE_READ_MASK;
- res->replay = 0;
-
- res->read.va = qp->resp.va +
- qp->resp.offset;
- res->read.va_org = qp->resp.va +
- qp->resp.offset;
-
- res->first_psn = req_pkt->psn;
+ res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
+ qp->resp.res = res;
+ }
- if (reth_len(req_pkt)) {
- res->last_psn = (req_pkt->psn +
- (reth_len(req_pkt) + mtu - 1) /
- mtu - 1) & BTH_PSN_MASK;
+ if (res->state == rdatm_res_state_new) {
+ if (!res->replay) {
+ mr = qp->resp.mr;
+ qp->resp.mr = NULL;
} else {
- res->last_psn = res->first_psn;
+ mr = rxe_recheck_mr(qp, res->read.rkey);
+ if (!mr)
+ return RESPST_ERR_RKEY_VIOLATION;
}
- res->cur_psn = req_pkt->psn;
-
- res->read.resid = qp->resp.resid;
- res->read.length = qp->resp.resid;
- res->read.rkey = qp->resp.rkey;
- /* note res inherits the reference to mr from qp */
- res->read.mr = qp->resp.mr;
- qp->resp.mr = NULL;
-
- qp->resp.res = res;
- res->state = rdatm_res_state_new;
- }
-
- if (res->state == rdatm_res_state_new) {
if (res->read.resid <= mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
else
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
} else {
+ mr = rxe_recheck_mr(qp, res->read.rkey);
+ if (!mr)
+ return RESPST_ERR_RKEY_VIOLATION;
+
if (res->read.resid > mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
else
@@ -712,15 +804,17 @@ static enum resp_states read_reply(struct rxe_qp *qp,
payload = min_t(int, res->read.resid, mtu);
- skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
+ skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
res->cur_psn, AETH_ACK_UNLIMITED);
- if (!skb)
+ if (!skb) {
+ rxe_put(mr);
return RESPST_ERR_RNR;
+ }
- err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
- payload, RXE_FROM_MR_OBJ);
- if (err)
- pr_err("Failed copying memory\n");
+ rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
+ payload, RXE_FROM_MR_OBJ);
+ if (mr)
+ rxe_put(mr);
if (bth_pad(&ack_pkt)) {
u8 *pad = payload_addr(&ack_pkt) + payload;
@@ -729,10 +823,8 @@ static enum resp_states read_reply(struct rxe_qp *qp,
}
err = rxe_xmit_packet(qp, &ack_pkt, skb);
- if (err) {
- pr_err("Failed sending RDMA reply.\n");
+ if (err)
return RESPST_ERR_RNR;
- }
res->read.va += payload;
res->read.resid -= payload;
@@ -771,7 +863,6 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
if (pkt->mask & RXE_SEND_MASK) {
if (qp_type(qp) == IB_QPT_UD ||
- qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
if (skb->protocol == htons(ETH_P_IP)) {
memset(&hdr.reserved, 0,
@@ -798,9 +889,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.msn++;
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- err = process_atomic(qp, pkt);
- if (err)
- return err;
+ return RESPST_ATOMIC_REPLY;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@@ -814,6 +903,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_ERR_INVALIDATE_RKEY;
}
+ if (pkt->mask & RXE_END_MASK)
+ /* We successfully processed this new request. */
+ qp->resp.msn++;
+
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
qp->resp.ack_psn = qp->resp.psn;
@@ -821,11 +914,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
- if (pkt->mask & RXE_COMP_MASK) {
- /* We successfully processed this new request. */
- qp->resp.msn++;
+ if (pkt->mask & RXE_COMP_MASK)
return RESPST_COMPLETE;
- } else if (qp_type(qp) == IB_QPT_RC)
+ else if (qp_type(qp) == IB_QPT_RC)
return RESPST_ACKNOWLEDGE;
else
return RESPST_CLEANUP;
@@ -935,62 +1026,41 @@ finish:
return RESPST_CLEANUP;
}
-static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome, u32 psn)
+
+static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
+ int opcode, const char *msg)
{
- int err = 0;
+ int err;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
- 0, psn, syndrome);
- if (!skb) {
- err = -ENOMEM;
- goto err1;
- }
+ skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
+ if (!skb)
+ return -ENOMEM;
err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
- pr_err_ratelimited("Failed sending ack\n");
+ pr_err_ratelimited("Failed sending %s\n", msg);
-err1:
return err;
}
-static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome)
+static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
- int rc = 0;
- struct rxe_pkt_info ack_pkt;
- struct sk_buff *skb;
- struct resp_res *res;
-
- skb = prepare_ack_packet(qp, pkt, &ack_pkt,
- IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
- syndrome);
- if (!skb) {
- rc = -ENOMEM;
- goto out;
- }
+ return send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
+}
- res = &qp->resp.resources[qp->resp.res_head];
- free_rd_atomic_resource(qp, res);
- rxe_advance_resp_resource(qp);
+static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+ int ret = send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");
- skb_get(skb);
- res->type = RXE_ATOMIC_MASK;
- res->atomic.skb = skb;
- res->first_psn = ack_pkt.psn;
- res->last_psn = ack_pkt.psn;
- res->cur_psn = ack_pkt.psn;
-
- rc = rxe_xmit_packet(qp, &ack_pkt, skb);
- if (rc) {
- pr_err_ratelimited("Failed sending ack\n");
- rxe_drop_ref(qp);
- }
-out:
- return rc;
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
+ return ret;
}
static enum resp_states acknowledge(struct rxe_qp *qp,
@@ -1000,11 +1070,11 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
return RESPST_CLEANUP;
if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
- send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
+ send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
- send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
+ send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
return RESPST_CLEANUP;
}
@@ -1016,13 +1086,13 @@ static enum resp_states cleanup(struct rxe_qp *qp,
if (pkt) {
skb = skb_dequeue(&qp->req_pkts);
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -1057,7 +1127,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (pkt->mask & RXE_SEND_MASK ||
pkt->mask & RXE_WRITE_MASK) {
/* SEND. Ack again and cleanup. C9-105. */
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
@@ -1111,14 +1181,11 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* Find the operation in our list of responder resources. */
res = find_resource(qp, pkt->psn);
if (res) {
- skb_get(res->atomic.skb);
- /* Resend the result. */
- rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
- if (rc) {
- pr_err("Failed resending result. This flow is not handled - skb ignored\n");
- rc = RESPST_CLEANUP;
- goto out;
- }
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = RESPST_ATOMIC_REPLY;
+ goto out;
}
/* Resource not found. Class D error. Drop the request. */
@@ -1166,7 +1233,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -1180,7 +1247,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
struct rxe_queue *q = qp->rq.queue;
while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
@@ -1198,16 +1265,15 @@ int rxe_responder(void *arg)
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
- int ret = 0;
+ int ret;
- rxe_add_ref(qp);
+ if (!rxe_get(qp))
+ return -EAGAIN;
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
- if (!qp->valid) {
- ret = -EINVAL;
- goto done;
- }
+ if (!qp->valid)
+ goto exit;
switch (qp->resp.state) {
case QP_STATE_RESET:
@@ -1253,6 +1319,9 @@ int rxe_responder(void *arg)
case RESPST_READ_REPLY:
state = read_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_REPLY:
+ state = atomic_reply(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@@ -1264,7 +1333,7 @@ int rxe_responder(void *arg)
break;
case RESPST_ERR_PSN_OUT_OF_SEQ:
/* RC only - Class B. Drop packet. */
- send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
+ send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
state = RESPST_CLEANUP;
break;
@@ -1286,7 +1355,7 @@ int rxe_responder(void *arg)
if (qp_type(qp) == IB_QPT_RC) {
rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
/* RC - class B */
- send_ack(qp, pkt, AETH_RNR_NAK |
+ send_ack(qp, AETH_RNR_NAK |
(~AETH_TYPE_MASK &
qp->attr.min_rnr_timer),
pkt->psn);
@@ -1375,7 +1444,7 @@ int rxe_responder(void *arg)
case RESPST_ERROR:
qp->resp.goto_error = 0;
- pr_warn("qp#%d moved to error state\n", qp_num(qp));
+ pr_debug("qp#%d moved to error state\n", qp_num(qp));
rxe_qp_error(qp);
goto exit;
@@ -1384,9 +1453,16 @@ int rxe_responder(void *arg)
}
}
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the tasklet. A zero return
+ * will continue looping and return to rxe_responder
+ */
+done:
+ ret = 0;
+ goto out;
exit:
ret = -EAGAIN;
-done:
- rxe_drop_ref(qp);
+out:
+ rxe_put(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index eb1c4c3b3a78..02b39498c370 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -6,64 +6,34 @@
#include <linux/vmalloc.h>
#include "rxe.h"
-#include "rxe_loc.h"
#include "rxe_queue.h"
-int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
- struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
+int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
{
- if (srq && srq->error) {
- pr_warn("srq in error state\n");
+ struct ib_srq_attr *attr = &init->attr;
+
+ if (attr->max_wr > rxe->attr.max_srq_wr) {
+ pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+ attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
- if (mask & IB_SRQ_MAX_WR) {
- if (attr->max_wr > rxe->attr.max_srq_wr) {
- pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
- attr->max_wr, rxe->attr.max_srq_wr);
- goto err1;
- }
-
- if (attr->max_wr <= 0) {
- pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
- goto err1;
- }
-
- if (srq && srq->limit && (attr->max_wr < srq->limit)) {
- pr_warn("max_wr (%d) < srq->limit (%d)\n",
- attr->max_wr, srq->limit);
- goto err1;
- }
-
- if (attr->max_wr < RXE_MIN_SRQ_WR)
- attr->max_wr = RXE_MIN_SRQ_WR;
+ if (attr->max_wr <= 0) {
+ pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+ goto err1;
}
- if (mask & IB_SRQ_LIMIT) {
- if (attr->srq_limit > rxe->attr.max_srq_wr) {
- pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
- attr->srq_limit, rxe->attr.max_srq_wr);
- goto err1;
- }
+ if (attr->max_wr < RXE_MIN_SRQ_WR)
+ attr->max_wr = RXE_MIN_SRQ_WR;
- if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
- pr_warn("srq_limit (%d) > cur limit(%d)\n",
- attr->srq_limit,
- srq->rq.queue->buf->index_mask);
- goto err1;
- }
+ if (attr->max_sge > rxe->attr.max_srq_sge) {
+ pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
+ attr->max_sge, rxe->attr.max_srq_sge);
+ goto err1;
}
- if (mask == IB_SRQ_INIT_MASK) {
- if (attr->max_sge > rxe->attr.max_srq_sge) {
- pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
- attr->max_sge, rxe->attr.max_srq_sge);
- goto err1;
- }
-
- if (attr->max_sge < RXE_MIN_SRQ_SGE)
- attr->max_sge = RXE_MIN_SRQ_SGE;
- }
+ if (attr->max_sge < RXE_MIN_SRQ_SGE)
+ attr->max_sge = RXE_MIN_SRQ_SGE;
return 0;
@@ -83,7 +53,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
srq->ibsrq.event_handler = init->event_handler;
srq->ibsrq.srq_context = init->srq_context;
srq->limit = init->attr.srq_limit;
- srq->srq_num = srq->pelem.index;
+ srq->srq_num = srq->elem.index;
srq->rq.max_wr = init->attr.max_wr;
srq->rq.max_sge = init->attr.max_sge;
@@ -93,8 +63,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
spin_lock_init(&srq->rq.consumer_lock);
type = QUEUE_TYPE_FROM_CLIENT;
- q = rxe_queue_init(rxe, &srq->rq.max_wr,
- srq_wqe_size, type);
+ q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
if (!q) {
pr_warn("unable to allocate queue for srq\n");
return -ENOMEM;
@@ -121,6 +90,57 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
return 0;
}
+int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
+{
+ if (srq->error) {
+ pr_warn("srq in error state\n");
+ goto err1;
+ }
+
+ if (mask & IB_SRQ_MAX_WR) {
+ if (attr->max_wr > rxe->attr.max_srq_wr) {
+ pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+ attr->max_wr, rxe->attr.max_srq_wr);
+ goto err1;
+ }
+
+ if (attr->max_wr <= 0) {
+ pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+ goto err1;
+ }
+
+ if (srq->limit && (attr->max_wr < srq->limit)) {
+ pr_warn("max_wr (%d) < srq->limit (%d)\n",
+ attr->max_wr, srq->limit);
+ goto err1;
+ }
+
+ if (attr->max_wr < RXE_MIN_SRQ_WR)
+ attr->max_wr = RXE_MIN_SRQ_WR;
+ }
+
+ if (mask & IB_SRQ_LIMIT) {
+ if (attr->srq_limit > rxe->attr.max_srq_wr) {
+ pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
+ attr->srq_limit, rxe->attr.max_srq_wr);
+ goto err1;
+ }
+
+ if (attr->srq_limit > srq->rq.queue->buf->index_mask) {
+ pr_warn("srq_limit (%d) > cur limit(%d)\n",
+ attr->srq_limit,
+ srq->rq.queue->buf->index_mask);
+ goto err1;
+ }
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata)
@@ -154,3 +174,14 @@ err2:
srq->rq.queue = NULL;
return err;
}
+
+void rxe_srq_cleanup(struct rxe_pool_elem *elem)
+{
+ struct rxe_srq *srq = container_of(elem, typeof(*srq), elem);
+
+ if (srq->pd)
+ rxe_put(srq->pd);
+
+ if (srq->rq.queue)
+ rxe_queue_cleanup(srq->rq.queue);
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
deleted file mode 100644
index 666202ddff48..000000000000
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ /dev/null
@@ -1,119 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/*
- * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
- * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- */
-
-#include "rxe.h"
-#include "rxe_net.h"
-
-/* Copy argument and remove trailing CR. Return the new length. */
-static int sanitize_arg(const char *val, char *intf, int intf_len)
-{
- int len;
-
- if (!val)
- return 0;
-
- /* Remove newline. */
- for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++)
- intf[len] = val[len];
- intf[len] = 0;
-
- if (len == 0 || (val[len] != 0 && val[len] != '\n'))
- return 0;
-
- return len;
-}
-
-static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
-{
- int len;
- int err = 0;
- char intf[32];
- struct net_device *ndev;
- struct rxe_dev *exists;
-
- if (!rxe_initialized) {
- pr_err("Module parameters are not supported, use rdma link add or rxe_cfg\n");
- return -EAGAIN;
- }
-
- len = sanitize_arg(val, intf, sizeof(intf));
- if (!len) {
- pr_err("add: invalid interface name\n");
- return -EINVAL;
- }
-
- ndev = dev_get_by_name(&init_net, intf);
- if (!ndev) {
- pr_err("interface %s not found\n", intf);
- return -EINVAL;
- }
-
- if (is_vlan_dev(ndev)) {
- pr_err("rxe creation allowed on top of a real device only\n");
- err = -EPERM;
- goto err;
- }
-
- exists = rxe_get_dev_from_net(ndev);
- if (exists) {
- ib_device_put(&exists->ib_dev);
- pr_err("already configured on %s\n", intf);
- err = -EINVAL;
- goto err;
- }
-
- err = rxe_net_add("rxe%d", ndev);
- if (err) {
- pr_err("failed to add %s\n", intf);
- goto err;
- }
-
-err:
- dev_put(ndev);
- return err;
-}
-
-static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
-{
- int len;
- char intf[32];
- struct ib_device *ib_dev;
-
- len = sanitize_arg(val, intf, sizeof(intf));
- if (!len) {
- pr_err("add: invalid interface name\n");
- return -EINVAL;
- }
-
- if (strncmp("all", intf, len) == 0) {
- pr_info("rxe_sys: remove all");
- ib_unregister_driver(RDMA_DRIVER_RXE);
- return 0;
- }
-
- ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE);
- if (!ib_dev) {
- pr_err("not configured on %s\n", intf);
- return -EINVAL;
- }
-
- ib_unregister_device_and_put(ib_dev);
-
- return 0;
-}
-
-static const struct kernel_param_ops rxe_add_ops = {
- .set = rxe_param_set_add,
-};
-
-static const struct kernel_param_ops rxe_remove_ops = {
- .set = rxe_param_set_remove,
-};
-
-module_param_cb(add, &rxe_add_ops, NULL, 0200);
-MODULE_PARM_DESC(add, "DEPRECATED. Create RXE device over network interface");
-module_param_cb(remove, &rxe_remove_ops, NULL, 0200);
-MODULE_PARM_DESC(remove, "DEPRECATED. Remove RXE device over network interface");
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 6951fdcb31bf..ec2b7de1c497 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/hardirq.h>
-#include "rxe_task.h"
+#include "rxe.h"
int __rxe_do_task(struct rxe_task *task)
@@ -32,25 +32,25 @@ void rxe_do_task(struct tasklet_struct *t)
{
int cont;
int ret;
- unsigned long flags;
struct rxe_task *task = from_tasklet(task, t, tasklet);
+ unsigned int iterations = RXE_MAX_ITERATIONS;
- spin_lock_irqsave(&task->state_lock, flags);
+ spin_lock_bh(&task->state_lock);
switch (task->state) {
case TASK_STATE_START:
task->state = TASK_STATE_BUSY;
- spin_unlock_irqrestore(&task->state_lock, flags);
+ spin_unlock_bh(&task->state_lock);
break;
case TASK_STATE_BUSY:
task->state = TASK_STATE_ARMED;
fallthrough;
case TASK_STATE_ARMED:
- spin_unlock_irqrestore(&task->state_lock, flags);
+ spin_unlock_bh(&task->state_lock);
return;
default:
- spin_unlock_irqrestore(&task->state_lock, flags);
+ spin_unlock_bh(&task->state_lock);
pr_warn("%s failed with bad state %d\n", __func__, task->state);
return;
}
@@ -59,16 +59,23 @@ void rxe_do_task(struct tasklet_struct *t)
cont = 0;
ret = task->func(task->arg);
- spin_lock_irqsave(&task->state_lock, flags);
+ spin_lock_bh(&task->state_lock);
switch (task->state) {
case TASK_STATE_BUSY:
- if (ret)
+ if (ret) {
task->state = TASK_STATE_START;
- else
+ } else if (iterations--) {
cont = 1;
+ } else {
+ /* reschedule the tasklet and exit
+ * the loop to give up the cpu
+ */
+ tasklet_schedule(&task->tasklet);
+ task->state = TASK_STATE_START;
+ }
break;
- /* soneone tried to run the task since the last time we called
+ /* someone tried to run the task since the last time we called
* func, so we will call one more time regardless of the
* return value
*/
@@ -81,16 +88,15 @@ void rxe_do_task(struct tasklet_struct *t)
pr_warn("%s failed with bad state %d\n", __func__,
task->state);
}
- spin_unlock_irqrestore(&task->state_lock, flags);
+ spin_unlock_bh(&task->state_lock);
} while (cont);
task->ret = ret;
}
-int rxe_init_task(void *obj, struct rxe_task *task,
+int rxe_init_task(struct rxe_task *task,
void *arg, int (*func)(void *), char *name)
{
- task->obj = obj;
task->arg = arg;
task->func = func;
snprintf(task->name, sizeof(task->name), "%s", name);
@@ -106,7 +112,6 @@ int rxe_init_task(void *obj, struct rxe_task *task,
void rxe_cleanup_task(struct rxe_task *task)
{
- unsigned long flags;
bool idle;
/*
@@ -116,9 +121,9 @@ void rxe_cleanup_task(struct rxe_task *task)
task->destroyed = true;
do {
- spin_lock_irqsave(&task->state_lock, flags);
+ spin_lock_bh(&task->state_lock);
idle = (task->state == TASK_STATE_START);
- spin_unlock_irqrestore(&task->state_lock, flags);
+ spin_unlock_bh(&task->state_lock);
} while (!idle);
tasklet_kill(&task->tasklet);
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 11d183fd3338..7f612a1c68a7 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -19,7 +19,6 @@ enum {
* called again.
*/
struct rxe_task {
- void *obj;
struct tasklet_struct tasklet;
int state;
spinlock_t state_lock; /* spinlock for task state */
@@ -35,7 +34,7 @@ struct rxe_task {
* arg => parameter to pass to fcn
* func => function to call until it returns != 0
*/
-int rxe_init_task(void *obj, struct rxe_task *task,
+int rxe_init_task(struct rxe_task *task,
void *arg, int (*func)(void *), char *name);
/* cleanup task */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 0aa0d7e52773..88825edc7dce 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -7,8 +7,8 @@
#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>
+
#include "rxe.h"
-#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"
@@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
struct rxe_ucontext *uc = to_ruc(ibuc);
- rxe_drop_ref(uc);
+ rxe_cleanup(uc);
}
static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);
- rxe_drop_ref(pd);
+ rxe_cleanup(pd);
return 0;
}
@@ -176,21 +176,20 @@ static int rxe_create_ah(struct ib_ah *ibah,
if (err)
return err;
- err = rxe_add_to_pool(&rxe->ah_pool, ah);
+ err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
+ init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
if (err)
return err;
/* create index > 0 */
- rxe_add_index(ah);
- ah->ah_num = ah->pelem.index;
+ ah->ah_num = ah->elem.index;
if (uresp) {
/* only if new user provider */
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
sizeof(uresp->ah_num));
if (err) {
- rxe_drop_index(ah);
- rxe_drop_ref(ah);
+ rxe_cleanup(ah);
return -EFAULT;
}
} else if (ah->is_user) {
@@ -199,6 +198,8 @@ static int rxe_create_ah(struct ib_ah *ibah,
}
rxe_init_av(init_attr->ah_attr, &ah->av);
+ rxe_finalize(ah);
+
return 0;
}
@@ -230,8 +231,8 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
- rxe_drop_index(ah);
- rxe_drop_ref(ah);
+ rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
+
return 0;
}
@@ -261,7 +262,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER);
recv_wqe->wr_id = ibwr->wr_id;
- recv_wqe->num_sge = num_sge;
memcpy(recv_wqe->dma.sge, ibwr->sg_list,
num_sge * sizeof(struct ib_sge));
@@ -289,36 +289,35 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
struct rxe_srq *srq = to_rsrq(ibsrq);
struct rxe_create_srq_resp __user *uresp = NULL;
- if (init->srq_type != IB_SRQT_BASIC)
- return -EOPNOTSUPP;
-
if (udata) {
if (udata->outlen < sizeof(*uresp))
return -EINVAL;
uresp = udata->outbuf;
}
- err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
+ if (init->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
+ err = rxe_srq_chk_init(rxe, init);
if (err)
- goto err1;
+ return err;
err = rxe_add_to_pool(&rxe->srq_pool, srq);
if (err)
- goto err1;
+ return err;
- rxe_add_ref(pd);
+ rxe_get(pd);
srq->pd = pd;
err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
if (err)
- goto err2;
+ goto err_cleanup;
return 0;
-err2:
- rxe_drop_ref(pd);
- rxe_drop_ref(srq);
-err1:
+err_cleanup:
+ rxe_cleanup(srq);
+
return err;
}
@@ -342,16 +341,12 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
err = rxe_srq_chk_attr(rxe, srq, attr, mask);
if (err)
- goto err1;
+ return err;
err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
if (err)
- goto err1;
-
+ return err;
return 0;
-
-err1:
- return err;
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@@ -371,11 +366,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
- if (srq->rq.queue)
- rxe_queue_cleanup(srq->rq.queue);
-
- rxe_drop_ref(srq->pd);
- rxe_drop_ref(srq);
+ rxe_cleanup(srq);
return 0;
}
@@ -383,8 +374,8 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
int err = 0;
- unsigned long flags;
struct rxe_srq *srq = to_rsrq(ibsrq);
+ unsigned long flags;
spin_lock_irqsave(&srq->rq.producer_lock, flags);
@@ -438,16 +429,15 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
if (err)
return err;
- rxe_add_index(qp);
err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
if (err)
goto qp_init;
+ rxe_finalize(qp);
return 0;
qp_init:
- rxe_drop_index(qp);
- rxe_drop_ref(qp);
+ rxe_cleanup(qp);
return err;
}
@@ -469,6 +459,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (err)
goto err1;
+ if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
+ qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ qp->ibqp.qp_num,
+ qp->attr.dest_qp_num);
+
return 0;
err1:
@@ -489,10 +484,13 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct rxe_qp *qp = to_rqp(ibqp);
+ int ret;
+
+ ret = rxe_qp_chk_destroy(qp);
+ if (ret)
+ return ret;
- rxe_qp_destroy(qp);
- rxe_drop_index(qp);
- rxe_drop_ref(qp);
+ rxe_cleanup(qp);
return 0;
}
@@ -527,12 +525,10 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
const struct ib_send_wr *ibwr)
{
wr->wr_id = ibwr->wr_id;
- wr->num_sge = ibwr->num_sge;
wr->opcode = ibwr->opcode;
wr->send_flags = ibwr->send_flags;
if (qp_type(qp) == IB_QPT_UD ||
- qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
struct ib_ah *ibah = ud_wr(ibwr)->ah;
@@ -803,9 +799,15 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rxe_cq *cq = to_rcq(ibcq);
+ /* See IBA C11-17: The CI shall return an error if this Verb is
+ * invoked while a Work Queue is still associated with the CQ.
+ */
+ if (atomic_read(&cq->num_wq))
+ return -EINVAL;
+
rxe_cq_disable(cq);
- rxe_drop_ref(cq);
+ rxe_cleanup(cq);
return 0;
}
@@ -870,9 +872,9 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct rxe_cq *cq = to_rcq(ibcq);
- unsigned long irq_flags;
int ret = 0;
int empty;
+ unsigned long irq_flags;
spin_lock_irqsave(&cq->cq_lock, irq_flags);
if (cq->notify != IB_CQ_NEXT_COMP)
@@ -898,9 +900,11 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
if (!mr)
return ERR_PTR(-ENOMEM);
- rxe_add_index(mr);
- rxe_add_ref(pd);
- rxe_mr_init_dma(pd, access, mr);
+ rxe_get(pd);
+ mr->ibmr.pd = ibpd;
+
+ rxe_mr_init_dma(access, mr);
+ rxe_finalize(mr);
return &mr->ibmr;
}
@@ -922,20 +926,20 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
goto err2;
}
- rxe_add_index(mr);
- rxe_add_ref(pd);
+ rxe_get(pd);
+ mr->ibmr.pd = ibpd;
- err = rxe_mr_init_user(pd, start, length, iova, access, mr);
+ err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
if (err)
goto err3;
+ rxe_finalize(mr);
+
return &mr->ibmr;
err3:
- rxe_drop_ref(pd);
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
+ rxe_cleanup(mr);
err2:
return ERR_PTR(err);
}
@@ -957,72 +961,57 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
goto err1;
}
- rxe_add_index(mr);
+ rxe_get(pd);
+ mr->ibmr.pd = ibpd;
- rxe_add_ref(pd);
-
- err = rxe_mr_init_fast(pd, max_num_sg, mr);
+ err = rxe_mr_init_fast(max_num_sg, mr);
if (err)
goto err2;
+ rxe_finalize(mr);
+
return &mr->ibmr;
err2:
- rxe_drop_ref(pd);
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
+ rxe_cleanup(mr);
err1:
return ERR_PTR(err);
}
-/* build next_map_set from scatterlist
- * The IB_WR_REG_MR WR will swap map_sets
- */
-static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
+static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
struct rxe_mr *mr = to_rmr(ibmr);
- struct rxe_map_set *set = mr->next_map_set;
- int n;
+ struct rxe_map *map;
+ struct rxe_phys_buf *buf;
- set->nbuf = 0;
+ if (unlikely(mr->nbuf == mr->num_buf))
+ return -ENOMEM;
- n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page);
+ map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
+ buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
- set->va = ibmr->iova;
- set->iova = ibmr->iova;
- set->length = ibmr->length;
- set->page_shift = ilog2(ibmr->page_size);
- set->page_mask = ibmr->page_size - 1;
- set->offset = set->iova & set->page_mask;
+ buf->addr = addr;
+ buf->size = ibmr->page_size;
+ mr->nbuf++;
- return n;
+ return 0;
}
-static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
+static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
{
- int err;
- struct rxe_dev *rxe = to_rdev(ibqp->device);
- struct rxe_qp *qp = to_rqp(ibqp);
- struct rxe_mc_grp *grp;
-
- /* takes a ref on grp if successful */
- err = rxe_mcast_get_grp(rxe, mgid, &grp);
- if (err)
- return err;
+ struct rxe_mr *mr = to_rmr(ibmr);
+ int n;
- err = rxe_mcast_add_grp_elem(rxe, qp, grp);
+ mr->nbuf = 0;
- rxe_drop_ref(grp);
- return err;
-}
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
-static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
-{
- struct rxe_dev *rxe = to_rdev(ibqp->device);
- struct rxe_qp *qp = to_rqp(ibqp);
+ mr->page_shift = ilog2(ibmr->page_size);
+ mr->page_mask = ibmr->page_size - 1;
+ mr->offset = ibmr->iova & mr->page_mask;
- return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
+ return n;
}
static ssize_t parent_show(struct device *device,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 35e041450090..5f5cbfcb3569 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -9,7 +9,6 @@
#include <linux/interrupt.h>
#include <linux/workqueue.h>
-#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
#include "rxe_hw_counters.h"
@@ -35,17 +34,17 @@ static inline int psn_compare(u32 psn_a, u32 psn_b)
struct rxe_ucontext {
struct ib_ucontext ibuc;
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
};
struct rxe_pd {
struct ib_pd ibpd;
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
};
struct rxe_ah {
struct ib_ah ibah;
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
struct rxe_av av;
bool is_user;
int ah_num;
@@ -60,13 +59,14 @@ struct rxe_cqe {
struct rxe_cq {
struct ib_cq ibcq;
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
struct rxe_queue *queue;
spinlock_t cq_lock;
u8 notify;
bool is_dying;
bool is_user;
struct tasklet_struct comp_task;
+ atomic_t num_wq;
};
enum wqe_state {
@@ -95,7 +95,7 @@ struct rxe_rq {
struct rxe_srq {
struct ib_srq ibsrq;
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
struct rxe_pd *pd;
struct rxe_rq rq;
u32 srq_num;
@@ -123,11 +123,13 @@ struct rxe_req_info {
int need_rd_atomic;
int wait_psn;
int need_retry;
+ int wait_for_rnr_timer;
int noack_pkts;
struct rxe_task task;
};
struct rxe_comp_info {
+ enum rxe_qp_state state;
u32 psn;
int opcode;
int timeout;
@@ -154,10 +156,9 @@ struct resp_res {
union {
struct {
- struct sk_buff *skb;
+ u64 orig_val;
} atomic;
struct {
- struct rxe_mr *mr;
u64 va_org;
u32 rkey;
u32 length;
@@ -189,7 +190,6 @@ struct rxe_resp_info {
u32 resid;
u32 rkey;
u32 length;
- u64 atomic_orig;
/* SRQ only */
struct {
@@ -209,7 +209,7 @@ struct rxe_resp_info {
struct rxe_qp {
struct ib_qp ibqp;
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
struct ib_qp_attr attr;
unsigned int valid;
unsigned int mtu;
@@ -232,9 +232,7 @@ struct rxe_qp {
struct rxe_av pri_av;
struct rxe_av alt_av;
- /* list of mcast groups qp has joined (for cleanup) */
- struct list_head grp_list;
- spinlock_t grp_lock; /* guard grp_list */
+ atomic_t mcg_num;
struct sk_buff_head req_pkts;
struct sk_buff_head resp_pkts;
@@ -290,17 +288,6 @@ struct rxe_map {
struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
};
-struct rxe_map_set {
- struct rxe_map **map;
- u64 va;
- u64 iova;
- size_t length;
- u32 offset;
- u32 nbuf;
- int page_shift;
- int page_mask;
-};
-
static inline int rkey_is_mw(u32 rkey)
{
u32 index = rkey >> 8;
@@ -309,7 +296,7 @@ static inline int rkey_is_mw(u32 rkey)
}
struct rxe_mr {
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
struct ib_mr ibmr;
struct ib_umem *umem;
@@ -318,20 +305,23 @@ struct rxe_mr {
u32 rkey;
enum rxe_mr_state state;
enum ib_mr_type type;
+ u32 offset;
int access;
+ int page_shift;
+ int page_mask;
int map_shift;
int map_mask;
u32 num_buf;
+ u32 nbuf;
u32 max_buf;
u32 num_map;
atomic_t num_mw;
- struct rxe_map_set *cur_map_set;
- struct rxe_map_set *next_map_set;
+ struct rxe_map **map;
};
enum rxe_mw_state {
@@ -342,7 +332,7 @@ enum rxe_mw_state {
struct rxe_mw {
struct ib_mw ibmw;
- struct rxe_pool_entry pelem;
+ struct rxe_pool_elem elem;
spinlock_t lock;
enum rxe_mw_state state;
struct rxe_qp *qp; /* Type 2 only */
@@ -353,23 +343,20 @@ struct rxe_mw {
u64 length;
};
-struct rxe_mc_grp {
- struct rxe_pool_entry pelem;
- spinlock_t mcg_lock; /* guard group */
+struct rxe_mcg {
+ struct rb_node node;
+ struct kref ref_cnt;
struct rxe_dev *rxe;
struct list_head qp_list;
union ib_gid mgid;
- int num_qp;
+ atomic_t qp_num;
u32 qkey;
u16 pkey;
};
-struct rxe_mc_elem {
- struct rxe_pool_entry pelem;
+struct rxe_mca {
struct list_head qp_list;
- struct list_head grp_list;
struct rxe_qp *qp;
- struct rxe_mc_grp *grp;
};
struct rxe_port {
@@ -379,7 +366,6 @@ struct rxe_port {
spinlock_t port_lock; /* guard port */
unsigned int mtu_cap;
/* special QPs */
- u32 qp_smi_index;
u32 qp_gsi_index;
};
@@ -392,8 +378,6 @@ struct rxe_dev {
struct net_device *ndev;
- int xmit_errors;
-
struct rxe_pool uc_pool;
struct rxe_pool pd_pool;
struct rxe_pool ah_pool;
@@ -402,8 +386,12 @@ struct rxe_dev {
struct rxe_pool cq_pool;
struct rxe_pool mr_pool;
struct rxe_pool mw_pool;
- struct rxe_pool mc_grp_pool;
- struct rxe_pool mc_elem_pool;
+
+ /* multicast support */
+ spinlock_t mcg_lock;
+ struct rb_root mcg_tree;
+ atomic_t mcg_num;
+ atomic_t mcg_attach;
spinlock_t pending_lock; /* guard pending_mmaps */
struct list_head pending_mmaps;
@@ -484,6 +472,4 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
-void rxe_mc_cleanup(struct rxe_pool_entry *arg);
-
#endif /* RXE_VERBS_H */
diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index 1b5105cbabae..81b70a3eeb87 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -1,7 +1,10 @@
config RDMA_SIW
tristate "Software RDMA over TCP/IP (iWARP) driver"
- depends on INET && INFINIBAND && LIBCRC32C
+ depends on INET && INFINIBAND
depends on INFINIBAND_VIRT_DMA
+ select LIBCRC32C
+ select CRYPTO
+ select CRYPTO_CRC32C
help
This driver implements the iWARP RDMA transport over
the Linux TCP/IP network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 368959ae9a8c..2f3a9cda3850 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -418,6 +418,7 @@ struct siw_qp {
struct ib_qp base_qp;
struct siw_device *sdev;
struct kref ref;
+ struct completion qp_free;
struct list_head devq;
int tx_cpu;
struct siw_qp_attrs attrs;
@@ -644,14 +645,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
return &qp->orq[qp->orq_get % qp->attrs.orq_size];
}
-static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
-{
- return &qp->orq[qp->orq_put % qp->attrs.orq_size];
-}
-
static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
{
- struct siw_sqe *orq_e = orq_get_tail(qp);
+ struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size];
if (READ_ONCE(orq_e->flags) == 0)
return orq_e;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 7acdd3c3a599..f88d2971c2c6 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -725,11 +725,11 @@ static int siw_proc_mpareply(struct siw_cep *cep)
enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;
rv = siw_recv_mpa_rr(cep);
- if (rv != -EAGAIN)
- siw_cancel_mpatimer(cep);
if (rv)
goto out_err;
+ siw_cancel_mpatimer(cep);
+
rep = &cep->mpa.hdr;
if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) {
@@ -895,7 +895,8 @@ static int siw_proc_mpareply(struct siw_cep *cep)
}
out_err:
- siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+ if (rv != -EAGAIN)
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
return rv;
}
@@ -968,14 +969,15 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_set_inuse(new_cep);
rv = siw_proc_mpareq(new_cep);
- siw_cep_set_free(new_cep);
-
if (rv != -EAGAIN) {
siw_cep_put(cep);
new_cep->listen_cep = NULL;
- if (rv)
+ if (rv) {
+ siw_cep_set_free(new_cep);
goto error;
+ }
}
+ siw_cep_set_free(new_cep);
}
return;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 9093e6a80b26..dacc174604bf 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -98,15 +98,14 @@ static int siw_create_tx_threads(void)
continue;
siw_tx_thread[cpu] =
- kthread_create(siw_run_sq, (unsigned long *)(long)cpu,
- "siw_tx/%d", cpu);
+ kthread_run_on_cpu(siw_run_sq,
+ (unsigned long *)(long)cpu,
+ cpu, "siw_tx/%u");
if (IS_ERR(siw_tx_thread[cpu])) {
siw_tx_thread[cpu] = NULL;
continue;
}
- kthread_bind(siw_tx_thread[cpu], cpu);
- wake_up_process(siw_tx_thread[cpu]);
assigned++;
}
return assigned;
@@ -120,6 +119,7 @@ static int siw_dev_qualified(struct net_device *netdev)
* <linux/if_arp.h> for type identifiers.
*/
if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 ||
+ netdev->type == ARPHRD_NONE ||
(netdev->type == ARPHRD_LOOPBACK && loopback_enabled))
return 1;
@@ -316,12 +316,12 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
sdev->netdev = netdev;
- if (netdev->type != ARPHRD_LOOPBACK) {
+ if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) {
addrconf_addr_eui48((unsigned char *)&base_dev->node_guid,
netdev->dev_addr);
} else {
/*
- * The loopback device does not have a HW address,
+ * This device does not have a HW address,
* but connection mangagement lib expects gid != 0
*/
size_t len = min_t(size_t, strlen(base_dev->name), 6);
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 7e01f2438afc..e6f634971228 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1342,6 +1342,6 @@ void siw_free_qp(struct kref *ref)
vfree(qp->orq);
siw_put_tx_cpu(qp->tx_cpu);
-
+ complete(&qp->qp_free);
atomic_dec(&sdev->num_qp);
}
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index 60116f20653c..fd721cc19682 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -961,27 +961,28 @@ out:
static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
{
struct sk_buff *skb = srx->skb;
+ int avail = min(srx->skb_new, srx->fpdu_part_rem);
u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
__wsum crc_in, crc_own = 0;
siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
srx->fpdu_part_rem, srx->skb_new, srx->pad);
- if (srx->skb_new < srx->fpdu_part_rem)
- return -EAGAIN;
-
- skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);
+ skb_copy_bits(skb, srx->skb_offset, tbuf, avail);
- if (srx->mpa_crc_hd && srx->pad)
- crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
+ srx->skb_new -= avail;
+ srx->skb_offset += avail;
+ srx->skb_copied += avail;
+ srx->fpdu_part_rem -= avail;
- srx->skb_new -= srx->fpdu_part_rem;
- srx->skb_offset += srx->fpdu_part_rem;
- srx->skb_copied += srx->fpdu_part_rem;
+ if (srx->fpdu_part_rem)
+ return -EAGAIN;
if (!srx->mpa_crc_hd)
return 0;
+ if (srx->pad)
+ crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
/*
* CRC32 is computed, transmitted and received directly in NBO,
* so there's never a reason to convert byte order.
@@ -1083,10 +1084,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
* completely received.
*/
if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
- bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;
+ int hdrlen = iwarp_pktinfo[opcode].hdr_len;
- if (srx->skb_new < bytes)
- return -EAGAIN;
+ bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new);
skb_copy_bits(skb, srx->skb_offset,
(char *)c_hdr + srx->fpdu_part_rcvd, bytes);
@@ -1096,6 +1096,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
srx->skb_new -= bytes;
srx->skb_offset += bytes;
srx->skb_copied += bytes;
+
+ if (srx->fpdu_part_rcvd < hdrlen)
+ return -EAGAIN;
}
/*
@@ -1153,11 +1156,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
spin_lock_irqsave(&qp->orq_lock, flags);
- rreq = orq_get_current(qp);
-
/* free current orq entry */
+ rreq = orq_get_current(qp);
WRITE_ONCE(rreq->flags, 0);
+ qp->orq_get++;
+
if (qp->tx_ctx.orq_fence) {
if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
@@ -1165,10 +1169,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
rv = -EPROTO;
goto out;
}
- /* resume SQ processing */
+ /* resume SQ processing, if possible */
if (tx_waiting->sqe.opcode == SIW_OP_READ ||
tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- rreq = orq_get_tail(qp);
+
+ /* SQ processing was stopped because of a full ORQ */
+ rreq = orq_get_free(qp);
if (unlikely(!rreq)) {
pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
rv = -EPROTO;
@@ -1181,15 +1187,14 @@ static int siw_check_tx_fence(struct siw_qp *qp)
resume_tx = 1;
} else if (siw_orq_empty(qp)) {
+ /*
+ * SQ processing was stopped by fenced work request.
+ * Resume since all previous Read's are now completed.
+ */
qp->tx_ctx.orq_fence = 0;
resume_tx = 1;
- } else {
- pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
- qp_id(qp), qp->orq_get, qp->orq_put);
- rv = -EPROTO;
}
}
- qp->orq_get++;
out:
spin_unlock_irqrestore(&qp->orq_lock, flags);
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 1f4e60257700..7d47b521070b 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx);
if (paddr)
- return virt_to_page(paddr);
+ return virt_to_page((void *)paddr);
return NULL;
}
@@ -533,13 +533,23 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
kunmap_local(kaddr);
}
} else {
- u64 va = sge->laddr + sge_off;
+ /*
+ * Cast to an uintptr_t to preserve all 64 bits
+ * in sge->laddr.
+ */
+ uintptr_t va = (uintptr_t)(sge->laddr + sge_off);
- page_array[seg] = virt_to_page(va & PAGE_MASK);
+ /*
+ * virt_to_page() takes a (void *) pointer
+ * so cast to a (void *) meaning it will be 64
+ * bits on a 64 bit platform and 32 bits on a
+ * 32 bit platform.
+ */
+ page_array[seg] = virt_to_page((void *)(va & PAGE_MASK));
if (do_crc)
crypto_shash_update(
c_tx->mpa_crc_hd,
- (void *)(uintptr_t)va,
+ (void *)va,
plen);
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 1b36350601fa..3e814cfb298c 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -8,6 +8,7 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/xarray.h>
+#include <net/addrconf.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
@@ -131,8 +132,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
/* Revisit atomic caps if RFC 7306 gets supported */
attr->atomic_cap = 0;
- attr->device_cap_flags =
- IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG;
+ attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG;
attr->max_cq = sdev->attrs.max_cq;
attr->max_cqe = sdev->attrs.max_cqe;
attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL;
@@ -155,7 +156,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
attr->vendor_id = SIW_VENDOR_ID;
attr->vendor_part_id = sdev->vendor_part_id;
- memcpy(&attr->sys_image_guid, sdev->netdev->dev_addr, 6);
+ addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
+ sdev->netdev->dev_addr);
return 0;
}
@@ -311,7 +313,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
siw_dbg(base_dev, "too many QP's\n");
- return -ENOMEM;
+ rv = -ENOMEM;
+ goto err_atomic;
}
if (attrs->qp_type != IB_QPT_RC) {
siw_dbg(base_dev, "only RC QP's supported\n");
@@ -477,6 +480,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
list_add_tail(&qp->devq, &sdev->qp_list);
spin_unlock_irqrestore(&sdev->lock, flags);
+ init_completion(&qp->qp_free);
+
return 0;
err_out_xa:
@@ -621,6 +626,7 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
qp->scq = qp->rcq = NULL;
siw_qp_put(qp);
+ wait_for_completion(&qp->qp_free);
return 0;
}
@@ -660,7 +666,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
kbuf += core_sge->length;
core_sge++;
}
- sqe->sge[0].length = bytes > 0 ? bytes : 0;
+ sqe->sge[0].length = max(bytes, 0);
sqe->num_sge = bytes > 0 ? 1 : 0;
return bytes;
@@ -1164,7 +1170,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
err_out:
siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
- if (cq && cq->queue) {
+ if (cq->queue) {
struct siw_ucontext *ctx =
rdma_udata_to_drv_context(udata, struct siw_ucontext,
base_ucontext);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 44d8d151ff90..35e9c8a330e2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -411,6 +411,7 @@ struct ipoib_dev_priv {
struct dentry *path_dentry;
#endif
u64 hca_caps;
+ u64 kernel_caps;
struct ipoib_ethtool_st ethtool;
unsigned int max_send_sge;
const struct net_device_ops *rn_ops;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index fd9d7f2c4d64..b610d36295bb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -465,7 +465,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
goto err_qp;
}
- psn = prandom_u32() & 0xffffff;
+ psn = get_random_u32() & 0xffffff;
ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
if (ret)
goto err_modify;
@@ -884,8 +884,8 @@ int ipoib_cm_dev_open(struct net_device *dev)
goto err_cm;
}
- ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
- 0);
+ ret = ib_cm_listen(priv->cm.id,
+ cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num));
if (ret) {
pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
IPOIB_CM_IETF_ID | priv->qp->qp_num);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index a09ca21f7dff..8af99b18d361 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -65,10 +65,10 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
ib_get_device_fw_str(priv->ca, drvinfo->fw_version);
- strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent),
+ strscpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent),
sizeof(drvinfo->bus_info));
- strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver));
+ strscpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver));
}
static int ipoib_get_coalesce(struct net_device *dev,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 2c3dca41d3bd..ed25061fac62 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -573,7 +573,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb);
if (skb_is_gso(skb)) {
- hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ hlen = skb_tcp_all_headers(skb);
phead = skb->data;
if (unlikely(!skb_pull(skb, hlen))) {
ipoib_warn(priv, "linear data too small\n");
@@ -1109,7 +1109,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
* if he sets the device address back to be based on GID index 0,
* he no longer wishs to control it.
*
- * If the user doesn't control the the device address,
+ * If the user doesn't control the device address,
* IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means
* the port GUID has changed and GID at index 0 has changed
* so we need to change priv->local_gid and priv->dev->dev_addr
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9934b8bd7f56..ac25fc80fb33 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -742,7 +742,7 @@ void ipoib_flush_paths(struct net_device *dev)
static void path_rec_completion(int status,
struct sa_path_rec *pathrec,
- void *path_ptr)
+ int num_prs, void *path_ptr)
{
struct ipoib_path *path = path_ptr;
struct net_device *dev = path->dev;
@@ -1664,8 +1664,10 @@ static void ipoib_napi_add(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC);
- netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE);
+ netif_napi_add_weight(dev, &priv->recv_napi, ipoib_rx_poll,
+ IPOIB_NUM_WC);
+ netif_napi_add_weight(dev, &priv->send_napi, ipoib_tx_poll,
+ MAX_SEND_CQE);
}
static void ipoib_napi_del(struct net_device *dev)
@@ -1850,11 +1852,12 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev)
static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
{
priv->hca_caps = priv->ca->attrs.device_cap_flags;
+ priv->kernel_caps = priv->ca->attrs.kernel_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
- if (priv->hca_caps & IB_DEVICE_UD_TSO)
+ if (priv->kernel_caps & IBK_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
priv->dev->features |= priv->dev->hw_features;
@@ -2201,7 +2204,7 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name,
priv->rn_ops = dev->netdev_ops;
- if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
+ if (hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION)
dev->netdev_ops = &ipoib_netdev_ops_vf;
else
dev->netdev_ops = &ipoib_netdev_ops_pf;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index 5b05cf3837da..ea16ba5d8da6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -32,7 +32,6 @@
#include <linux/netdevice.h>
#include <linux/if_arp.h> /* For ARPHRD_xxx */
-#include <linux/module.h>
#include <net/rtnetlink.h>
#include "ipoib.h"
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5a150a080ac2..368e5d77416d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -197,16 +197,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.send_cq = priv->send_cq;
init_attr.recv_cq = priv->recv_cq;
- if (priv->hca_caps & IB_DEVICE_UD_TSO)
+ if (priv->kernel_caps & IBK_UD_TSO)
init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
- if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
+ if (priv->kernel_caps & IBK_BLOCK_MULTICAST_LOOPBACK)
init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
- if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA)
+ if (priv->kernel_caps & IBK_RDMA_NETDEV_OPA)
init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE;
priv->qp = ib_create_qp(priv->pd, &init_attr);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 0322dc75396f..4bd161e86f8d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/init.h>
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 776e46ee95da..620ae5b2d80d 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -113,10 +113,6 @@ bool iser_pi_enable = false;
module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
-int iser_pi_guard;
-module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
-
static int iscsi_iser_set(const char *val, const struct kernel_param *kp)
{
int ret;
@@ -139,9 +135,8 @@ static int iscsi_iser_set(const char *val, const struct kernel_param *kp)
* Notes: In case of data length errors or iscsi PDU completion failures
* this routine will signal iscsi layer of connection failure.
*/
-void
-iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
- char *rx_data, int rx_data_len)
+void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ char *rx_data, int rx_data_len)
{
int rc = 0;
int datalen;
@@ -176,8 +171,7 @@ error:
* Netes: This routine can't fail, just assign iscsi task
* hdr and max hdr size.
*/
-static int
-iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
+static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
{
struct iscsi_iser_task *iser_task = task->dd_data;
@@ -198,9 +192,8 @@ iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
* state mutex to avoid dereferencing the IB device which
* may have already been terminated.
*/
-int
-iser_initialize_task_headers(struct iscsi_task *task,
- struct iser_tx_desc *tx_desc)
+int iser_initialize_task_headers(struct iscsi_task *task,
+ struct iser_tx_desc *tx_desc)
{
struct iser_conn *iser_conn = task->conn->dd_data;
struct iser_device *device = iser_conn->ib_conn.device;
@@ -237,8 +230,7 @@ iser_initialize_task_headers(struct iscsi_task *task,
* Return: Returns zero on success or -ENOMEM when failing
* to init task headers (dma mapping error).
*/
-static int
-iscsi_iser_task_init(struct iscsi_task *task)
+static int iscsi_iser_task_init(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
int ret;
@@ -272,8 +264,8 @@ iscsi_iser_task_init(struct iscsi_task *task)
* xmit.
*
**/
-static int
-iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
+static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
+ struct iscsi_task *task)
{
int error = 0;
@@ -290,9 +282,8 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
return error;
}
-static int
-iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn,
- struct iscsi_task *task)
+static int iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn,
+ struct iscsi_task *task)
{
struct iscsi_r2t_info *r2t = &task->unsol_r2t;
struct iscsi_data hdr;
@@ -326,8 +317,7 @@ iscsi_iser_task_xmit_unsol_data_exit:
*
* Return: zero on success or escalates $error on failure.
*/
-static int
-iscsi_iser_task_xmit(struct iscsi_task *task)
+static int iscsi_iser_task_xmit(struct iscsi_task *task)
{
struct iscsi_conn *conn = task->conn;
struct iscsi_iser_task *iser_task = task->dd_data;
@@ -410,8 +400,7 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
*
* In addition the error sector is marked.
*/
-static u8
-iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
+static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
{
struct iscsi_iser_task *iser_task = task->dd_data;
enum iser_data_dir dir = iser_task->dir[ISER_DIR_IN] ?
@@ -460,11 +449,9 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session,
* -EINVAL in case end-point doesn't exsits anymore or iser connection
* state is not UP (teardown already started).
*/
-static int
-iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
- struct iscsi_cls_conn *cls_conn,
- uint64_t transport_eph,
- int is_leading)
+static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
+ struct iscsi_cls_conn *cls_conn,
+ uint64_t transport_eph, int is_leading)
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iser_conn *iser_conn;
@@ -519,8 +506,7 @@ out:
* from this point iscsi must call conn_stop in session/connection
* teardown so iser transport must wait for it.
*/
-static int
-iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
+static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
{
struct iscsi_conn *iscsi_conn;
struct iser_conn *iser_conn;
@@ -542,8 +528,7 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
* handle, so we call it under iser the state lock to protect against
* this kind of race.
*/
-static void
-iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
+static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iser_conn *iser_conn = conn->dd_data;
@@ -578,18 +563,16 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
*
* Removes and free iscsi host.
*/
-static void
-iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
+static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
{
struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
iscsi_session_teardown(cls_session);
- iscsi_host_remove(shost);
+ iscsi_host_remove(shost, false);
iscsi_host_free(shost);
}
-static inline unsigned int
-iser_dif_prot_caps(int prot_caps)
+static inline unsigned int iser_dif_prot_caps(int prot_caps)
{
int ret = 0;
@@ -667,7 +650,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
SHOST_DIX_GUARD_CRC);
}
- if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
shost->virt_boundary_mask = SZ_4K - 1;
if (iscsi_host_add(shost, ib_dev->dev.parent)) {
@@ -702,15 +685,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
return cls_session;
remove_host:
- iscsi_host_remove(shost);
+ iscsi_host_remove(shost, false);
free_host:
iscsi_host_free(shost);
return NULL;
}
-static int
-iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
- enum iscsi_param param, char *buf, int buflen)
+static int iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
+ enum iscsi_param param, char *buf, int buflen)
{
int value;
@@ -760,8 +742,8 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
*
* Output connection statistics.
*/
-static void
-iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
+static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn,
+ struct iscsi_stats *stats)
{
struct iscsi_conn *conn = cls_conn->dd_data;
@@ -812,9 +794,9 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
* Return: iscsi_endpoint created by iscsi layer or ERR_PTR(error)
* if fails.
*/
-static struct iscsi_endpoint *
-iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
- int non_blocking)
+static struct iscsi_endpoint *iscsi_iser_ep_connect(struct Scsi_Host *shost,
+ struct sockaddr *dst_addr,
+ int non_blocking)
{
int err;
struct iser_conn *iser_conn;
@@ -857,8 +839,7 @@ failure:
* or more likely iser connection state transitioned to TEMINATING or
* DOWN during the wait period.
*/
-static int
-iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
+static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
{
struct iser_conn *iser_conn = ep->dd_data;
int rc;
@@ -893,8 +874,7 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
* and cleanup or actually call it immediately in case we didn't pass
* iscsi conn bind/start stage, thus it is safe.
*/
-static void
-iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
+static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
{
struct iser_conn *iser_conn = ep->dd_data;
@@ -991,6 +971,7 @@ static struct scsi_host_template iscsi_iser_sht = {
.proc_name = "iscsi_iser",
.this_id = -1,
.track_queue_depth = 1,
+ .cmd_size = sizeof(struct iscsi_cmd),
};
static struct iscsi_transport iscsi_iser_transport = {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9f6ac0a09a78..dee8c97ff056 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -119,8 +119,6 @@
#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX)
-#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2)
-
/* the max TX (send) WR supported by the iSER QP is defined by *
* max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
* to have at max for SCSI command. The tx posting & completion handling code *
@@ -148,8 +146,6 @@
- ISER_MAX_RX_MISC_PDUS) / \
(1 + ISER_INFLIGHT_DATAOUTS))
-#define ISER_SIGNAL_CMD_COUNT 32
-
/* Constant PDU lengths calculations */
#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
@@ -207,12 +203,12 @@ struct iser_reg_resources;
*
* @sge: memory region sg element
* @rkey: memory region remote key
- * @mem_h: pointer to registration context (FMR/Fastreg)
+ * @desc: pointer to fast registration context
*/
struct iser_mem_reg {
- struct ib_sge sge;
- u32 rkey;
- void *mem_h;
+ struct ib_sge sge;
+ u32 rkey;
+ struct iser_fr_desc *desc;
};
enum iser_desc_type {
@@ -366,11 +362,8 @@ struct iser_fr_pool {
* @qp: Connection Queue-pair
* @cq: Connection completion queue
* @cq_size: The number of max outstanding completions
- * @post_recv_buf_count: post receive counter
- * @sig_count: send work request signal count
- * @rx_wr: receive work request for batch posts
* @device: reference to iser device
- * @fr_pool: connection fast registration poool
+ * @fr_pool: connection fast registration pool
* @pi_support: Indicate device T10-PI support
* @reg_cqe: completion handler
*/
@@ -379,9 +372,6 @@ struct ib_conn {
struct ib_qp *qp;
struct ib_cq *cq;
u32 cq_size;
- int post_recv_buf_count;
- u8 sig_count;
- struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
struct iser_fr_pool fr_pool;
bool pi_support;
@@ -397,8 +387,6 @@ struct ib_conn {
* @state: connection logical state
* @qp_max_recv_dtos: maximum number of data outs, corresponds
* to max number of post recvs
- * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
- * @min_posted_rx: (qp_max_recv_dtos >> 2)
* @max_cmds: maximum cmds allowed for this connection
* @name: connection peer portal
* @release_work: deffered work for release job
@@ -409,7 +397,6 @@ struct ib_conn {
* (state is ISER_CONN_UP)
* @conn_list: entry in ig conn list
* @login_desc: login descriptor
- * @rx_desc_head: head of rx_descs cyclic buffer
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
* @scsi_sg_tablesize: scsi host sg_tablesize
@@ -422,8 +409,6 @@ struct iser_conn {
struct iscsi_endpoint *ep;
enum iser_conn_state state;
unsigned qp_max_recv_dtos;
- unsigned qp_max_recv_dtos_mask;
- unsigned min_posted_rx;
u16 max_cmds;
char name[ISER_OBJECT_NAME_SIZE];
struct work_struct release_work;
@@ -433,7 +418,6 @@ struct iser_conn {
struct completion up_completion;
struct list_head conn_list;
struct iser_login_desc login_desc;
- unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
unsigned short scsi_sg_tablesize;
@@ -486,7 +470,6 @@ struct iser_global {
extern struct iser_global ig;
extern int iser_debug_level;
extern bool iser_pi_enable;
-extern int iser_pi_guard;
extern unsigned int iser_max_sectors;
extern bool iser_always_reg;
@@ -543,18 +526,17 @@ int iser_connect(struct iser_conn *iser_conn,
int non_blocking);
int iser_post_recvl(struct iser_conn *iser_conn);
-int iser_post_recvm(struct iser_conn *iser_conn, int count);
-int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
- bool signal);
+int iser_post_recvm(struct iser_conn *iser_conn,
+ struct iser_rx_desc *rx_desc);
+int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc);
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
enum iser_data_dir iser_dir,
enum dma_data_direction dma_dir);
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- enum dma_data_direction dir);
+ enum iser_data_dir iser_dir,
+ enum dma_data_direction dma_dir);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 27a6f75a9912..7b83f48f60c5 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -52,30 +52,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
struct iser_mem_reg *mem_reg;
int err;
struct iser_ctrl *hdr = &iser_task->desc.iser_header;
- struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
err = iser_dma_map_task_data(iser_task,
- buf_in,
ISER_DIR_IN,
DMA_FROM_DEVICE);
if (err)
return err;
- if (scsi_prot_sg_count(iser_task->sc)) {
- struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];
-
- err = iser_dma_map_task_data(iser_task,
- pbuf_in,
- ISER_DIR_IN,
- DMA_FROM_DEVICE);
- if (err)
- return err;
- }
-
err = iser_reg_mem_fastreg(iser_task, ISER_DIR_IN, false);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
- return err;
+ goto out_err;
}
mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
@@ -88,6 +75,10 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
(unsigned long long)mem_reg->sge.addr);
return 0;
+
+out_err:
+ iser_dma_unmap_task_data(iser_task, ISER_DIR_IN, DMA_FROM_DEVICE);
+ return err;
}
/* Register user buffer memory and initialize passive rdma
@@ -95,11 +86,8 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
* task->data[ISER_DIR_OUT].data_len, Protection size
* is stored at task->prot[ISER_DIR_OUT].data_len
*/
-static int
-iser_prepare_write_cmd(struct iscsi_task *task,
- unsigned int imm_sz,
- unsigned int unsol_sz,
- unsigned int edtl)
+static int iser_prepare_write_cmd(struct iscsi_task *task, unsigned int imm_sz,
+ unsigned int unsol_sz, unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
@@ -109,28 +97,16 @@ iser_prepare_write_cmd(struct iscsi_task *task,
struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
err = iser_dma_map_task_data(iser_task,
- buf_out,
ISER_DIR_OUT,
DMA_TO_DEVICE);
if (err)
return err;
- if (scsi_prot_sg_count(iser_task->sc)) {
- struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];
-
- err = iser_dma_map_task_data(iser_task,
- pbuf_out,
- ISER_DIR_OUT,
- DMA_TO_DEVICE);
- if (err)
- return err;
- }
-
err = iser_reg_mem_fastreg(iser_task, ISER_DIR_OUT,
buf_out->data_len == imm_sz);
- if (err != 0) {
+ if (err) {
iser_err("Failed to register write cmd RDMA mem\n");
- return err;
+ goto out_err;
}
mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
@@ -157,11 +133,15 @@ iser_prepare_write_cmd(struct iscsi_task *task,
}
return 0;
+
+out_err:
+ iser_dma_unmap_task_data(iser_task, ISER_DIR_OUT, DMA_TO_DEVICE);
+ return err;
}
/* creates a new tx descriptor and adds header regd buffer */
-static void iser_create_send_desc(struct iser_conn *iser_conn,
- struct iser_tx_desc *tx_desc)
+static void iser_create_send_desc(struct iser_conn *iser_conn,
+ struct iser_tx_desc *tx_desc)
{
struct iser_device *device = iser_conn->ib_conn.device;
@@ -247,8 +227,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
struct iser_device *device = ib_conn->device;
iser_conn->qp_max_recv_dtos = session->cmds_max;
- iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
- iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
if (iser_alloc_fastreg_pool(ib_conn, session->scsi_cmds_max,
iser_conn->pages_per_mr))
@@ -280,7 +258,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
rx_sg->lkey = device->pd->local_dma_lkey;
}
- iser_conn->rx_desc_head = 0;
return 0;
rx_desc_dma_map_failed:
@@ -322,37 +299,35 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn)
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
struct iser_conn *iser_conn = conn->dd_data;
- struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iscsi_session *session = conn->session;
+ int err = 0;
+ int i;
iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
/* check if this is the last login - going to full feature phase */
if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
- return 0;
-
- /*
- * Check that there is one posted recv buffer
- * (for the last login response).
- */
- WARN_ON(ib_conn->post_recv_buf_count != 1);
+ goto out;
if (session->discovery_sess) {
iser_info("Discovery session, re-using login RX buffer\n");
- return 0;
- } else
- iser_info("Normal session, posting batch of RX %d buffers\n",
- iser_conn->min_posted_rx);
-
- /* Initial post receive buffers */
- if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx))
- return -ENOMEM;
+ goto out;
+ }
- return 0;
-}
+ iser_info("Normal session, posting batch of RX %d buffers\n",
+ iser_conn->qp_max_recv_dtos - 1);
-static inline bool iser_signal_comp(u8 sig_count)
-{
- return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
+ /*
+ * Initial post receive buffers.
+ * There is one already posted recv buffer (for the last login
+ * response). Therefore, the first recv buffer is skipped here.
+ */
+ for (i = 1; i < iser_conn->qp_max_recv_dtos; i++) {
+ err = iser_post_recvm(iser_conn, &iser_conn->rx_descs[i]);
+ if (err)
+ goto out;
+ }
+out:
+ return err;
}
/**
@@ -360,8 +335,7 @@ static inline bool iser_signal_comp(u8 sig_count)
* @conn: link to matching iscsi connection
* @task: SCSI command task
*/
-int iser_send_command(struct iscsi_conn *conn,
- struct iscsi_task *task)
+int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task)
{
struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
@@ -371,7 +345,6 @@ int iser_send_command(struct iscsi_conn *conn,
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
- u8 sig_count = ++iser_conn->ib_conn.sig_count;
edtl = ntohl(hdr->data_length);
@@ -418,8 +391,7 @@ int iser_send_command(struct iscsi_conn *conn,
iser_task->status = ISER_TASK_STATUS_STARTED;
- err = iser_post_send(&iser_conn->ib_conn, tx_desc,
- iser_signal_comp(sig_count));
+ err = iser_post_send(&iser_conn->ib_conn, tx_desc);
if (!err)
return 0;
@@ -434,8 +406,7 @@ send_command_error:
* @task: SCSI command task
* @hdr: pointer to the LLD's iSCSI message header
*/
-int iser_send_data_out(struct iscsi_conn *conn,
- struct iscsi_task *task,
+int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task,
struct iscsi_data *hdr)
{
struct iser_conn *iser_conn = conn->dd_data;
@@ -487,7 +458,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
itt, buf_offset, data_seg_len);
- err = iser_post_send(&iser_conn->ib_conn, tx_desc, true);
+ err = iser_post_send(&iser_conn->ib_conn, tx_desc);
if (!err)
return 0;
@@ -497,8 +468,7 @@ send_data_out_error:
return err;
}
-int iser_send_control(struct iscsi_conn *conn,
- struct iscsi_task *task)
+int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task)
{
struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
@@ -550,7 +520,7 @@ int iser_send_control(struct iscsi_conn *conn,
goto send_control_error;
}
- err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
+ err = iser_post_send(&iser_conn->ib_conn, mdesc);
if (!err)
return 0;
@@ -567,6 +537,7 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
struct iscsi_hdr *hdr;
char *data;
int length;
+ bool full_feature_phase;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
iser_err_comp(wc, "login_rsp");
@@ -580,6 +551,9 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
hdr = desc->rsp + sizeof(struct iser_ctrl);
data = desc->rsp + ISER_HEADERS_LEN;
length = wc->byte_len - ISER_HEADERS_LEN;
+ full_feature_phase = ((hdr->flags & ISCSI_FULL_FEATURE_PHASE) ==
+ ISCSI_FULL_FEATURE_PHASE) &&
+ (hdr->flags & ISCSI_FLAG_CMD_FINAL);
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
hdr->itt, length);
@@ -590,11 +564,15 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
desc->rsp_dma, ISER_RX_LOGIN_SIZE,
DMA_FROM_DEVICE);
- ib_conn->post_recv_buf_count--;
+ if (!full_feature_phase ||
+ iser_conn->iscsi_conn->session->discovery_sess)
+ return;
+
+ /* Post the first RX buffer that is skipped in iser_post_rx_bufs() */
+ iser_post_recvm(iser_conn, iser_conn->rx_descs);
}
-static inline int
-iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
{
if (unlikely((!desc->sig_protected && rkey != desc->rsc.mr->rkey) ||
(desc->sig_protected && rkey != desc->rsc.sig_mr->rkey))) {
@@ -607,10 +585,8 @@ iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
return 0;
}
-static int
-iser_check_remote_inv(struct iser_conn *iser_conn,
- struct ib_wc *wc,
- struct iscsi_hdr *hdr)
+static int iser_check_remote_inv(struct iser_conn *iser_conn, struct ib_wc *wc,
+ struct iscsi_hdr *hdr)
{
if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
struct iscsi_task *task;
@@ -631,13 +607,13 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
struct iser_fr_desc *desc;
if (iser_task->dir[ISER_DIR_IN]) {
- desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+ desc = iser_task->rdma_reg[ISER_DIR_IN].desc;
if (unlikely(iser_inv_desc(desc, rkey)))
return -EINVAL;
}
if (iser_task->dir[ISER_DIR_OUT]) {
- desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+ desc = iser_task->rdma_reg[ISER_DIR_OUT].desc;
if (unlikely(iser_inv_desc(desc, rkey)))
return -EINVAL;
}
@@ -657,8 +633,7 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
struct iser_conn *iser_conn = to_iser_conn(ib_conn);
struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
struct iscsi_hdr *hdr;
- int length;
- int outstanding, count, err;
+ int length, err;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
iser_err_comp(wc, "task_rsp");
@@ -687,20 +662,9 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
DMA_FROM_DEVICE);
- /* decrementing conn->post_recv_buf_count only --after-- freeing the *
- * task eliminates the need to worry on tasks which are completed in *
- * parallel to the execution of iser_conn_term. So the code that waits *
- * for the posted rx bufs refcount to become zero handles everything */
- ib_conn->post_recv_buf_count--;
-
- outstanding = ib_conn->post_recv_buf_count;
- if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
- count = min(iser_conn->qp_max_recv_dtos - outstanding,
- iser_conn->min_posted_rx);
- err = iser_post_recvm(iser_conn, count);
- if (err)
- iser_err("posting %d rx bufs err %d\n", count, err);
- }
+ err = iser_post_recvm(iser_conn, desc);
+ if (err)
+ iser_err("posting rx buffer err %d\n", err);
}
void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
@@ -764,27 +728,16 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
- int prot_count = scsi_prot_sg_count(iser_task->sc);
if (iser_task->dir[ISER_DIR_IN]) {
iser_unreg_mem_fastreg(iser_task, ISER_DIR_IN);
- iser_dma_unmap_task_data(iser_task,
- &iser_task->data[ISER_DIR_IN],
+ iser_dma_unmap_task_data(iser_task, ISER_DIR_IN,
DMA_FROM_DEVICE);
- if (prot_count)
- iser_dma_unmap_task_data(iser_task,
- &iser_task->prot[ISER_DIR_IN],
- DMA_FROM_DEVICE);
}
if (iser_task->dir[ISER_DIR_OUT]) {
iser_unreg_mem_fastreg(iser_task, ISER_DIR_OUT);
- iser_dma_unmap_task_data(iser_task,
- &iser_task->data[ISER_DIR_OUT],
+ iser_dma_unmap_task_data(iser_task, ISER_DIR_OUT,
DMA_TO_DEVICE);
- if (prot_count)
- iser_dma_unmap_task_data(iser_task,
- &iser_task->prot[ISER_DIR_OUT],
- DMA_TO_DEVICE);
}
}
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 9776b755d848..29ae2c6a250a 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -30,7 +30,6 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
@@ -44,8 +43,7 @@ void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
iser_err_comp(wc, "memreg");
}
-static struct iser_fr_desc *
-iser_reg_desc_get_fr(struct ib_conn *ib_conn)
+static struct iser_fr_desc *iser_reg_desc_get_fr(struct ib_conn *ib_conn)
{
struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
struct iser_fr_desc *desc;
@@ -60,9 +58,8 @@ iser_reg_desc_get_fr(struct ib_conn *ib_conn)
return desc;
}
-static void
-iser_reg_desc_put_fr(struct ib_conn *ib_conn,
- struct iser_fr_desc *desc)
+static void iser_reg_desc_put_fr(struct ib_conn *ib_conn,
+ struct iser_fr_desc *desc)
{
struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
unsigned long flags;
@@ -73,10 +70,10 @@ iser_reg_desc_put_fr(struct ib_conn *ib_conn,
}
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- enum iser_data_dir iser_dir,
- enum dma_data_direction dma_dir)
+ enum iser_data_dir iser_dir,
+ enum dma_data_direction dma_dir)
{
+ struct iser_data_buf *data = &iser_task->data[iser_dir];
struct ib_device *dev;
iser_task->dir[iser_dir] = 1;
@@ -87,22 +84,44 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
iser_err("dma_map_sg failed!!!\n");
return -EINVAL;
}
+
+ if (scsi_prot_sg_count(iser_task->sc)) {
+ struct iser_data_buf *pdata = &iser_task->prot[iser_dir];
+
+ pdata->dma_nents = ib_dma_map_sg(dev, pdata->sg, pdata->size, dma_dir);
+ if (unlikely(pdata->dma_nents == 0)) {
+ iser_err("protection dma_map_sg failed!!!\n");
+ goto out_unmap;
+ }
+ }
+
return 0;
+
+out_unmap:
+ ib_dma_unmap_sg(dev, data->sg, data->size, dma_dir);
+ return -EINVAL;
}
+
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- enum dma_data_direction dir)
+ enum iser_data_dir iser_dir,
+ enum dma_data_direction dma_dir)
{
+ struct iser_data_buf *data = &iser_task->data[iser_dir];
struct ib_device *dev;
dev = iser_task->iser_conn->ib_conn.device->ib_device;
- ib_dma_unmap_sg(dev, data->sg, data->size, dir);
+ ib_dma_unmap_sg(dev, data->sg, data->size, dma_dir);
+
+ if (scsi_prot_sg_count(iser_task->sc)) {
+ struct iser_data_buf *pdata = &iser_task->prot[iser_dir];
+
+ ib_dma_unmap_sg(dev, pdata->sg, pdata->size, dma_dir);
+ }
}
-static int
-iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
- struct iser_mem_reg *reg)
+static int iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
+ struct iser_mem_reg *reg)
{
struct scatterlist *sg = mem->sg;
@@ -133,7 +152,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
struct iser_fr_desc *desc;
struct ib_mr_status mr_status;
- desc = reg->mem_h;
+ desc = reg->desc;
if (!desc)
return;
@@ -150,12 +169,12 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS,
&mr_status);
}
- iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->mem_h);
- reg->mem_h = NULL;
+ iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->desc);
+ reg->desc = NULL;
}
-static void
-iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain)
+static void iser_set_dif_domain(struct scsi_cmnd *sc,
+ struct ib_sig_domain *domain)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
domain->sig.dif.pi_interval = scsi_prot_interval(sc);
@@ -171,8 +190,8 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain)
domain->sig.dif.ref_remap = true;
}
-static int
-iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
+static int iser_set_sig_attrs(struct scsi_cmnd *sc,
+ struct ib_sig_attrs *sig_attrs)
{
switch (scsi_get_prot_op(sc)) {
case SCSI_PROT_WRITE_INSERT:
@@ -205,8 +224,7 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
return 0;
}
-static inline void
-iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
+static inline void iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
*mask = 0;
if (sc->prot_flags & SCSI_PROT_REF_CHECK)
@@ -215,11 +233,8 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
*mask |= IB_SIG_CHECK_GUARD;
}
-static inline void
-iser_inv_rkey(struct ib_send_wr *inv_wr,
- struct ib_mr *mr,
- struct ib_cqe *cqe,
- struct ib_send_wr *next_wr)
+static inline void iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr,
+ struct ib_cqe *cqe, struct ib_send_wr *next_wr)
{
inv_wr->opcode = IB_WR_LOCAL_INV;
inv_wr->wr_cqe = cqe;
@@ -229,12 +244,11 @@ iser_inv_rkey(struct ib_send_wr *inv_wr,
inv_wr->next = next_wr;
}
-static int
-iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *mem,
- struct iser_data_buf *sig_mem,
- struct iser_reg_resources *rsc,
- struct iser_mem_reg *sig_reg)
+static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *mem,
+ struct iser_data_buf *sig_mem,
+ struct iser_reg_resources *rsc,
+ struct iser_mem_reg *sig_reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
@@ -335,42 +349,26 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
return 0;
}
-static int
-iser_reg_data_sg(struct iscsi_iser_task *task,
- struct iser_data_buf *mem,
- struct iser_fr_desc *desc,
- bool use_dma_key,
- struct iser_mem_reg *reg)
-{
- struct iser_device *device = task->iser_conn->ib_conn.device;
-
- if (use_dma_key)
- return iser_reg_dma(device, mem, reg);
-
- return iser_fast_reg_mr(task, mem, &desc->rsc, reg);
-}
-
int iser_reg_mem_fastreg(struct iscsi_iser_task *task,
enum iser_data_dir dir,
bool all_imm)
{
struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
struct iser_data_buf *mem = &task->data[dir];
struct iser_mem_reg *reg = &task->rdma_reg[dir];
- struct iser_fr_desc *desc = NULL;
+ struct iser_fr_desc *desc;
bool use_dma_key;
int err;
use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
+ if (use_dma_key)
+ return iser_reg_dma(device, mem, reg);
- if (!use_dma_key) {
- desc = iser_reg_desc_get_fr(ib_conn);
- reg->mem_h = desc;
- }
-
+ desc = iser_reg_desc_get_fr(ib_conn);
if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) {
- err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg);
+ err = iser_fast_reg_mr(task, mem, &desc->rsc, reg);
if (unlikely(err))
goto err_reg;
} else {
@@ -382,11 +380,12 @@ int iser_reg_mem_fastreg(struct iscsi_iser_task *task,
desc->sig_protected = true;
}
+ reg->desc = desc;
+
return 0;
err_reg:
- if (desc)
- iser_reg_desc_put_fr(ib_conn, desc);
+ iser_reg_desc_put_fr(ib_conn, desc);
return err;
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index b566f7cb7797..a00ca117303a 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -32,7 +32,6 @@
* SOFTWARE.
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>
@@ -116,7 +115,7 @@ iser_create_fastreg_desc(struct iser_device *device,
if (!desc)
return ERR_PTR(-ENOMEM);
- if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
@@ -247,6 +246,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
device = ib_conn->device;
ib_dev = device->ib_device;
+ /* +1 for drain */
if (ib_conn->pi_support)
max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
else
@@ -265,14 +265,15 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
memset(&init_attr, 0, sizeof(init_attr));
init_attr.event_handler = iser_qp_event_callback;
- init_attr.qp_context = (void *)ib_conn;
- init_attr.send_cq = ib_conn->cq;
- init_attr.recv_cq = ib_conn->cq;
- init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
+ init_attr.qp_context = (void *)ib_conn;
+ init_attr.send_cq = ib_conn->cq;
+ init_attr.recv_cq = ib_conn->cq;
+ /* +1 for drain */
+ init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS + 1;
init_attr.cap.max_send_sge = 2;
init_attr.cap.max_recv_sge = 1;
- init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
- init_attr.qp_type = IB_QPT_RC;
+ init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ init_attr.qp_type = IB_QPT_RC;
init_attr.cap.max_send_wr = max_send_wr;
if (ib_conn->pi_support)
init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
@@ -283,9 +284,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
goto out_err;
ib_conn->qp = ib_conn->cma_id->qp;
- iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n",
- ib_conn, ib_conn->cma_id,
- ib_conn->cma_id->qp, max_send_wr);
+ iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n", ib_conn,
+ ib_conn->cma_id, ib_conn->cma_id->qp, max_send_wr);
return ret;
out_err:
@@ -313,7 +313,7 @@ struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
goto inc_refcnt;
device = kzalloc(sizeof *device, GFP_KERNEL);
- if (device == NULL)
+ if (!device)
goto out;
/* assign this device to the device */
@@ -392,8 +392,7 @@ void iser_release_work(struct work_struct *work)
* so the cm_id removal is out of here. It is Safe to
* be invoked multiple times.
*/
-static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
- bool destroy)
+static void iser_free_ib_conn_res(struct iser_conn *iser_conn, bool destroy)
{
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
@@ -401,7 +400,7 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
iser_info("freeing conn %p cma_id %p qp %p\n",
iser_conn, ib_conn->cma_id, ib_conn->qp);
- if (ib_conn->qp != NULL) {
+ if (ib_conn->qp) {
rdma_destroy_qp(ib_conn->cma_id);
ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size);
ib_conn->qp = NULL;
@@ -411,7 +410,7 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
if (iser_conn->rx_descs)
iser_free_rx_descriptors(iser_conn);
- if (device != NULL) {
+ if (device) {
iser_device_try_release(device);
ib_conn->device = NULL;
}
@@ -445,7 +444,7 @@ void iser_conn_release(struct iser_conn *iser_conn)
iser_free_ib_conn_res(iser_conn, true);
mutex_unlock(&iser_conn->state_mutex);
- if (ib_conn->cma_id != NULL) {
+ if (ib_conn->cma_id) {
rdma_destroy_id(ib_conn->cma_id);
ib_conn->cma_id = NULL;
}
@@ -488,7 +487,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_conn, err);
/* block until all flush errors are consumed */
- ib_drain_sq(ib_conn->qp);
+ ib_drain_qp(ib_conn->qp);
}
return 1;
@@ -501,13 +500,12 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
{
struct iser_conn *iser_conn;
- iser_conn = (struct iser_conn *)cma_id->context;
+ iser_conn = cma_id->context;
iser_conn->state = ISER_CONN_TERMINATING;
}
-static void
-iser_calc_scsi_params(struct iser_conn *iser_conn,
- unsigned int max_sectors)
+static void iser_calc_scsi_params(struct iser_conn *iser_conn,
+ unsigned int max_sectors)
{
struct iser_device *device = iser_conn->ib_conn.device;
struct ib_device_attr *attr = &device->ib_device->attrs;
@@ -521,7 +519,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
* (head and tail) for a single page worth data, so one additional
* entry is required.
*/
- if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ if (attr->kernel_cap_flags & IBK_SG_GAPS_REG)
reserved_mr_pages = 0;
else
reserved_mr_pages = 1;
@@ -545,11 +543,11 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_device *device;
- struct iser_conn *iser_conn;
- struct ib_conn *ib_conn;
+ struct iser_conn *iser_conn;
+ struct ib_conn *ib_conn;
int ret;
- iser_conn = (struct iser_conn *)cma_id->context;
+ iser_conn = cma_id->context;
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@@ -566,8 +564,8 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
/* connection T10-PI support */
if (iser_pi_enable) {
- if (!(device->ib_device->attrs.device_cap_flags &
- IB_DEVICE_INTEGRITY_HANDOVER)) {
+ if (!(device->ib_device->attrs.kernel_cap_flags &
+ IBK_INTEGRITY_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
dev_name(&ib_conn->device->ib_device->dev));
@@ -593,9 +591,9 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
struct rdma_conn_param conn_param;
- int ret;
+ int ret;
struct iser_cm_hdr req_hdr;
- struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+ struct iser_conn *iser_conn = cma_id->context;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct ib_device *ib_dev = ib_conn->device->ib_device;
@@ -609,9 +607,9 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = ib_dev->attrs.max_qp_rd_atom;
- conn_param.initiator_depth = 1;
- conn_param.retry_count = 7;
- conn_param.rnr_retry_count = 6;
+ conn_param.initiator_depth = 1;
+ conn_param.retry_count = 7;
+ conn_param.rnr_retry_count = 6;
memset(&req_hdr, 0, sizeof(req_hdr));
req_hdr.flags = ISER_ZBVA_NOT_SUP;
@@ -638,7 +636,7 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id,
struct ib_qp_attr attr;
struct ib_qp_init_attr init_attr;
- iser_conn = (struct iser_conn *)cma_id->context;
+ iser_conn = cma_id->context;
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@@ -661,7 +659,7 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id,
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
- struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+ struct iser_conn *iser_conn = cma_id->context;
if (iser_conn_terminate(iser_conn)) {
if (iser_conn->iscsi_conn)
@@ -675,7 +673,7 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
bool destroy)
{
- struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+ struct iser_conn *iser_conn = cma_id->context;
/*
* We are not guaranteed that we visited disconnected_handler
@@ -687,12 +685,13 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
complete(&iser_conn->ib_completion);
}
-static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+static int iser_cma_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event)
{
struct iser_conn *iser_conn;
int ret = 0;
- iser_conn = (struct iser_conn *)cma_id->context;
+ iser_conn = cma_id->context;
iser_info("%s (%d): status %d conn %p id %p\n",
rdma_event_msg(event->event), event->event,
event->status, cma_id->context, cma_id);
@@ -757,7 +756,6 @@ void iser_conn_init(struct iser_conn *iser_conn)
INIT_LIST_HEAD(&iser_conn->conn_list);
mutex_init(&iser_conn->state_mutex);
- ib_conn->post_recv_buf_count = 0;
ib_conn->reg_cqe.done = iser_reg_comp;
}
@@ -765,10 +763,8 @@ void iser_conn_init(struct iser_conn *iser_conn)
* starts the process of connecting to the target
* sleeps until the connection is established or rejected
*/
-int iser_connect(struct iser_conn *iser_conn,
- struct sockaddr *src_addr,
- struct sockaddr *dst_addr,
- int non_blocking)
+int iser_connect(struct iser_conn *iser_conn, struct sockaddr *src_addr,
+ struct sockaddr *dst_addr, int non_blocking)
{
struct ib_conn *ib_conn = &iser_conn->ib_conn;
int err = 0;
@@ -785,8 +781,7 @@ int iser_connect(struct iser_conn *iser_conn,
iser_conn->state = ISER_CONN_PENDING;
ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
- (void *)iser_conn,
- RDMA_PS_TCP, IB_QPT_RC);
+ iser_conn, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ib_conn->cma_id)) {
err = PTR_ERR(ib_conn->cma_id);
iser_err("rdma_create_id failed: %d\n", err);
@@ -829,7 +824,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iser_login_desc *desc = &iser_conn->login_desc;
struct ib_recv_wr wr;
- int ib_ret;
+ int ret;
desc->sge.addr = desc->rsp_dma;
desc->sge.length = ISER_RX_LOGIN_SIZE;
@@ -841,46 +836,30 @@ int iser_post_recvl(struct iser_conn *iser_conn)
wr.num_sge = 1;
wr.next = NULL;
- ib_conn->post_recv_buf_count++;
- ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL);
- if (ib_ret) {
- iser_err("ib_post_recv failed ret=%d\n", ib_ret);
- ib_conn->post_recv_buf_count--;
- }
+ ret = ib_post_recv(ib_conn->qp, &wr, NULL);
+ if (unlikely(ret))
+ iser_err("ib_post_recv login failed ret=%d\n", ret);
- return ib_ret;
+ return ret;
}
-int iser_post_recvm(struct iser_conn *iser_conn, int count)
+int iser_post_recvm(struct iser_conn *iser_conn, struct iser_rx_desc *rx_desc)
{
struct ib_conn *ib_conn = &iser_conn->ib_conn;
- unsigned int my_rx_head = iser_conn->rx_desc_head;
- struct iser_rx_desc *rx_desc;
- struct ib_recv_wr *wr;
- int i, ib_ret;
-
- for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
- rx_desc = &iser_conn->rx_descs[my_rx_head];
- rx_desc->cqe.done = iser_task_rsp;
- wr->wr_cqe = &rx_desc->cqe;
- wr->sg_list = &rx_desc->rx_sg;
- wr->num_sge = 1;
- wr->next = wr + 1;
- my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
- }
+ struct ib_recv_wr wr;
+ int ret;
- wr--;
- wr->next = NULL; /* mark end of work requests list */
+ rx_desc->cqe.done = iser_task_rsp;
+ wr.wr_cqe = &rx_desc->cqe;
+ wr.sg_list = &rx_desc->rx_sg;
+ wr.num_sge = 1;
+ wr.next = NULL;
- ib_conn->post_recv_buf_count += count;
- ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL);
- if (unlikely(ib_ret)) {
- iser_err("ib_post_recv failed ret=%d\n", ib_ret);
- ib_conn->post_recv_buf_count -= count;
- } else
- iser_conn->rx_desc_head = my_rx_head;
+ ret = ib_post_recv(ib_conn->qp, &wr, NULL);
+ if (unlikely(ret))
+ iser_err("ib_post_recv failed ret=%d\n", ret);
- return ib_ret;
+ return ret;
}
@@ -888,16 +867,14 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
* iser_post_send - Initiate a Send DTO operation
* @ib_conn: connection RDMA resources
* @tx_desc: iSER TX descriptor
- * @signal: true to send work request as SIGNALED
*
* Return: 0 on success, -1 on failure
*/
-int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
- bool signal)
+int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc)
{
struct ib_send_wr *wr = &tx_desc->send_wr;
struct ib_send_wr *first_wr;
- int ib_ret;
+ int ret;
ib_dma_sync_single_for_device(ib_conn->device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN,
@@ -908,7 +885,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
wr->sg_list = tx_desc->tx_sg;
wr->num_sge = tx_desc->num_sge;
wr->opcode = IB_WR_SEND;
- wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
+ wr->send_flags = IB_SEND_SIGNALED;
if (tx_desc->inv_wr.next)
first_wr = &tx_desc->inv_wr;
@@ -917,19 +894,19 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
else
first_wr = wr;
- ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL);
- if (unlikely(ib_ret))
+ ret = ib_post_send(ib_conn->qp, first_wr, NULL);
+ if (unlikely(ret))
iser_err("ib_post_send failed, ret:%d opcode:%d\n",
- ib_ret, wr->opcode);
+ ret, wr->opcode);
- return ib_ret;
+ return ret;
}
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
- struct iser_fr_desc *desc = reg->mem_h;
+ struct iser_fr_desc *desc = reg->desc;
unsigned long sector_size = iser_task->sc->device->sector_size;
struct ib_mr_status mr_status;
int ret;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 636d590765f9..b360a1527cd1 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -42,11 +42,12 @@ MODULE_PARM_DESC(sg_tablesize,
static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(device_list);
+static struct workqueue_struct *isert_login_wq;
static struct workqueue_struct *isert_comp_wq;
static struct workqueue_struct *isert_release_wq;
static int
-isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
+isert_put_response(struct iscsit_conn *conn, struct iscsit_cmd *cmd);
static int
isert_login_post_recv(struct isert_conn *isert_conn);
static int
@@ -230,7 +231,7 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
- if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)
+ if (ib_dev->attrs.kernel_cap_flags & IBK_INTEGRITY_HANDOVER)
device->pi_capable = true;
else
device->pi_capable = false;
@@ -909,7 +910,7 @@ isert_login_post_recv(struct isert_conn *isert_conn)
}
static int
-isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
+isert_put_login_tx(struct iscsit_conn *conn, struct iscsi_login *login,
u32 length)
{
struct isert_conn *isert_conn = conn->context;
@@ -976,7 +977,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
{
struct iser_rx_desc *rx_desc = isert_conn->login_desc;
int rx_buflen = isert_conn->login_req_len;
- struct iscsi_conn *conn = isert_conn->conn;
+ struct iscsit_conn *conn = isert_conn->conn;
struct iscsi_login *login = conn->conn_login;
int size;
@@ -1017,24 +1018,24 @@ isert_rx_login_req(struct isert_conn *isert_conn)
complete(&isert_conn->login_comp);
return;
}
- schedule_delayed_work(&conn->login_work, 0);
+ queue_delayed_work(isert_login_wq, &conn->login_work, 0);
}
-static struct iscsi_cmd
-*isert_allocate_cmd(struct iscsi_conn *conn, struct iser_rx_desc *rx_desc)
+static struct iscsit_cmd
+*isert_allocate_cmd(struct iscsit_conn *conn, struct iser_rx_desc *rx_desc)
{
struct isert_conn *isert_conn = conn->context;
struct isert_cmd *isert_cmd;
- struct iscsi_cmd *cmd;
+ struct iscsit_cmd *cmd;
cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE);
if (!cmd) {
- isert_err("Unable to allocate iscsi_cmd + isert_cmd\n");
+ isert_err("Unable to allocate iscsit_cmd + isert_cmd\n");
return NULL;
}
isert_cmd = iscsit_priv_cmd(cmd);
isert_cmd->conn = isert_conn;
- isert_cmd->iscsi_cmd = cmd;
+ isert_cmd->iscsit_cmd = cmd;
isert_cmd->rx_desc = rx_desc;
return cmd;
@@ -1042,10 +1043,10 @@ static struct iscsi_cmd
static int
isert_handle_scsi_cmd(struct isert_conn *isert_conn,
- struct isert_cmd *isert_cmd, struct iscsi_cmd *cmd,
+ struct isert_cmd *isert_cmd, struct iscsit_cmd *cmd,
struct iser_rx_desc *rx_desc, unsigned char *buf)
{
- struct iscsi_conn *conn = isert_conn->conn;
+ struct iscsit_conn *conn = isert_conn->conn;
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf;
int imm_data, imm_data_len, unsol_data, sg_nents, rc;
bool dump_payload = false;
@@ -1114,8 +1115,8 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
struct iser_rx_desc *rx_desc, unsigned char *buf)
{
struct scatterlist *sg_start;
- struct iscsi_conn *conn = isert_conn->conn;
- struct iscsi_cmd *cmd = NULL;
+ struct iscsit_conn *conn = isert_conn->conn;
+ struct iscsit_cmd *cmd = NULL;
struct iscsi_data *hdr = (struct iscsi_data *)buf;
u32 unsol_data_len = ntoh24(hdr->dlength);
int rc, sg_nents, sg_off, page_off;
@@ -1171,10 +1172,10 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
static int
isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc,
+ struct iscsit_cmd *cmd, struct iser_rx_desc *rx_desc,
unsigned char *buf)
{
- struct iscsi_conn *conn = isert_conn->conn;
+ struct iscsit_conn *conn = isert_conn->conn;
struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf;
int rc;
@@ -1190,10 +1191,10 @@ isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
static int
isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc,
+ struct iscsit_cmd *cmd, struct iser_rx_desc *rx_desc,
struct iscsi_text *hdr)
{
- struct iscsi_conn *conn = isert_conn->conn;
+ struct iscsit_conn *conn = isert_conn->conn;
u32 payload_length = ntoh24(hdr->dlength);
int rc;
unsigned char *text_in = NULL;
@@ -1220,8 +1221,8 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
uint32_t write_stag, uint64_t write_va)
{
struct iscsi_hdr *hdr = isert_get_iscsi_hdr(rx_desc);
- struct iscsi_conn *conn = isert_conn->conn;
- struct iscsi_cmd *cmd;
+ struct iscsit_conn *conn = isert_conn->conn;
+ struct iscsit_cmd *cmd;
struct isert_cmd *isert_cmd;
int ret = -EINVAL;
u8 opcode = (hdr->opcode & ISCSI_OPCODE_MASK);
@@ -1404,7 +1405,7 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
static void
isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn)
{
- struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
+ struct se_cmd *se_cmd = &cmd->iscsit_cmd->se_cmd;
enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
if (!cmd->rw.nr_ops)
@@ -1426,9 +1427,9 @@ isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn)
static void
isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct iscsit_cmd *cmd = isert_cmd->iscsit_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
- struct iscsi_conn *conn = isert_conn->conn;
+ struct iscsit_conn *conn = isert_conn->conn;
struct iscsi_text_rsp *hdr;
isert_dbg("Cmd %p\n", isert_cmd);
@@ -1575,7 +1576,7 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct isert_device *device = isert_conn->device;
struct iser_tx_desc *desc = cqe_to_tx_desc(wc->wr_cqe);
struct isert_cmd *isert_cmd = tx_desc_to_cmd(desc);
- struct se_cmd *cmd = &isert_cmd->iscsi_cmd->se_cmd;
+ struct se_cmd *cmd = &isert_cmd->iscsit_cmd->se_cmd;
int ret = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
@@ -1604,7 +1605,7 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
/*
* XXX: isert_put_response() failure is not retried.
*/
- ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+ ret = isert_put_response(isert_conn->conn, isert_cmd->iscsit_cmd);
if (ret)
pr_warn_ratelimited("isert_put_response() ret: %d\n", ret);
}
@@ -1617,7 +1618,7 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct isert_device *device = isert_conn->device;
struct iser_tx_desc *desc = cqe_to_tx_desc(wc->wr_cqe);
struct isert_cmd *isert_cmd = tx_desc_to_cmd(desc);
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct iscsit_cmd *cmd = isert_cmd->iscsit_cmd;
struct se_cmd *se_cmd = &cmd->se_cmd;
int ret = 0;
@@ -1662,7 +1663,7 @@ isert_do_control_comp(struct work_struct *work)
struct isert_cmd, comp_work);
struct isert_conn *isert_conn = isert_cmd->conn;
struct ib_device *ib_dev = isert_conn->cm_id->device;
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct iscsit_cmd *cmd = isert_cmd->iscsit_cmd;
isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state);
@@ -1720,7 +1721,7 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc)
isert_dbg("Cmd %p\n", isert_cmd);
- switch (isert_cmd->iscsi_cmd->i_state) {
+ switch (isert_cmd->iscsit_cmd->i_state) {
case ISTATE_SEND_TASKMGTRSP:
case ISTATE_SEND_LOGOUTRSP:
case ISTATE_SEND_REJECT:
@@ -1731,7 +1732,7 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc)
queue_work(isert_comp_wq, &isert_cmd->comp_work);
return;
default:
- isert_cmd->iscsi_cmd->i_state = ISTATE_SENT_STATUS;
+ isert_cmd->iscsit_cmd->i_state = ISTATE_SENT_STATUS;
isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
break;
}
@@ -1755,7 +1756,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
}
static int
-isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+isert_put_response(struct iscsit_conn *conn, struct iscsit_cmd *cmd)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
@@ -1806,7 +1807,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
}
static void
-isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+isert_aborted_task(struct iscsit_conn *conn, struct iscsit_cmd *cmd)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
@@ -1822,7 +1823,7 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
}
static enum target_prot_op
-isert_get_sup_prot_ops(struct iscsi_conn *conn)
+isert_get_sup_prot_ops(struct iscsit_conn *conn)
{
struct isert_conn *isert_conn = conn->context;
struct isert_device *device = isert_conn->device;
@@ -1842,7 +1843,7 @@ isert_get_sup_prot_ops(struct iscsi_conn *conn)
}
static int
-isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
+isert_put_nopin(struct iscsit_cmd *cmd, struct iscsit_conn *conn,
bool nopout_response)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
@@ -1862,7 +1863,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
}
static int
-isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+isert_put_logout_rsp(struct iscsit_cmd *cmd, struct iscsit_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
@@ -1880,7 +1881,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
}
static int
-isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+isert_put_tm_rsp(struct iscsit_cmd *cmd, struct iscsit_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
@@ -1898,7 +1899,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
}
static int
-isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+isert_put_reject(struct iscsit_cmd *cmd, struct iscsit_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
@@ -1933,7 +1934,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
}
static int
-isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+isert_put_text_rsp(struct iscsit_cmd *cmd, struct iscsit_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
@@ -2035,7 +2036,7 @@ static int
isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
- struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
+ struct se_cmd *se_cmd = &cmd->iscsit_cmd->se_cmd;
enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
u8 port_num = conn->cm_id->port_num;
u64 addr;
@@ -2048,7 +2049,7 @@ isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
if (dir == DMA_FROM_DEVICE) {
addr = cmd->write_va;
rkey = cmd->write_stag;
- offset = cmd->iscsi_cmd->write_data_done;
+ offset = cmd->iscsit_cmd->write_data_done;
} else {
addr = cmd->read_va;
rkey = cmd->read_stag;
@@ -2088,7 +2089,7 @@ rdma_ctx_post:
}
static int
-isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+isert_put_datain(struct iscsit_conn *conn, struct iscsit_cmd *cmd)
{
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
@@ -2129,7 +2130,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
}
static int
-isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
+isert_get_dataout(struct iscsit_conn *conn, struct iscsit_cmd *cmd, bool recovery)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
int ret;
@@ -2147,7 +2148,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
}
static int
-isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
+isert_immediate_queue(struct iscsit_conn *conn, struct iscsit_cmd *cmd, int state)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
int ret = 0;
@@ -2172,7 +2173,7 @@ isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
}
static int
-isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
+isert_response_queue(struct iscsit_conn *conn, struct iscsit_cmd *cmd, int state)
{
struct isert_conn *isert_conn = conn->context;
int ret;
@@ -2332,7 +2333,7 @@ isert_rdma_accept(struct isert_conn *isert_conn)
}
static int
-isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
+isert_get_login_rx(struct iscsit_conn *conn, struct iscsi_login *login)
{
struct isert_conn *isert_conn = conn->context;
int ret;
@@ -2348,9 +2349,9 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
/*
* For login requests after the first PDU, isert_rx_login_req() will
- * kick schedule_delayed_work(&conn->login_work) as the packet is
- * received, which turns this callback from iscsi_target_do_login_rx()
- * into a NOP.
+ * kick queue_delayed_work(isert_login_wq, &conn->login_work) as
+ * the packet is received, which turns this callback from
+ * iscsi_target_do_login_rx() into a NOP.
*/
if (!login->first_request)
return 0;
@@ -2368,7 +2369,7 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
}
static void
-isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
+isert_set_conn_info(struct iscsi_np *np, struct iscsit_conn *conn,
struct isert_conn *isert_conn)
{
struct rdma_cm_id *cm_id = isert_conn->cm_id;
@@ -2381,7 +2382,7 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
}
static int
-isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
+isert_accept_np(struct iscsi_np *np, struct iscsit_conn *conn)
{
struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn;
@@ -2489,7 +2490,7 @@ static void isert_release_work(struct work_struct *work)
static void
isert_wait4logout(struct isert_conn *isert_conn)
{
- struct iscsi_conn *conn = isert_conn->conn;
+ struct iscsit_conn *conn = isert_conn->conn;
isert_info("conn %p\n", isert_conn);
@@ -2501,9 +2502,9 @@ isert_wait4logout(struct isert_conn *isert_conn)
}
static void
-isert_wait4cmds(struct iscsi_conn *conn)
+isert_wait4cmds(struct iscsit_conn *conn)
{
- isert_info("iscsi_conn %p\n", conn);
+ isert_info("iscsit_conn %p\n", conn);
if (conn->sess) {
target_stop_session(conn->sess->se_sess);
@@ -2521,9 +2522,9 @@ isert_wait4cmds(struct iscsi_conn *conn)
* before blocking on the target_wait_for_session_cmds
*/
static void
-isert_put_unsol_pending_cmds(struct iscsi_conn *conn)
+isert_put_unsol_pending_cmds(struct iscsit_conn *conn)
{
- struct iscsi_cmd *cmd, *tmp;
+ struct iscsit_cmd *cmd, *tmp;
static LIST_HEAD(drop_cmd_list);
spin_lock_bh(&conn->cmd_lock);
@@ -2546,7 +2547,7 @@ isert_put_unsol_pending_cmds(struct iscsi_conn *conn)
}
}
-static void isert_wait_conn(struct iscsi_conn *conn)
+static void isert_wait_conn(struct iscsit_conn *conn)
{
struct isert_conn *isert_conn = conn->context;
@@ -2564,7 +2565,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
queue_work(isert_release_wq, &isert_conn->release_work);
}
-static void isert_free_conn(struct iscsi_conn *conn)
+static void isert_free_conn(struct iscsit_conn *conn)
{
struct isert_conn *isert_conn = conn->context;
@@ -2572,7 +2573,7 @@ static void isert_free_conn(struct iscsi_conn *conn)
isert_put_conn(isert_conn);
}
-static void isert_get_rx_pdu(struct iscsi_conn *conn)
+static void isert_get_rx_pdu(struct iscsit_conn *conn)
{
struct completion comp;
@@ -2606,20 +2607,23 @@ static struct iscsit_transport iser_target_transport = {
static int __init isert_init(void)
{
- int ret;
+ isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0);
+ if (!isert_login_wq) {
+ isert_err("Unable to allocate isert_login_wq\n");
+ return -ENOMEM;
+ }
isert_comp_wq = alloc_workqueue("isert_comp_wq",
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!isert_comp_wq) {
isert_err("Unable to allocate isert_comp_wq\n");
- return -ENOMEM;
+ goto destroy_login_wq;
}
isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
if (!isert_release_wq) {
isert_err("Unable to allocate isert_release_wq\n");
- ret = -ENOMEM;
goto destroy_comp_wq;
}
@@ -2630,17 +2634,20 @@ static int __init isert_init(void)
destroy_comp_wq:
destroy_workqueue(isert_comp_wq);
+destroy_login_wq:
+ destroy_workqueue(isert_login_wq);
- return ret;
+ return -ENOMEM;
}
static void __exit isert_exit(void)
{
- flush_scheduled_work();
+ flush_workqueue(isert_login_wq);
destroy_workqueue(isert_release_wq);
destroy_workqueue(isert_comp_wq);
iscsit_unregister_transport(&iser_target_transport);
isert_info("iSER_TARGET[0] - Released iser_target_transport\n");
+ destroy_workqueue(isert_login_wq);
}
MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index ca8cfebe26ca..0b2dfd6e7e27 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -146,7 +146,7 @@ struct isert_cmd {
u64 pdu_buf_dma;
u32 pdu_buf_len;
struct isert_conn *conn;
- struct iscsi_cmd *iscsi_cmd;
+ struct iscsit_cmd *iscsit_cmd;
struct iser_tx_desc tx_desc;
struct iser_rx_desc *rx_desc;
struct rdma_rw_ctx rw;
@@ -173,7 +173,7 @@ struct isert_conn {
u64 login_rsp_dma;
struct iser_rx_desc *rx_descs;
struct ib_recv_wr rx_wr[ISERT_QP_MAX_RECV_DTOS];
- struct iscsi_conn *conn;
+ struct iscsit_conn *conn;
struct list_head node;
struct completion login_comp;
struct completion login_req_comp;
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index 42d557dff19d..29b3d8fce3f5 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -124,8 +124,8 @@ static struct vnic_stats vnic_gstrings_stats[] = {
static void vnic_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
- strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
- strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+ strscpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+ strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
sizeof(drvinfo->bus_info));
}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index aeff68f582d3..071f35711468 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -50,7 +50,6 @@
* netdev functionality.
*/
-#include <linux/module.h>
#include <linux/if_vlan.h>
#include <linux/crc32.h>
diff --git a/drivers/infiniband/ulp/rtrs/Makefile b/drivers/infiniband/ulp/rtrs/Makefile
index 3898509be270..5227e7788e1f 100644
--- a/drivers/infiniband/ulp/rtrs/Makefile
+++ b/drivers/infiniband/ulp/rtrs/Makefile
@@ -1,12 +1,18 @@
# SPDX-License-Identifier: GPL-2.0-or-later
+CFLAGS_rtrs-clt-trace.o = -I$(src)
+
rtrs-client-y := rtrs-clt.o \
rtrs-clt-stats.o \
- rtrs-clt-sysfs.o
+ rtrs-clt-sysfs.o \
+ rtrs-clt-trace.o
+
+CFLAGS_rtrs-srv-trace.o = -I$(src)
rtrs-server-y := rtrs-srv.o \
rtrs-srv-stats.o \
- rtrs-srv-sysfs.o
+ rtrs-srv-sysfs.o \
+ rtrs-srv-trace.o
rtrs-core-y := rtrs.o
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
index 76e4352fe3f6..1e6ffafa2db3 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
@@ -13,8 +13,8 @@
void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- struct rtrs_clt_stats *stats = sess->stats;
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct rtrs_clt_stats *stats = clt_path->stats;
struct rtrs_clt_stats_pcpu *s;
int cpu;
@@ -32,11 +32,7 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats)
{
- struct rtrs_clt_stats_pcpu *s;
-
- s = get_cpu_ptr(stats->pcpu_stats);
- s->rdma.failover_cnt++;
- put_cpu_ptr(stats->pcpu_stats);
+ this_cpu_inc(stats->pcpu_stats->rdma.failover_cnt);
}
int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf)
@@ -169,19 +165,15 @@ int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *s, bool enable)
static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats,
size_t size, int d)
{
- struct rtrs_clt_stats_pcpu *s;
-
- s = get_cpu_ptr(stats->pcpu_stats);
- s->rdma.dir[d].cnt++;
- s->rdma.dir[d].size_total += size;
- put_cpu_ptr(stats->pcpu_stats);
+ this_cpu_inc(stats->pcpu_stats->rdma.dir[d].cnt);
+ this_cpu_add(stats->pcpu_stats->rdma.dir[d].size_total, size);
}
void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir)
{
struct rtrs_clt_con *con = req->con;
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- struct rtrs_clt_stats *stats = sess->stats;
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct rtrs_clt_stats *stats = clt_path->stats;
unsigned int len;
len = req->usr_len + req->data_len;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
index 0e69180c3771..d3c436ead694 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
@@ -16,21 +16,21 @@
#define MIN_MAX_RECONN_ATT -1
#define MAX_MAX_RECONN_ATT 9999
-static void rtrs_clt_sess_release(struct kobject *kobj)
+static void rtrs_clt_path_release(struct kobject *kobj)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
- free_sess(sess);
+ free_path(clt_path);
}
static struct kobj_type ktype_sess = {
.sysfs_ops = &kobj_sysfs_ops,
- .release = rtrs_clt_sess_release
+ .release = rtrs_clt_path_release
};
-static void rtrs_clt_sess_stats_release(struct kobject *kobj)
+static void rtrs_clt_path_stats_release(struct kobject *kobj)
{
struct rtrs_clt_stats *stats;
@@ -43,14 +43,15 @@ static void rtrs_clt_sess_stats_release(struct kobject *kobj)
static struct kobj_type ktype_stats = {
.sysfs_ops = &kobj_sysfs_ops,
- .release = rtrs_clt_sess_stats_release,
+ .release = rtrs_clt_path_stats_release,
};
static ssize_t max_reconnect_attempts_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
- struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev);
+ struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess,
+ dev);
return sysfs_emit(page, "%d\n",
rtrs_clt_get_max_reconnect_attempts(clt));
@@ -63,7 +64,8 @@ static ssize_t max_reconnect_attempts_store(struct device *dev,
{
int value;
int ret;
- struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev);
+ struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess,
+ dev);
ret = kstrtoint(buf, 10, &value);
if (ret) {
@@ -90,9 +92,9 @@ static ssize_t mpath_policy_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
- struct rtrs_clt *clt;
+ struct rtrs_clt_sess *clt;
- clt = container_of(dev, struct rtrs_clt, dev);
+ clt = container_of(dev, struct rtrs_clt_sess, dev);
switch (clt->mp_policy) {
case MP_POLICY_RR:
@@ -114,12 +116,12 @@ static ssize_t mpath_policy_store(struct device *dev,
const char *buf,
size_t count)
{
- struct rtrs_clt *clt;
+ struct rtrs_clt_sess *clt;
int value;
int ret;
size_t len = 0;
- clt = container_of(dev, struct rtrs_clt, dev);
+ clt = container_of(dev, struct rtrs_clt_sess, dev);
ret = kstrtoint(buf, 10, &value);
if (!ret && (value == MP_POLICY_RR ||
@@ -154,8 +156,7 @@ static DEVICE_ATTR_RW(mpath_policy);
static ssize_t add_path_show(struct device *dev,
struct device_attribute *attr, char *page)
{
- return sysfs_emit(
- page,
+ return sysfs_emit(page,
"Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n",
attr->attr.name);
}
@@ -169,12 +170,12 @@ static ssize_t add_path_store(struct device *dev,
.src = &srcaddr,
.dst = &dstaddr
};
- struct rtrs_clt *clt;
+ struct rtrs_clt_sess *clt;
const char *nl;
size_t len;
int err;
- clt = container_of(dev, struct rtrs_clt, dev);
+ clt = container_of(dev, struct rtrs_clt_sess, dev);
nl = strchr(buf, '\n');
if (nl)
@@ -197,10 +198,10 @@ static DEVICE_ATTR_RW(add_path);
static ssize_t rtrs_clt_state_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
- if (sess->state == RTRS_CLT_CONNECTED)
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+ if (clt_path->state == RTRS_CLT_CONNECTED)
return sysfs_emit(page, "connected\n");
return sysfs_emit(page, "disconnected\n");
@@ -219,16 +220,16 @@ static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
int ret;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
if (!sysfs_streq(buf, "1")) {
- rtrs_err(sess->clt, "%s: unknown value: '%s'\n",
+ rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n",
attr->attr.name, buf);
return -EINVAL;
}
- ret = rtrs_clt_reconnect_from_sysfs(sess);
+ ret = rtrs_clt_reconnect_from_sysfs(clt_path);
if (ret)
return ret;
@@ -249,15 +250,15 @@ static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
if (!sysfs_streq(buf, "1")) {
- rtrs_err(sess->clt, "%s: unknown value: '%s'\n",
+ rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n",
attr->attr.name, buf);
return -EINVAL;
}
- rtrs_clt_close_conns(sess, true);
+ rtrs_clt_close_conns(clt_path, true);
return count;
}
@@ -276,16 +277,16 @@ static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
int ret;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
if (!sysfs_streq(buf, "1")) {
- rtrs_err(sess->clt, "%s: unknown value: '%s'\n",
+ rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n",
attr->attr.name, buf);
return -EINVAL;
}
- ret = rtrs_clt_remove_path_from_sysfs(sess, &attr->attr);
+ ret = rtrs_clt_remove_path_from_sysfs(clt_path, &attr->attr);
if (ret)
return ret;
@@ -333,11 +334,11 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = container_of(kobj, typeof(*sess), kobj);
+ clt_path = container_of(kobj, typeof(*clt_path), kobj);
- return sysfs_emit(page, "%u\n", sess->hca_port);
+ return sysfs_emit(page, "%u\n", clt_path->hca_port);
}
static struct kobj_attribute rtrs_clt_hca_port_attr =
@@ -347,11 +348,11 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
- return sysfs_emit(page, "%s\n", sess->hca_name);
+ return sysfs_emit(page, "%s\n", clt_path->hca_name);
}
static struct kobj_attribute rtrs_clt_hca_name_attr =
@@ -361,12 +362,12 @@ static ssize_t rtrs_clt_cur_latency_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
return sysfs_emit(page, "%lld ns\n",
- ktime_to_ns(sess->s.hb_cur_latency));
+ ktime_to_ns(clt_path->s.hb_cur_latency));
}
static struct kobj_attribute rtrs_clt_cur_latency_attr =
@@ -376,11 +377,11 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
int len;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
- len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page,
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+ len = sockaddr_to_str((struct sockaddr *)&clt_path->s.src_addr, page,
PAGE_SIZE);
len += sysfs_emit_at(page, len, "\n");
return len;
@@ -393,11 +394,11 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
int len;
- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
- len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page,
+ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+ len = sockaddr_to_str((struct sockaddr *)&clt_path->s.dst_addr, page,
PAGE_SIZE);
len += sysfs_emit_at(page, len, "\n");
return len;
@@ -406,7 +407,7 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj,
static struct kobj_attribute rtrs_clt_dst_addr_attr =
__ATTR(dst_addr, 0444, rtrs_clt_dst_addr_show, NULL);
-static struct attribute *rtrs_clt_sess_attrs[] = {
+static struct attribute *rtrs_clt_path_attrs[] = {
&rtrs_clt_hca_name_attr.attr,
&rtrs_clt_hca_port_attr.attr,
&rtrs_clt_src_addr_attr.attr,
@@ -419,42 +420,43 @@ static struct attribute *rtrs_clt_sess_attrs[] = {
NULL,
};
-static const struct attribute_group rtrs_clt_sess_attr_group = {
- .attrs = rtrs_clt_sess_attrs,
+static const struct attribute_group rtrs_clt_path_attr_group = {
+ .attrs = rtrs_clt_path_attrs,
};
-int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
+int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path)
{
- struct rtrs_clt *clt = sess->clt;
+ struct rtrs_clt_sess *clt = clt_path->clt;
char str[NAME_MAX];
int err;
struct rtrs_addr path = {
- .src = &sess->s.src_addr,
- .dst = &sess->s.dst_addr,
+ .src = &clt_path->s.src_addr,
+ .dst = &clt_path->s.dst_addr,
};
rtrs_addr_to_str(&path, str, sizeof(str));
- err = kobject_init_and_add(&sess->kobj, &ktype_sess, clt->kobj_paths,
+ err = kobject_init_and_add(&clt_path->kobj, &ktype_sess,
+ clt->kobj_paths,
"%s", str);
if (err) {
pr_err("kobject_init_and_add: %d\n", err);
- kobject_put(&sess->kobj);
+ kobject_put(&clt_path->kobj);
return err;
}
- err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group);
+ err = sysfs_create_group(&clt_path->kobj, &rtrs_clt_path_attr_group);
if (err) {
pr_err("sysfs_create_group(): %d\n", err);
goto put_kobj;
}
- err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats,
- &sess->kobj, "stats");
+ err = kobject_init_and_add(&clt_path->stats->kobj_stats, &ktype_stats,
+ &clt_path->kobj, "stats");
if (err) {
pr_err("kobject_init_and_add: %d\n", err);
- kobject_put(&sess->stats->kobj_stats);
+ kobject_put(&clt_path->stats->kobj_stats);
goto remove_group;
}
- err = sysfs_create_group(&sess->stats->kobj_stats,
+ err = sysfs_create_group(&clt_path->stats->kobj_stats,
&rtrs_clt_stats_attr_group);
if (err) {
pr_err("failed to create stats sysfs group, err: %d\n", err);
@@ -464,25 +466,25 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
return 0;
put_kobj_stats:
- kobject_del(&sess->stats->kobj_stats);
- kobject_put(&sess->stats->kobj_stats);
+ kobject_del(&clt_path->stats->kobj_stats);
+ kobject_put(&clt_path->stats->kobj_stats);
remove_group:
- sysfs_remove_group(&sess->kobj, &rtrs_clt_sess_attr_group);
+ sysfs_remove_group(&clt_path->kobj, &rtrs_clt_path_attr_group);
put_kobj:
- kobject_del(&sess->kobj);
- kobject_put(&sess->kobj);
+ kobject_del(&clt_path->kobj);
+ kobject_put(&clt_path->kobj);
return err;
}
-void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess,
+void rtrs_clt_destroy_path_files(struct rtrs_clt_path *clt_path,
const struct attribute *sysfs_self)
{
- kobject_del(&sess->stats->kobj_stats);
- kobject_put(&sess->stats->kobj_stats);
+ kobject_del(&clt_path->stats->kobj_stats);
+ kobject_put(&clt_path->stats->kobj_stats);
if (sysfs_self)
- sysfs_remove_file_self(&sess->kobj, sysfs_self);
- kobject_del(&sess->kobj);
+ sysfs_remove_file_self(&clt_path->kobj, sysfs_self);
+ kobject_del(&clt_path->kobj);
}
static struct attribute *rtrs_clt_attrs[] = {
@@ -496,12 +498,12 @@ static const struct attribute_group rtrs_clt_attr_group = {
.attrs = rtrs_clt_attrs,
};
-int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt)
+int rtrs_clt_create_sysfs_root_files(struct rtrs_clt_sess *clt)
{
return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group);
}
-void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt)
+void rtrs_clt_destroy_sysfs_root(struct rtrs_clt_sess *clt)
{
sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c
new file mode 100644
index 000000000000..f14fa1f36ce8
--- /dev/null
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#include "rtrs.h"
+#include "rtrs-clt.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "rtrs-clt-trace.h"
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h
new file mode 100644
index 000000000000..7738e2676855
--- /dev/null
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rtrs_clt
+
+#if !defined(_TRACE_RTRS_CLT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RTRS_CLT_H
+
+#include <linux/tracepoint.h>
+
+struct rtrs_clt_path;
+struct rtrs_clt_sess;
+
+TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTING);
+TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTING_ERR);
+TRACE_DEFINE_ENUM(RTRS_CLT_RECONNECTING);
+TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTED);
+TRACE_DEFINE_ENUM(RTRS_CLT_CLOSING);
+TRACE_DEFINE_ENUM(RTRS_CLT_CLOSED);
+TRACE_DEFINE_ENUM(RTRS_CLT_DEAD);
+
+#define show_rtrs_clt_state(x) \
+ __print_symbolic(x, \
+ { RTRS_CLT_CONNECTING, "CONNECTING" }, \
+ { RTRS_CLT_CONNECTING_ERR, "CONNECTING_ERR" }, \
+ { RTRS_CLT_RECONNECTING, "RECONNECTING" }, \
+ { RTRS_CLT_CONNECTED, "CONNECTED" }, \
+ { RTRS_CLT_CLOSING, "CLOSING" }, \
+ { RTRS_CLT_CLOSED, "CLOSED" }, \
+ { RTRS_CLT_DEAD, "DEAD" })
+
+DECLARE_EVENT_CLASS(rtrs_clt_conn_class,
+ TP_PROTO(struct rtrs_clt_path *clt_path),
+
+ TP_ARGS(clt_path),
+
+ TP_STRUCT__entry(
+ __field(int, state)
+ __field(int, reconnect_attempts)
+ __field(int, max_reconnect_attempts)
+ __field(int, fail_cnt)
+ __field(int, success_cnt)
+ __array(char, sessname, NAME_MAX)
+ ),
+
+ TP_fast_assign(
+ struct rtrs_clt_sess *clt = clt_path->clt;
+
+ __entry->state = clt_path->state;
+ __entry->reconnect_attempts = clt_path->reconnect_attempts;
+ __entry->max_reconnect_attempts = clt->max_reconnect_attempts;
+ __entry->fail_cnt = clt_path->stats->reconnects.fail_cnt;
+ __entry->success_cnt = clt_path->stats->reconnects.successful_cnt;
+ memcpy(__entry->sessname, kobject_name(&clt_path->kobj), NAME_MAX);
+ ),
+
+ TP_printk("RTRS-CLT: sess='%s' state=%s attempts='%d' max-attempts='%d' fail='%d' success='%d'",
+ __entry->sessname,
+ show_rtrs_clt_state(__entry->state),
+ __entry->reconnect_attempts,
+ __entry->max_reconnect_attempts,
+ __entry->fail_cnt,
+ __entry->success_cnt
+ )
+);
+
+#define DEFINE_CLT_CONN_EVENT(name) \
+DEFINE_EVENT(rtrs_clt_conn_class, rtrs_##name, \
+ TP_PROTO(struct rtrs_clt_path *clt_path), \
+ TP_ARGS(clt_path))
+
+DEFINE_CLT_CONN_EVENT(clt_reconnect_work);
+DEFINE_CLT_CONN_EVENT(clt_close_conns);
+DEFINE_CLT_CONN_EVENT(rdma_error_recovery);
+
+#endif /* _TRACE_RTRS_CLT_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE rtrs-clt-trace
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 15c0077dd27e..8546b8816524 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -16,6 +16,7 @@
#include "rtrs-clt.h"
#include "rtrs-log.h"
+#include "rtrs-clt-trace.h"
#define RTRS_CONNECT_TIMEOUT_MS 30000
/*
@@ -46,21 +47,24 @@ static struct rtrs_rdma_dev_pd dev_pd = {
static struct workqueue_struct *rtrs_wq;
static struct class *rtrs_clt_dev_class;
-static inline bool rtrs_clt_is_connected(const struct rtrs_clt *clt)
+static inline bool rtrs_clt_is_connected(const struct rtrs_clt_sess *clt)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
bool connected = false;
rcu_read_lock();
- list_for_each_entry_rcu(sess, &clt->paths_list, s.entry)
- connected |= READ_ONCE(sess->state) == RTRS_CLT_CONNECTED;
+ list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry)
+ if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED) {
+ connected = true;
+ break;
+ }
rcu_read_unlock();
return connected;
}
static struct rtrs_permit *
-__rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type)
+__rtrs_get_permit(struct rtrs_clt_sess *clt, enum rtrs_clt_con_type con_type)
{
size_t max_depth = clt->queue_depth;
struct rtrs_permit *permit;
@@ -87,7 +91,7 @@ __rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type)
return permit;
}
-static inline void __rtrs_put_permit(struct rtrs_clt *clt,
+static inline void __rtrs_put_permit(struct rtrs_clt_sess *clt,
struct rtrs_permit *permit)
{
clear_bit_unlock(permit->mem_id, clt->permits_map);
@@ -107,7 +111,7 @@ static inline void __rtrs_put_permit(struct rtrs_clt *clt,
* Context:
* Can sleep if @wait == RTRS_PERMIT_WAIT
*/
-struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt,
+struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt_sess *clt,
enum rtrs_clt_con_type con_type,
enum wait_type can_wait)
{
@@ -142,7 +146,8 @@ EXPORT_SYMBOL(rtrs_clt_get_permit);
* Context:
* Does not matter
*/
-void rtrs_clt_put_permit(struct rtrs_clt *clt, struct rtrs_permit *permit)
+void rtrs_clt_put_permit(struct rtrs_clt_sess *clt,
+ struct rtrs_permit *permit)
{
if (WARN_ON(!test_bit(permit->mem_id, clt->permits_map)))
return;
@@ -163,29 +168,29 @@ EXPORT_SYMBOL(rtrs_clt_put_permit);
/**
* rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit
- * @sess: client session pointer
+ * @clt_path: client path pointer
* @permit: permit for the allocation of the RDMA buffer
* Note:
* IO connection starts from 1.
* 0 connection is for user messages.
*/
static
-struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
+struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_path *clt_path,
struct rtrs_permit *permit)
{
int id = 0;
if (permit->con_type == RTRS_IO_CON)
- id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
+ id = (permit->cpu_id % (clt_path->s.irq_con_num - 1)) + 1;
- return to_clt_con(sess->s.con[id]);
+ return to_clt_con(clt_path->s.con[id]);
}
/**
* rtrs_clt_change_state() - change the session state through session state
* machine.
*
- * @sess: client session to change the state of.
+ * @clt_path: client path to change the state of.
* @new_state: state to change to.
*
* returns true if sess's state is changed to new state, otherwise return false.
@@ -193,15 +198,15 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
* Locks:
* state_wq lock must be hold.
*/
-static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
+static bool rtrs_clt_change_state(struct rtrs_clt_path *clt_path,
enum rtrs_clt_state new_state)
{
enum rtrs_clt_state old_state;
bool changed = false;
- lockdep_assert_held(&sess->state_wq.lock);
+ lockdep_assert_held(&clt_path->state_wq.lock);
- old_state = sess->state;
+ old_state = clt_path->state;
switch (new_state) {
case RTRS_CLT_CONNECTING:
switch (old_state) {
@@ -275,51 +280,45 @@ static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
break;
}
if (changed) {
- sess->state = new_state;
- wake_up_locked(&sess->state_wq);
+ clt_path->state = new_state;
+ wake_up_locked(&clt_path->state_wq);
}
return changed;
}
-static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess,
+static bool rtrs_clt_change_state_from_to(struct rtrs_clt_path *clt_path,
enum rtrs_clt_state old_state,
enum rtrs_clt_state new_state)
{
bool changed = false;
- spin_lock_irq(&sess->state_wq.lock);
- if (sess->state == old_state)
- changed = rtrs_clt_change_state(sess, new_state);
- spin_unlock_irq(&sess->state_wq.lock);
+ spin_lock_irq(&clt_path->state_wq.lock);
+ if (clt_path->state == old_state)
+ changed = rtrs_clt_change_state(clt_path, new_state);
+ spin_unlock_irq(&clt_path->state_wq.lock);
return changed;
}
+static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path);
static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+
+ trace_rtrs_rdma_error_recovery(clt_path);
- if (rtrs_clt_change_state_from_to(sess,
+ if (rtrs_clt_change_state_from_to(clt_path,
RTRS_CLT_CONNECTED,
RTRS_CLT_RECONNECTING)) {
- struct rtrs_clt *clt = sess->clt;
- unsigned int delay_ms;
-
- /*
- * Normal scenario, reconnect if we were successfully connected
- */
- delay_ms = clt->reconnect_delay_sec * 1000;
- queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
- msecs_to_jiffies(delay_ms +
- prandom_u32() % RTRS_RECONNECT_SEED));
+ queue_work(rtrs_wq, &clt_path->err_recovery_work);
} else {
/*
* Error can happen just on establishing new connection,
* so notify waiter with error state, waiter is responsible
* for cleaning the rest and reconnect if needed.
*/
- rtrs_clt_change_state_from_to(sess,
+ rtrs_clt_change_state_from_to(clt_path,
RTRS_CLT_CONNECTING,
RTRS_CLT_CONNECTING_ERR);
}
@@ -330,7 +329,7 @@ static void rtrs_clt_fast_reg_done(struct ib_cq *cq, struct ib_wc *wc)
struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
if (wc->status != IB_WC_SUCCESS) {
- rtrs_err(con->c.sess, "Failed IB_WR_REG_MR: %s\n",
+ rtrs_err(con->c.path, "Failed IB_WR_REG_MR: %s\n",
ib_wc_status_msg(wc->status));
rtrs_rdma_error_recovery(con);
}
@@ -350,7 +349,7 @@ static void rtrs_clt_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
if (wc->status != IB_WC_SUCCESS) {
- rtrs_err(con->c.sess, "Failed IB_WR_LOCAL_INV: %s\n",
+ rtrs_err(con->c.path, "Failed IB_WR_LOCAL_INV: %s\n",
ib_wc_status_msg(wc->status));
rtrs_rdma_error_recovery(con);
}
@@ -380,14 +379,14 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
bool notify, bool can_wait)
{
struct rtrs_clt_con *con = req->con;
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
int err;
if (WARN_ON(!req->in_use))
return;
if (WARN_ON(!req->con))
return;
- sess = to_clt_sess(con->c.sess);
+ clt_path = to_clt_path(con->c.path);
if (req->sg_cnt) {
if (req->dir == DMA_FROM_DEVICE && req->need_inv) {
@@ -417,7 +416,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
refcount_inc(&req->ref);
err = rtrs_inv_rkey(req);
if (err) {
- rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n",
+ rtrs_err(con->c.path, "Send INV WR key=%#x: %d\n",
req->mr->rkey, err);
} else if (can_wait) {
wait_for_completion(&req->inv_comp);
@@ -433,21 +432,21 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
if (!refcount_dec_and_test(&req->ref))
return;
}
- ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
+ ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist,
req->sg_cnt, req->dir);
}
if (!refcount_dec_and_test(&req->ref))
return;
if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
- atomic_dec(&sess->stats->inflight);
+ atomic_dec(&clt_path->stats->inflight);
req->in_use = false;
req->con = NULL;
if (errno) {
- rtrs_err_rl(con->c.sess, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n",
- errno, kobject_name(&sess->kobj), sess->hca_name,
- sess->hca_port, notify);
+ rtrs_err_rl(con->c.path, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n",
+ errno, kobject_name(&clt_path->kobj), clt_path->hca_name,
+ clt_path->hca_port, notify);
}
if (notify)
@@ -459,12 +458,12 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
struct rtrs_rbuf *rbuf, u32 off,
u32 imm, struct ib_send_wr *wr)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
enum ib_send_flags flags;
struct ib_sge sge;
if (!req->sg_size) {
- rtrs_wrn(con->c.sess,
+ rtrs_wrn(con->c.path,
"Doing RDMA Write failed, no data supplied\n");
return -EINVAL;
}
@@ -472,16 +471,17 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
/* user data and user message in the first list element */
sge.addr = req->iu->dma_addr;
sge.length = req->sg_size;
- sge.lkey = sess->s.dev->ib_pd->local_dma_lkey;
+ sge.lkey = clt_path->s.dev->ib_pd->local_dma_lkey;
/*
* From time to time we have to post signalled sends,
* or send queue will fill up and only QP reset can help.
*/
- flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ?
+ flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ?
0 : IB_SEND_SIGNALED;
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr,
+ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev,
+ req->iu->dma_addr,
req->sg_size, DMA_TO_DEVICE);
return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, &sge, 1,
@@ -489,15 +489,15 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
imm, flags, wr, NULL);
}
-static void process_io_rsp(struct rtrs_clt_sess *sess, u32 msg_id,
+static void process_io_rsp(struct rtrs_clt_path *clt_path, u32 msg_id,
s16 errno, bool w_inval)
{
struct rtrs_clt_io_req *req;
- if (WARN_ON(msg_id >= sess->queue_depth))
+ if (WARN_ON(msg_id >= clt_path->queue_depth))
return;
- req = &sess->reqs[msg_id];
+ req = &clt_path->reqs[msg_id];
/* Drop need_inv if server responded with send with invalidation */
req->need_inv &= !w_inval;
complete_rdma_req(req, errno, true, false);
@@ -507,21 +507,21 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc)
{
struct rtrs_iu *iu;
int err;
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
- WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
+ WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0);
iu = container_of(wc->wr_cqe, struct rtrs_iu,
cqe);
err = rtrs_iu_post_recv(&con->c, iu);
if (err) {
- rtrs_err(con->c.sess, "post iu failed %d\n", err);
+ rtrs_err(con->c.path, "post iu failed %d\n", err);
rtrs_rdma_error_recovery(con);
}
}
static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
struct rtrs_msg_rkey_rsp *msg;
u32 imm_type, imm_payload;
bool w_inval = false;
@@ -529,25 +529,26 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
u32 buf_id;
int err;
- WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
+ WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0);
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
if (wc->byte_len < sizeof(*msg)) {
- rtrs_err(con->c.sess, "rkey response is malformed: size %d\n",
+ rtrs_err(con->c.path, "rkey response is malformed: size %d\n",
wc->byte_len);
goto out;
}
- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
+ ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr,
iu->size, DMA_FROM_DEVICE);
msg = iu->buf;
if (le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP) {
- rtrs_err(sess->clt, "rkey response is malformed: type %d\n",
+ rtrs_err(clt_path->clt,
+ "rkey response is malformed: type %d\n",
le16_to_cpu(msg->type));
goto out;
}
buf_id = le16_to_cpu(msg->buf_id);
- if (WARN_ON(buf_id >= sess->queue_depth))
+ if (WARN_ON(buf_id >= clt_path->queue_depth))
goto out;
rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload);
@@ -560,10 +561,10 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
if (WARN_ON(buf_id != msg_id))
goto out;
- sess->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey);
- process_io_rsp(sess, msg_id, err, w_inval);
+ clt_path->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey);
+ process_io_rsp(clt_path, msg_id, err, w_inval);
}
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, iu->dma_addr,
+ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, iu->dma_addr,
iu->size, DMA_FROM_DEVICE);
return rtrs_clt_recv_done(con, wc);
out:
@@ -600,14 +601,14 @@ static int rtrs_post_recv_empty_x2(struct rtrs_con *con, struct ib_cqe *cqe)
static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
u32 imm_type, imm_payload;
bool w_inval = false;
int err;
if (wc->status != IB_WC_SUCCESS) {
if (wc->status != IB_WC_WR_FLUSH_ERR) {
- rtrs_err(sess->clt, "RDMA failed: %s\n",
+ rtrs_err(clt_path->clt, "RDMA failed: %s\n",
ib_wc_status_msg(wc->status));
rtrs_rdma_error_recovery(con);
}
@@ -632,21 +633,21 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM);
rtrs_from_io_rsp_imm(imm_payload, &msg_id, &err);
- process_io_rsp(sess, msg_id, err, w_inval);
+ process_io_rsp(clt_path, msg_id, err, w_inval);
} else if (imm_type == RTRS_HB_MSG_IMM) {
WARN_ON(con->c.cid);
- rtrs_send_hb_ack(&sess->s);
- if (sess->flags & RTRS_MSG_NEW_RKEY_F)
+ rtrs_send_hb_ack(&clt_path->s);
+ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F)
return rtrs_clt_recv_done(con, wc);
} else if (imm_type == RTRS_HB_ACK_IMM) {
WARN_ON(con->c.cid);
- sess->s.hb_missed_cnt = 0;
- sess->s.hb_cur_latency =
- ktime_sub(ktime_get(), sess->s.hb_last_sent);
- if (sess->flags & RTRS_MSG_NEW_RKEY_F)
+ clt_path->s.hb_missed_cnt = 0;
+ clt_path->s.hb_cur_latency =
+ ktime_sub(ktime_get(), clt_path->s.hb_last_sent);
+ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F)
return rtrs_clt_recv_done(con, wc);
} else {
- rtrs_wrn(con->c.sess, "Unknown IMM type %u\n",
+ rtrs_wrn(con->c.path, "Unknown IMM type %u\n",
imm_type);
}
if (w_inval)
@@ -658,7 +659,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
else
err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
if (err) {
- rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n",
+ rtrs_err(con->c.path, "rtrs_post_recv_empty(): %d\n",
err);
rtrs_rdma_error_recovery(con);
}
@@ -670,7 +671,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE ||
wc->wc_flags & IB_WC_WITH_IMM));
WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done);
- if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
+ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) {
if (wc->wc_flags & IB_WC_WITH_INVALIDATE)
return rtrs_clt_recv_done(con, wc);
@@ -685,7 +686,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
break;
default:
- rtrs_wrn(sess->clt, "Unexpected WC type: %d\n", wc->opcode);
+ rtrs_wrn(clt_path->clt, "Unexpected WC type: %d\n", wc->opcode);
return;
}
}
@@ -693,10 +694,10 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
static int post_recv_io(struct rtrs_clt_con *con, size_t q_size)
{
int err, i;
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
for (i = 0; i < q_size; i++) {
- if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
+ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) {
struct rtrs_iu *iu = &con->rsp_ius[i];
err = rtrs_iu_post_recv(&con->c, iu);
@@ -710,16 +711,16 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size)
return 0;
}
-static int post_recv_sess(struct rtrs_clt_sess *sess)
+static int post_recv_path(struct rtrs_clt_path *clt_path)
{
size_t q_size = 0;
int err, cid;
- for (cid = 0; cid < sess->s.con_num; cid++) {
+ for (cid = 0; cid < clt_path->s.con_num; cid++) {
if (cid == 0)
q_size = SERVICE_CON_QUEUE_DEPTH;
else
- q_size = sess->queue_depth;
+ q_size = clt_path->queue_depth;
/*
* x2 for RDMA read responses + FR key invalidations,
@@ -727,9 +728,10 @@ static int post_recv_sess(struct rtrs_clt_sess *sess)
*/
q_size *= 2;
- err = post_recv_io(to_clt_con(sess->s.con[cid]), q_size);
+ err = post_recv_io(to_clt_con(clt_path->s.con[cid]), q_size);
if (err) {
- rtrs_err(sess->clt, "post_recv_io(), err: %d\n", err);
+ rtrs_err(clt_path->clt, "post_recv_io(), err: %d\n",
+ err);
return err;
}
}
@@ -740,29 +742,29 @@ static int post_recv_sess(struct rtrs_clt_sess *sess)
struct path_it {
int i;
struct list_head skip_list;
- struct rtrs_clt *clt;
- struct rtrs_clt_sess *(*next_path)(struct path_it *it);
+ struct rtrs_clt_sess *clt;
+ struct rtrs_clt_path *(*next_path)(struct path_it *it);
};
-/**
- * list_next_or_null_rr_rcu - get next list element in round-robin fashion.
+/*
+ * rtrs_clt_get_next_path_or_null - get clt path from the list or return NULL
* @head: the head for the list.
- * @ptr: the list head to take the next element from.
- * @type: the type of the struct this is embedded in.
- * @memb: the name of the list_head within the struct.
+ * @clt_path: The element to take the next clt_path from.
*
- * Next element returned in round-robin fashion, i.e. head will be skipped,
+ * Next clt path returned in round-robin fashion, i.e. head will be skipped,
* but if list is observed as empty, NULL will be returned.
*
- * This primitive may safely run concurrently with the _rcu list-mutation
+ * This function may safely run concurrently with the _rcu list-mutation
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
-#define list_next_or_null_rr_rcu(head, ptr, type, memb) \
-({ \
- list_next_or_null_rcu(head, ptr, type, memb) ?: \
- list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \
- type, memb); \
-})
+static inline struct rtrs_clt_path *
+rtrs_clt_get_next_path_or_null(struct list_head *head, struct rtrs_clt_path *clt_path)
+{
+ return list_next_or_null_rcu(head, &clt_path->s.entry, typeof(*clt_path), s.entry) ?:
+ list_next_or_null_rcu(head,
+ READ_ONCE((&clt_path->s.entry)->next),
+ typeof(*clt_path), s.entry);
+}
/**
* get_next_path_rr() - Returns path in round-robin fashion.
@@ -773,11 +775,11 @@ struct path_it {
* Locks:
* rcu_read_lock() must be hold.
*/
-static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it)
+static struct rtrs_clt_path *get_next_path_rr(struct path_it *it)
{
- struct rtrs_clt_sess __rcu **ppcpu_path;
- struct rtrs_clt_sess *path;
- struct rtrs_clt *clt;
+ struct rtrs_clt_path __rcu **ppcpu_path;
+ struct rtrs_clt_path *path;
+ struct rtrs_clt_sess *clt;
clt = it->clt;
@@ -793,10 +795,8 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it)
path = list_first_or_null_rcu(&clt->paths_list,
typeof(*path), s.entry);
else
- path = list_next_or_null_rr_rcu(&clt->paths_list,
- &path->s.entry,
- typeof(*path),
- s.entry);
+ path = rtrs_clt_get_next_path_or_null(&clt->paths_list, path);
+
rcu_assign_pointer(*ppcpu_path, path);
return path;
@@ -811,26 +811,26 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it)
* Locks:
* rcu_read_lock() must be hold.
*/
-static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it)
+static struct rtrs_clt_path *get_next_path_min_inflight(struct path_it *it)
{
- struct rtrs_clt_sess *min_path = NULL;
- struct rtrs_clt *clt = it->clt;
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *min_path = NULL;
+ struct rtrs_clt_sess *clt = it->clt;
+ struct rtrs_clt_path *clt_path;
int min_inflight = INT_MAX;
int inflight;
- list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) {
- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
+ list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) {
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
continue;
- if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))
+ if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry)))
continue;
- inflight = atomic_read(&sess->stats->inflight);
+ inflight = atomic_read(&clt_path->stats->inflight);
if (inflight < min_inflight) {
min_inflight = inflight;
- min_path = sess;
+ min_path = clt_path;
}
}
@@ -862,26 +862,26 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it)
* Therefore the caller MUST check the returned
* path is NULL and trigger the IO error.
*/
-static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it)
+static struct rtrs_clt_path *get_next_path_min_latency(struct path_it *it)
{
- struct rtrs_clt_sess *min_path = NULL;
- struct rtrs_clt *clt = it->clt;
- struct rtrs_clt_sess *sess;
- ktime_t min_latency = INT_MAX;
+ struct rtrs_clt_path *min_path = NULL;
+ struct rtrs_clt_sess *clt = it->clt;
+ struct rtrs_clt_path *clt_path;
+ ktime_t min_latency = KTIME_MAX;
ktime_t latency;
- list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) {
- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
+ list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) {
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
continue;
- if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))
+ if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry)))
continue;
- latency = sess->s.hb_cur_latency;
+ latency = clt_path->s.hb_cur_latency;
if (latency < min_latency) {
min_latency = latency;
- min_path = sess;
+ min_path = clt_path;
}
}
@@ -895,7 +895,7 @@ static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it)
return min_path;
}
-static inline void path_it_init(struct path_it *it, struct rtrs_clt *clt)
+static inline void path_it_init(struct path_it *it, struct rtrs_clt_sess *clt)
{
INIT_LIST_HEAD(&it->skip_list);
it->clt = clt;
@@ -913,7 +913,7 @@ static inline void path_it_deinit(struct path_it *it)
{
struct list_head *skip, *tmp;
/*
- * The skip_list is used only for the MIN_INFLIGHT policy.
+ * The skip_list is used only for the MIN_INFLIGHT and MIN_LATENCY policies.
* We need to remove paths from it, so that next IO can insert
* paths (->mp_skip_entry) into a skip_list again.
*/
@@ -928,7 +928,7 @@ static inline void path_it_deinit(struct path_it *it)
* the corresponding buffer of rtrs_iu (req->iu->buf), which later on will
* also hold the control message of rtrs.
* @req: an io request holding information about IO.
- * @sess: client session
+ * @clt_path: client path
* @conf: conformation callback function to notify upper layer.
* @permit: permit for allocation of RDMA remote buffer
* @priv: private pointer
@@ -940,7 +940,7 @@ static inline void path_it_deinit(struct path_it *it)
* @dir: direction of the IO.
*/
static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
- struct rtrs_clt_sess *sess,
+ struct rtrs_clt_path *clt_path,
void (*conf)(void *priv, int errno),
struct rtrs_permit *permit, void *priv,
const struct kvec *vec, size_t usr_len,
@@ -958,13 +958,13 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
req->sg_cnt = sg_cnt;
req->priv = priv;
req->dir = dir;
- req->con = rtrs_permit_to_clt_con(sess, permit);
+ req->con = rtrs_permit_to_clt_con(clt_path, permit);
req->conf = conf;
req->need_inv = false;
req->need_inv_comp = false;
req->inv_errno = 0;
refcount_set(&req->ref, 1);
- req->mp_policy = sess->clt->mp_policy;
+ req->mp_policy = clt_path->clt->mp_policy;
iov_iter_kvec(&iter, READ, vec, 1, usr_len);
len = _copy_from_iter(req->iu->buf, usr_len, &iter);
@@ -974,7 +974,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
}
static struct rtrs_clt_io_req *
-rtrs_clt_get_req(struct rtrs_clt_sess *sess,
+rtrs_clt_get_req(struct rtrs_clt_path *clt_path,
void (*conf)(void *priv, int errno),
struct rtrs_permit *permit, void *priv,
const struct kvec *vec, size_t usr_len,
@@ -983,14 +983,14 @@ rtrs_clt_get_req(struct rtrs_clt_sess *sess,
{
struct rtrs_clt_io_req *req;
- req = &sess->reqs[permit->mem_id];
- rtrs_clt_init_req(req, sess, conf, permit, priv, vec, usr_len,
+ req = &clt_path->reqs[permit->mem_id];
+ rtrs_clt_init_req(req, clt_path, conf, permit, priv, vec, usr_len,
sg, sg_cnt, data_len, dir);
return req;
}
static struct rtrs_clt_io_req *
-rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess,
+rtrs_clt_get_copy_req(struct rtrs_clt_path *alive_path,
struct rtrs_clt_io_req *fail_req)
{
struct rtrs_clt_io_req *req;
@@ -999,8 +999,8 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess,
.iov_len = fail_req->usr_len
};
- req = &alive_sess->reqs[fail_req->permit->mem_id];
- rtrs_clt_init_req(req, alive_sess, fail_req->conf, fail_req->permit,
+ req = &alive_path->reqs[fail_req->permit->mem_id];
+ rtrs_clt_init_req(req, alive_path, fail_req->conf, fail_req->permit,
fail_req->priv, &vec, fail_req->usr_len,
fail_req->sglist, fail_req->sg_cnt,
fail_req->data_len, fail_req->dir);
@@ -1010,10 +1010,11 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess,
static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
struct rtrs_clt_io_req *req,
struct rtrs_rbuf *rbuf, bool fr_en,
- u32 size, u32 imm, struct ib_send_wr *wr,
+ u32 count, u32 size, u32 imm,
+ struct ib_send_wr *wr,
struct ib_send_wr *tail)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
struct ib_sge *sge = req->sge;
enum ib_send_flags flags;
struct scatterlist *sg;
@@ -1030,25 +1031,26 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
num_sge = 2;
ptail = tail;
} else {
- for_each_sg(req->sglist, sg, req->sg_cnt, i) {
+ for_each_sg(req->sglist, sg, count, i) {
sge[i].addr = sg_dma_address(sg);
sge[i].length = sg_dma_len(sg);
- sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey;
+ sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey;
}
- num_sge = 1 + req->sg_cnt;
+ num_sge = 1 + count;
}
sge[i].addr = req->iu->dma_addr;
sge[i].length = size;
- sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey;
+ sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey;
/*
* From time to time we have to post signalled sends,
* or send queue will fill up and only QP reset can help.
*/
- flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ?
+ flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ?
0 : IB_SEND_SIGNALED;
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr,
+ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev,
+ req->iu->dma_addr,
size, DMA_TO_DEVICE);
return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, sge, num_sge,
@@ -1074,8 +1076,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
{
struct rtrs_clt_con *con = req->con;
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_clt_sess *sess = to_clt_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_clt_path *clt_path = to_clt_path(s);
struct rtrs_msg_rdma_write *msg;
struct rtrs_rbuf *rbuf;
@@ -1088,13 +1090,13 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len;
- if (tsize > sess->chunk_size) {
+ if (tsize > clt_path->chunk_size) {
rtrs_wrn(s, "Write request failed, size too big %zu > %d\n",
- tsize, sess->chunk_size);
+ tsize, clt_path->chunk_size);
return -EMSGSIZE;
}
if (req->sg_cnt) {
- count = ib_dma_map_sg(sess->s.dev->ib_dev, req->sglist,
+ count = ib_dma_map_sg(clt_path->s.dev->ib_dev, req->sglist,
req->sg_cnt, req->dir);
if (!count) {
rtrs_wrn(s, "Write request failed, map failed\n");
@@ -1111,7 +1113,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
imm = rtrs_to_io_req_imm(imm);
buf_id = req->permit->mem_id;
req->sg_size = tsize;
- rbuf = &sess->rbufs[buf_id];
+ rbuf = &clt_path->rbufs[buf_id];
if (count) {
ret = rtrs_map_sg_fr(req, count);
@@ -1119,7 +1121,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
rtrs_err_rl(s,
"Write request failed, failed to map fast reg. data, err: %d\n",
ret);
- ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
+ ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist,
req->sg_cnt, req->dir);
return ret;
}
@@ -1147,18 +1149,18 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
*/
rtrs_clt_update_all_stats(req, WRITE);
- ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en,
+ ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, count,
req->usr_len + sizeof(*msg),
imm, wr, &inv_wr);
if (ret) {
rtrs_err_rl(s,
"Write request failed: error=%d path=%s [%s:%u]\n",
- ret, kobject_name(&sess->kobj), sess->hca_name,
- sess->hca_port);
+ ret, kobject_name(&clt_path->kobj), clt_path->hca_name,
+ clt_path->hca_port);
if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
- atomic_dec(&sess->stats->inflight);
+ atomic_dec(&clt_path->stats->inflight);
if (req->sg_cnt)
- ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
+ ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist,
req->sg_cnt, req->dir);
}
@@ -1168,10 +1170,10 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
{
struct rtrs_clt_con *con = req->con;
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_clt_sess *sess = to_clt_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_clt_path *clt_path = to_clt_path(s);
struct rtrs_msg_rdma_read *msg;
- struct rtrs_ib_dev *dev = sess->s.dev;
+ struct rtrs_ib_dev *dev = clt_path->s.dev;
struct ib_reg_wr rwr;
struct ib_send_wr *wr = NULL;
@@ -1181,10 +1183,10 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len;
- if (tsize > sess->chunk_size) {
+ if (tsize > clt_path->chunk_size) {
rtrs_wrn(s,
"Read request failed, message size is %zu, bigger than CHUNK_SIZE %d\n",
- tsize, sess->chunk_size);
+ tsize, clt_path->chunk_size);
return -EMSGSIZE;
}
@@ -1254,15 +1256,15 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
*/
rtrs_clt_update_all_stats(req, READ);
- ret = rtrs_post_send_rdma(req->con, req, &sess->rbufs[buf_id],
+ ret = rtrs_post_send_rdma(req->con, req, &clt_path->rbufs[buf_id],
req->data_len, imm, wr);
if (ret) {
rtrs_err_rl(s,
"Read request failed: error=%d path=%s [%s:%u]\n",
- ret, kobject_name(&sess->kobj), sess->hca_name,
- sess->hca_port);
+ ret, kobject_name(&clt_path->kobj), clt_path->hca_name,
+ clt_path->hca_port);
if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
- atomic_dec(&sess->stats->inflight);
+ atomic_dec(&clt_path->stats->inflight);
req->need_inv = false;
if (req->sg_cnt)
ib_dma_unmap_sg(dev->ib_dev, req->sglist,
@@ -1277,21 +1279,21 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
* @clt: clt context
* @fail_req: a failed io request.
*/
-static int rtrs_clt_failover_req(struct rtrs_clt *clt,
+static int rtrs_clt_failover_req(struct rtrs_clt_sess *clt,
struct rtrs_clt_io_req *fail_req)
{
- struct rtrs_clt_sess *alive_sess;
+ struct rtrs_clt_path *alive_path;
struct rtrs_clt_io_req *req;
int err = -ECONNABORTED;
struct path_it it;
rcu_read_lock();
for (path_it_init(&it, clt);
- (alive_sess = it.next_path(&it)) && it.i < it.clt->paths_num;
+ (alive_path = it.next_path(&it)) && it.i < it.clt->paths_num;
it.i++) {
- if (READ_ONCE(alive_sess->state) != RTRS_CLT_CONNECTED)
+ if (READ_ONCE(alive_path->state) != RTRS_CLT_CONNECTED)
continue;
- req = rtrs_clt_get_copy_req(alive_sess, fail_req);
+ req = rtrs_clt_get_copy_req(alive_path, fail_req);
if (req->dir == DMA_TO_DEVICE)
err = rtrs_clt_write_req(req);
else
@@ -1301,7 +1303,7 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt,
continue;
}
/* Success path */
- rtrs_clt_inc_failover_cnt(alive_sess->stats);
+ rtrs_clt_inc_failover_cnt(alive_path->stats);
break;
}
path_it_deinit(&it);
@@ -1310,16 +1312,16 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt,
return err;
}
-static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess)
+static void fail_all_outstanding_reqs(struct rtrs_clt_path *clt_path)
{
- struct rtrs_clt *clt = sess->clt;
+ struct rtrs_clt_sess *clt = clt_path->clt;
struct rtrs_clt_io_req *req;
int i, err;
- if (!sess->reqs)
+ if (!clt_path->reqs)
return;
- for (i = 0; i < sess->queue_depth; ++i) {
- req = &sess->reqs[i];
+ for (i = 0; i < clt_path->queue_depth; ++i) {
+ req = &clt_path->reqs[i];
if (!req->in_use)
continue;
@@ -1337,38 +1339,39 @@ static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess)
}
}
-static void free_sess_reqs(struct rtrs_clt_sess *sess)
+static void free_path_reqs(struct rtrs_clt_path *clt_path)
{
struct rtrs_clt_io_req *req;
int i;
- if (!sess->reqs)
+ if (!clt_path->reqs)
return;
- for (i = 0; i < sess->queue_depth; ++i) {
- req = &sess->reqs[i];
+ for (i = 0; i < clt_path->queue_depth; ++i) {
+ req = &clt_path->reqs[i];
if (req->mr)
ib_dereg_mr(req->mr);
kfree(req->sge);
- rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(req->iu, clt_path->s.dev->ib_dev, 1);
}
- kfree(sess->reqs);
- sess->reqs = NULL;
+ kfree(clt_path->reqs);
+ clt_path->reqs = NULL;
}
-static int alloc_sess_reqs(struct rtrs_clt_sess *sess)
+static int alloc_path_reqs(struct rtrs_clt_path *clt_path)
{
struct rtrs_clt_io_req *req;
int i, err = -ENOMEM;
- sess->reqs = kcalloc(sess->queue_depth, sizeof(*sess->reqs),
- GFP_KERNEL);
- if (!sess->reqs)
+ clt_path->reqs = kcalloc(clt_path->queue_depth,
+ sizeof(*clt_path->reqs),
+ GFP_KERNEL);
+ if (!clt_path->reqs)
return -ENOMEM;
- for (i = 0; i < sess->queue_depth; ++i) {
- req = &sess->reqs[i];
- req->iu = rtrs_iu_alloc(1, sess->max_hdr_size, GFP_KERNEL,
- sess->s.dev->ib_dev,
+ for (i = 0; i < clt_path->queue_depth; ++i) {
+ req = &clt_path->reqs[i];
+ req->iu = rtrs_iu_alloc(1, clt_path->max_hdr_size, GFP_KERNEL,
+ clt_path->s.dev->ib_dev,
DMA_TO_DEVICE,
rtrs_clt_rdma_done);
if (!req->iu)
@@ -1378,13 +1381,14 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess)
if (!req->sge)
goto out;
- req->mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
- sess->max_pages_per_mr);
+ req->mr = ib_alloc_mr(clt_path->s.dev->ib_pd,
+ IB_MR_TYPE_MEM_REG,
+ clt_path->max_pages_per_mr);
if (IS_ERR(req->mr)) {
err = PTR_ERR(req->mr);
req->mr = NULL;
- pr_err("Failed to alloc sess->max_pages_per_mr %d\n",
- sess->max_pages_per_mr);
+ pr_err("Failed to alloc clt_path->max_pages_per_mr %d\n",
+ clt_path->max_pages_per_mr);
goto out;
}
@@ -1394,18 +1398,17 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess)
return 0;
out:
- free_sess_reqs(sess);
+ free_path_reqs(clt_path);
return err;
}
-static int alloc_permits(struct rtrs_clt *clt)
+static int alloc_permits(struct rtrs_clt_sess *clt)
{
unsigned int chunk_bits;
int err, i;
- clt->permits_map = kcalloc(BITS_TO_LONGS(clt->queue_depth),
- sizeof(long), GFP_KERNEL);
+ clt->permits_map = bitmap_zalloc(clt->queue_depth, GFP_KERNEL);
if (!clt->permits_map) {
err = -ENOMEM;
goto out_err;
@@ -1427,33 +1430,31 @@ static int alloc_permits(struct rtrs_clt *clt)
return 0;
err_map:
- kfree(clt->permits_map);
+ bitmap_free(clt->permits_map);
clt->permits_map = NULL;
out_err:
return err;
}
-static void free_permits(struct rtrs_clt *clt)
+static void free_permits(struct rtrs_clt_sess *clt)
{
- if (clt->permits_map) {
- size_t sz = clt->queue_depth;
-
+ if (clt->permits_map)
wait_event(clt->permits_wait,
- find_first_bit(clt->permits_map, sz) >= sz);
- }
- kfree(clt->permits_map);
+ bitmap_empty(clt->permits_map, clt->queue_depth));
+
+ bitmap_free(clt->permits_map);
clt->permits_map = NULL;
kfree(clt->permits);
clt->permits = NULL;
}
-static void query_fast_reg_mode(struct rtrs_clt_sess *sess)
+static void query_fast_reg_mode(struct rtrs_clt_path *clt_path)
{
struct ib_device *ib_dev;
u64 max_pages_per_mr;
int mr_page_shift;
- ib_dev = sess->s.dev->ib_dev;
+ ib_dev = clt_path->s.dev->ib_dev;
/*
* Use the smallest page size supported by the HCA, down to a
@@ -1463,24 +1464,24 @@ static void query_fast_reg_mode(struct rtrs_clt_sess *sess)
mr_page_shift = max(12, ffs(ib_dev->attrs.page_size_cap) - 1);
max_pages_per_mr = ib_dev->attrs.max_mr_size;
do_div(max_pages_per_mr, (1ull << mr_page_shift));
- sess->max_pages_per_mr =
- min3(sess->max_pages_per_mr, (u32)max_pages_per_mr,
+ clt_path->max_pages_per_mr =
+ min3(clt_path->max_pages_per_mr, (u32)max_pages_per_mr,
ib_dev->attrs.max_fast_reg_page_list_len);
- sess->clt->max_segments =
- min(sess->max_pages_per_mr, sess->clt->max_segments);
+ clt_path->clt->max_segments =
+ min(clt_path->max_pages_per_mr, clt_path->clt->max_segments);
}
-static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess,
+static bool rtrs_clt_change_state_get_old(struct rtrs_clt_path *clt_path,
enum rtrs_clt_state new_state,
enum rtrs_clt_state *old_state)
{
bool changed;
- spin_lock_irq(&sess->state_wq.lock);
+ spin_lock_irq(&clt_path->state_wq.lock);
if (old_state)
- *old_state = sess->state;
- changed = rtrs_clt_change_state(sess, new_state);
- spin_unlock_irq(&sess->state_wq.lock);
+ *old_state = clt_path->state;
+ changed = rtrs_clt_change_state(clt_path, new_state);
+ spin_unlock_irq(&clt_path->state_wq.lock);
return changed;
}
@@ -1492,9 +1493,9 @@ static void rtrs_clt_hb_err_handler(struct rtrs_con *c)
rtrs_rdma_error_recovery(con);
}
-static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess)
+static void rtrs_clt_init_hb(struct rtrs_clt_path *clt_path)
{
- rtrs_init_hb(&sess->s, &io_comp_cqe,
+ rtrs_init_hb(&clt_path->s, &io_comp_cqe,
RTRS_HB_INTERVAL_MS,
RTRS_HB_MISSED_MAX,
rtrs_clt_hb_err_handler,
@@ -1504,17 +1505,32 @@ static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess)
static void rtrs_clt_reconnect_work(struct work_struct *work);
static void rtrs_clt_close_work(struct work_struct *work);
-static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
+static void rtrs_clt_err_recovery_work(struct work_struct *work)
+{
+ struct rtrs_clt_path *clt_path;
+ struct rtrs_clt_sess *clt;
+ int delay_ms;
+
+ clt_path = container_of(work, struct rtrs_clt_path, err_recovery_work);
+ clt = clt_path->clt;
+ delay_ms = clt->reconnect_delay_sec * 1000;
+ rtrs_clt_stop_and_destroy_conns(clt_path);
+ queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork,
+ msecs_to_jiffies(delay_ms +
+ prandom_u32_max(RTRS_RECONNECT_SEED)));
+}
+
+static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt,
const struct rtrs_addr *path,
size_t con_num, u32 nr_poll_queues)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
int err = -ENOMEM;
int cpu;
size_t total_con;
- sess = kzalloc(sizeof(*sess), GFP_KERNEL);
- if (!sess)
+ clt_path = kzalloc(sizeof(*clt_path), GFP_KERNEL);
+ if (!clt_path)
goto err;
/*
@@ -1522,20 +1538,21 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
* +1: Extra connection for user messages
*/
total_con = con_num + nr_poll_queues + 1;
- sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
- if (!sess->s.con)
- goto err_free_sess;
+ clt_path->s.con = kcalloc(total_con, sizeof(*clt_path->s.con),
+ GFP_KERNEL);
+ if (!clt_path->s.con)
+ goto err_free_path;
- sess->s.con_num = total_con;
- sess->s.irq_con_num = con_num + 1;
+ clt_path->s.con_num = total_con;
+ clt_path->s.irq_con_num = con_num + 1;
- sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
- if (!sess->stats)
+ clt_path->stats = kzalloc(sizeof(*clt_path->stats), GFP_KERNEL);
+ if (!clt_path->stats)
goto err_free_con;
- mutex_init(&sess->init_mutex);
- uuid_gen(&sess->s.uuid);
- memcpy(&sess->s.dst_addr, path->dst,
+ mutex_init(&clt_path->init_mutex);
+ uuid_gen(&clt_path->s.uuid);
+ memcpy(&clt_path->s.dst_addr, path->dst,
rdma_addr_size((struct sockaddr *)path->dst));
/*
@@ -1544,53 +1561,55 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
* the sess->src_addr will contain only zeros, which is then fine.
*/
if (path->src)
- memcpy(&sess->s.src_addr, path->src,
+ memcpy(&clt_path->s.src_addr, path->src,
rdma_addr_size((struct sockaddr *)path->src));
- strscpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
- sess->clt = clt;
- sess->max_pages_per_mr = RTRS_MAX_SEGMENTS;
- init_waitqueue_head(&sess->state_wq);
- sess->state = RTRS_CLT_CONNECTING;
- atomic_set(&sess->connected_cnt, 0);
- INIT_WORK(&sess->close_work, rtrs_clt_close_work);
- INIT_DELAYED_WORK(&sess->reconnect_dwork, rtrs_clt_reconnect_work);
- rtrs_clt_init_hb(sess);
-
- sess->mp_skip_entry = alloc_percpu(typeof(*sess->mp_skip_entry));
- if (!sess->mp_skip_entry)
+ strscpy(clt_path->s.sessname, clt->sessname,
+ sizeof(clt_path->s.sessname));
+ clt_path->clt = clt;
+ clt_path->max_pages_per_mr = RTRS_MAX_SEGMENTS;
+ init_waitqueue_head(&clt_path->state_wq);
+ clt_path->state = RTRS_CLT_CONNECTING;
+ atomic_set(&clt_path->connected_cnt, 0);
+ INIT_WORK(&clt_path->close_work, rtrs_clt_close_work);
+ INIT_WORK(&clt_path->err_recovery_work, rtrs_clt_err_recovery_work);
+ INIT_DELAYED_WORK(&clt_path->reconnect_dwork, rtrs_clt_reconnect_work);
+ rtrs_clt_init_hb(clt_path);
+
+ clt_path->mp_skip_entry = alloc_percpu(typeof(*clt_path->mp_skip_entry));
+ if (!clt_path->mp_skip_entry)
goto err_free_stats;
for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(per_cpu_ptr(sess->mp_skip_entry, cpu));
+ INIT_LIST_HEAD(per_cpu_ptr(clt_path->mp_skip_entry, cpu));
- err = rtrs_clt_init_stats(sess->stats);
+ err = rtrs_clt_init_stats(clt_path->stats);
if (err)
goto err_free_percpu;
- return sess;
+ return clt_path;
err_free_percpu:
- free_percpu(sess->mp_skip_entry);
+ free_percpu(clt_path->mp_skip_entry);
err_free_stats:
- kfree(sess->stats);
+ kfree(clt_path->stats);
err_free_con:
- kfree(sess->s.con);
-err_free_sess:
- kfree(sess);
+ kfree(clt_path->s.con);
+err_free_path:
+ kfree(clt_path);
err:
return ERR_PTR(err);
}
-void free_sess(struct rtrs_clt_sess *sess)
+void free_path(struct rtrs_clt_path *clt_path)
{
- free_percpu(sess->mp_skip_entry);
- mutex_destroy(&sess->init_mutex);
- kfree(sess->s.con);
- kfree(sess->rbufs);
- kfree(sess);
+ free_percpu(clt_path->mp_skip_entry);
+ mutex_destroy(&clt_path->init_mutex);
+ kfree(clt_path->s.con);
+ kfree(clt_path->rbufs);
+ kfree(clt_path);
}
-static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
+static int create_con(struct rtrs_clt_path *clt_path, unsigned int cid)
{
struct rtrs_clt_con *con;
@@ -1601,28 +1620,28 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
/* Map first two connections to the first CPU */
con->cpu = (cid ? cid - 1 : 0) % nr_cpu_ids;
con->c.cid = cid;
- con->c.sess = &sess->s;
+ con->c.path = &clt_path->s;
/* Align with srv, init as 1 */
atomic_set(&con->c.wr_cnt, 1);
mutex_init(&con->con_mutex);
- sess->s.con[cid] = &con->c;
+ clt_path->s.con[cid] = &con->c;
return 0;
}
static void destroy_con(struct rtrs_clt_con *con)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
- sess->s.con[con->c.cid] = NULL;
+ clt_path->s.con[con->c.cid] = NULL;
mutex_destroy(&con->con_mutex);
kfree(con);
}
static int create_con_cq_qp(struct rtrs_clt_con *con)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
u32 max_send_wr, max_recv_wr, cq_num, max_send_sge, wr_limit;
int err, cq_vector;
struct rtrs_msg_rkey_rsp *rsp;
@@ -1631,7 +1650,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
if (con->c.cid == 0) {
max_send_sge = 1;
/* We must be the first here */
- if (WARN_ON(sess->s.dev))
+ if (WARN_ON(clt_path->s.dev))
return -EINVAL;
/*
@@ -1639,16 +1658,16 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
* Be careful not to close user connection before ib dev
* is gracefully put.
*/
- sess->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device,
+ clt_path->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device,
&dev_pd);
- if (!sess->s.dev) {
- rtrs_wrn(sess->clt,
+ if (!clt_path->s.dev) {
+ rtrs_wrn(clt_path->clt,
"rtrs_ib_dev_find_get_or_add(): no memory\n");
return -ENOMEM;
}
- sess->s.dev_ref = 1;
- query_fast_reg_mode(sess);
- wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr;
+ clt_path->s.dev_ref = 1;
+ query_fast_reg_mode(clt_path);
+ wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr;
/*
* Two (request + registration) completion for send
* Two for recv if always_invalidate is set on server
@@ -1665,27 +1684,28 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
* This is always true if user connection (cid == 0) is
* established first.
*/
- if (WARN_ON(!sess->s.dev))
+ if (WARN_ON(!clt_path->s.dev))
return -EINVAL;
- if (WARN_ON(!sess->queue_depth))
+ if (WARN_ON(!clt_path->queue_depth))
return -EINVAL;
- wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr;
+ wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr;
/* Shared between connections */
- sess->s.dev_ref++;
+ clt_path->s.dev_ref++;
max_send_wr = min_t(int, wr_limit,
/* QD * (REQ + RSP + FR REGS or INVS) + drain */
- sess->queue_depth * 3 + 1);
+ clt_path->queue_depth * 3 + 1);
max_recv_wr = min_t(int, wr_limit,
- sess->queue_depth * 3 + 1);
+ clt_path->queue_depth * 3 + 1);
max_send_sge = 2;
}
atomic_set(&con->c.sq_wr_avail, max_send_wr);
cq_num = max_send_wr + max_recv_wr;
/* alloc iu to recv new rkey reply when server reports flags set */
- if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
+ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
con->rsp_ius = rtrs_iu_alloc(cq_num, sizeof(*rsp),
- GFP_KERNEL, sess->s.dev->ib_dev,
+ GFP_KERNEL,
+ clt_path->s.dev->ib_dev,
DMA_FROM_DEVICE,
rtrs_clt_rdma_done);
if (!con->rsp_ius)
@@ -1693,13 +1713,13 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
con->queue_num = cq_num;
}
cq_num = max_send_wr + max_recv_wr;
- cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
- if (con->c.cid >= sess->s.irq_con_num)
- err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge,
+ cq_vector = con->cpu % clt_path->s.dev->ib_dev->num_comp_vectors;
+ if (con->c.cid >= clt_path->s.irq_con_num)
+ err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge,
cq_vector, cq_num, max_send_wr,
max_recv_wr, IB_POLL_DIRECT);
else
- err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge,
+ err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge,
cq_vector, cq_num, max_send_wr,
max_recv_wr, IB_POLL_SOFTIRQ);
/*
@@ -1711,7 +1731,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
static void destroy_con_cq_qp(struct rtrs_clt_con *con)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
/*
* Be careful here: destroy_con_cq_qp() can be called even
@@ -1720,13 +1740,14 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con)
lockdep_assert_held(&con->con_mutex);
rtrs_cq_qp_destroy(&con->c);
if (con->rsp_ius) {
- rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_num);
+ rtrs_iu_free(con->rsp_ius, clt_path->s.dev->ib_dev,
+ con->queue_num);
con->rsp_ius = NULL;
con->queue_num = 0;
}
- if (sess->s.dev_ref && !--sess->s.dev_ref) {
- rtrs_ib_dev_put(sess->s.dev);
- sess->s.dev = NULL;
+ if (clt_path->s.dev_ref && !--clt_path->s.dev_ref) {
+ rtrs_ib_dev_put(clt_path->s.dev);
+ clt_path->s.dev = NULL;
}
}
@@ -1745,7 +1766,7 @@ static void destroy_cm(struct rtrs_clt_con *con)
static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
{
- struct rtrs_sess *s = con->c.sess;
+ struct rtrs_path *s = con->c.path;
int err;
mutex_lock(&con->con_mutex);
@@ -1764,8 +1785,8 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- struct rtrs_clt *clt = sess->clt;
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct rtrs_clt_sess *clt = clt_path->clt;
struct rtrs_msg_conn_req msg;
struct rdma_conn_param param;
@@ -1782,11 +1803,11 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
.magic = cpu_to_le16(RTRS_MAGIC),
.version = cpu_to_le16(RTRS_PROTO_VER),
.cid = cpu_to_le16(con->c.cid),
- .cid_num = cpu_to_le16(sess->s.con_num),
- .recon_cnt = cpu_to_le16(sess->s.recon_cnt),
+ .cid_num = cpu_to_le16(clt_path->s.con_num),
+ .recon_cnt = cpu_to_le16(clt_path->s.recon_cnt),
};
- msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0;
- uuid_copy(&msg.sess_uuid, &sess->s.uuid);
+ msg.first_conn = clt_path->for_new_clt ? FIRST_CONN : 0;
+ uuid_copy(&msg.sess_uuid, &clt_path->s.uuid);
uuid_copy(&msg.paths_uuid, &clt->paths_uuid);
err = rdma_connect_locked(con->c.cm_id, &param);
@@ -1799,8 +1820,8 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
struct rdma_cm_event *ev)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- struct rtrs_clt *clt = sess->clt;
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct rtrs_clt_sess *clt = clt_path->clt;
const struct rtrs_msg_conn_rsp *msg;
u16 version, queue_depth;
int errno;
@@ -1831,31 +1852,32 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
if (con->c.cid == 0) {
queue_depth = le16_to_cpu(msg->queue_depth);
- if (sess->queue_depth > 0 && queue_depth != sess->queue_depth) {
+ if (clt_path->queue_depth > 0 && queue_depth != clt_path->queue_depth) {
rtrs_err(clt, "Error: queue depth changed\n");
/*
* Stop any more reconnection attempts
*/
- sess->reconnect_attempts = -1;
+ clt_path->reconnect_attempts = -1;
rtrs_err(clt,
"Disabling auto-reconnect. Trigger a manual reconnect after issue is resolved\n");
return -ECONNRESET;
}
- if (!sess->rbufs) {
- sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs),
- GFP_KERNEL);
- if (!sess->rbufs)
+ if (!clt_path->rbufs) {
+ clt_path->rbufs = kcalloc(queue_depth,
+ sizeof(*clt_path->rbufs),
+ GFP_KERNEL);
+ if (!clt_path->rbufs)
return -ENOMEM;
}
- sess->queue_depth = queue_depth;
- sess->s.signal_interval = min_not_zero(queue_depth,
+ clt_path->queue_depth = queue_depth;
+ clt_path->s.signal_interval = min_not_zero(queue_depth,
(unsigned short) SERVICE_CON_QUEUE_DEPTH);
- sess->max_hdr_size = le32_to_cpu(msg->max_hdr_size);
- sess->max_io_size = le32_to_cpu(msg->max_io_size);
- sess->flags = le32_to_cpu(msg->flags);
- sess->chunk_size = sess->max_io_size + sess->max_hdr_size;
+ clt_path->max_hdr_size = le32_to_cpu(msg->max_hdr_size);
+ clt_path->max_io_size = le32_to_cpu(msg->max_io_size);
+ clt_path->flags = le32_to_cpu(msg->flags);
+ clt_path->chunk_size = clt_path->max_io_size + clt_path->max_hdr_size;
/*
* Global IO size is always a minimum.
@@ -1866,20 +1888,20 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
* connections in parallel, use lock.
*/
mutex_lock(&clt->paths_mutex);
- clt->queue_depth = sess->queue_depth;
- clt->max_io_size = min_not_zero(sess->max_io_size,
+ clt->queue_depth = clt_path->queue_depth;
+ clt->max_io_size = min_not_zero(clt_path->max_io_size,
clt->max_io_size);
mutex_unlock(&clt->paths_mutex);
/*
* Cache the hca_port and hca_name for sysfs
*/
- sess->hca_port = con->c.cm_id->port_num;
- scnprintf(sess->hca_name, sizeof(sess->hca_name),
- sess->s.dev->ib_dev->name);
- sess->s.src_addr = con->c.cm_id->route.addr.src_addr;
+ clt_path->hca_port = con->c.cm_id->port_num;
+ scnprintf(clt_path->hca_name, sizeof(clt_path->hca_name),
+ clt_path->s.dev->ib_dev->name);
+ clt_path->s.src_addr = con->c.cm_id->route.addr.src_addr;
/* set for_new_clt, to allow future reconnect on any path */
- sess->for_new_clt = 1;
+ clt_path->for_new_clt = 1;
}
return 0;
@@ -1887,16 +1909,16 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
static inline void flag_success_on_conn(struct rtrs_clt_con *con)
{
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
- atomic_inc(&sess->connected_cnt);
+ atomic_inc(&clt_path->connected_cnt);
con->cm_err = 1;
}
static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
struct rdma_cm_event *ev)
{
- struct rtrs_sess *s = con->c.sess;
+ struct rtrs_path *s = con->c.path;
const struct rtrs_msg_conn_rsp *msg;
const char *rej_msg;
int status, errno;
@@ -1924,23 +1946,25 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
return -ECONNRESET;
}
-void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait)
+void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait)
{
- if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL))
- queue_work(rtrs_wq, &sess->close_work);
+ trace_rtrs_clt_close_conns(clt_path);
+
+ if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSING, NULL))
+ queue_work(rtrs_wq, &clt_path->close_work);
if (wait)
- flush_work(&sess->close_work);
+ flush_work(&clt_path->close_work);
}
static inline void flag_error_on_conn(struct rtrs_clt_con *con, int cm_err)
{
if (con->cm_err == 1) {
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = to_clt_sess(con->c.sess);
- if (atomic_dec_and_test(&sess->connected_cnt))
+ clt_path = to_clt_path(con->c.path);
+ if (atomic_dec_and_test(&clt_path->connected_cnt))
- wake_up(&sess->state_wq);
+ wake_up(&clt_path->state_wq);
}
con->cm_err = cm_err;
}
@@ -1949,8 +1973,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *ev)
{
struct rtrs_clt_con *con = cm_id->context;
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_clt_sess *sess = to_clt_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_clt_path *clt_path = to_clt_path(s);
int cm_err = 0;
switch (ev->event) {
@@ -1968,7 +1992,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
* i.e. wake up without state change, but we set cm_err.
*/
flag_success_on_conn(con);
- wake_up(&sess->state_wq);
+ wake_up(&clt_path->state_wq);
return 0;
}
break;
@@ -1997,7 +2021,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
/*
* Device removal is a special case. Queue close and return 0.
*/
- rtrs_clt_close_conns(sess, false);
+ rtrs_clt_close_conns(clt_path, false);
return 0;
default:
rtrs_err(s, "Unexpected RDMA CM error (CM event: %s, err: %d)\n",
@@ -2020,13 +2044,13 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
static int create_cm(struct rtrs_clt_con *con)
{
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_clt_sess *sess = to_clt_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_clt_path *clt_path = to_clt_path(s);
struct rdma_cm_id *cm_id;
int err;
cm_id = rdma_create_id(&init_net, rtrs_clt_rdma_cm_handler, con,
- sess->s.dst_addr.ss_family == AF_IB ?
+ clt_path->s.dst_addr.ss_family == AF_IB ?
RDMA_PS_IB : RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id)) {
err = PTR_ERR(cm_id);
@@ -2042,8 +2066,8 @@ static int create_cm(struct rtrs_clt_con *con)
rtrs_err(s, "Set address reuse failed, err: %d\n", err);
goto destroy_cm;
}
- err = rdma_resolve_addr(cm_id, (struct sockaddr *)&sess->s.src_addr,
- (struct sockaddr *)&sess->s.dst_addr,
+ err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr,
+ (struct sockaddr *)&clt_path->s.dst_addr,
RTRS_CONNECT_TIMEOUT_MS);
if (err) {
rtrs_err(s, "Failed to resolve address, err: %d\n", err);
@@ -2055,8 +2079,8 @@ static int create_cm(struct rtrs_clt_con *con)
* or session state was really changed to error by device removal.
*/
err = wait_event_interruptible_timeout(
- sess->state_wq,
- con->cm_err || sess->state != RTRS_CLT_CONNECTING,
+ clt_path->state_wq,
+ con->cm_err || clt_path->state != RTRS_CLT_CONNECTING,
msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS));
if (err == 0 || err == -ERESTARTSYS) {
if (err == 0)
@@ -2068,7 +2092,7 @@ static int create_cm(struct rtrs_clt_con *con)
err = con->cm_err;
goto errr;
}
- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTING) {
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) {
/* Device removal */
err = -ECONNABORTED;
goto errr;
@@ -2087,9 +2111,9 @@ destroy_cm:
return err;
}
-static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess)
+static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path)
{
- struct rtrs_clt *clt = sess->clt;
+ struct rtrs_clt_sess *clt = clt_path->clt;
int up;
/*
@@ -2113,19 +2137,19 @@ static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess)
mutex_unlock(&clt->paths_ev_mutex);
/* Mark session as established */
- sess->established = true;
- sess->reconnect_attempts = 0;
- sess->stats->reconnects.successful_cnt++;
+ clt_path->established = true;
+ clt_path->reconnect_attempts = 0;
+ clt_path->stats->reconnects.successful_cnt++;
}
-static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess)
+static void rtrs_clt_path_down(struct rtrs_clt_path *clt_path)
{
- struct rtrs_clt *clt = sess->clt;
+ struct rtrs_clt_sess *clt = clt_path->clt;
- if (!sess->established)
+ if (!clt_path->established)
return;
- sess->established = false;
+ clt_path->established = false;
mutex_lock(&clt->paths_ev_mutex);
WARN_ON(!clt->paths_up);
if (--clt->paths_up == 0)
@@ -2133,19 +2157,19 @@ static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess)
mutex_unlock(&clt->paths_ev_mutex);
}
-static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
+static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path)
{
struct rtrs_clt_con *con;
unsigned int cid;
- WARN_ON(READ_ONCE(sess->state) == RTRS_CLT_CONNECTED);
+ WARN_ON(READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED);
/*
* Possible race with rtrs_clt_open(), when DEVICE_REMOVAL comes
* exactly in between. Start destroying after it finishes.
*/
- mutex_lock(&sess->init_mutex);
- mutex_unlock(&sess->init_mutex);
+ mutex_lock(&clt_path->init_mutex);
+ mutex_unlock(&clt_path->init_mutex);
/*
* All IO paths must observe !CONNECTED state before we
@@ -2153,7 +2177,7 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
*/
synchronize_rcu();
- rtrs_stop_hb(&sess->s);
+ rtrs_stop_hb(&clt_path->s);
/*
* The order it utterly crucial: firstly disconnect and complete all
@@ -2162,15 +2186,15 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
* eventually notify upper layer about session disconnection.
*/
- for (cid = 0; cid < sess->s.con_num; cid++) {
- if (!sess->s.con[cid])
+ for (cid = 0; cid < clt_path->s.con_num; cid++) {
+ if (!clt_path->s.con[cid])
break;
- con = to_clt_con(sess->s.con[cid]);
+ con = to_clt_con(clt_path->s.con[cid]);
stop_cm(con);
}
- fail_all_outstanding_reqs(sess);
- free_sess_reqs(sess);
- rtrs_clt_sess_down(sess);
+ fail_all_outstanding_reqs(clt_path);
+ free_path_reqs(clt_path);
+ rtrs_clt_path_down(clt_path);
/*
* Wait for graceful shutdown, namely when peer side invokes
@@ -2180,13 +2204,14 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
* since CM does not fire anything. That is fine, we are not in
* hurry.
*/
- wait_event_timeout(sess->state_wq, !atomic_read(&sess->connected_cnt),
+ wait_event_timeout(clt_path->state_wq,
+ !atomic_read(&clt_path->connected_cnt),
msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS));
- for (cid = 0; cid < sess->s.con_num; cid++) {
- if (!sess->s.con[cid])
+ for (cid = 0; cid < clt_path->s.con_num; cid++) {
+ if (!clt_path->s.con[cid])
break;
- con = to_clt_con(sess->s.con[cid]);
+ con = to_clt_con(clt_path->s.con[cid]);
mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
mutex_unlock(&con->con_mutex);
@@ -2195,26 +2220,15 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
}
}
-static inline bool xchg_sessions(struct rtrs_clt_sess __rcu **rcu_ppcpu_path,
- struct rtrs_clt_sess *sess,
- struct rtrs_clt_sess *next)
-{
- struct rtrs_clt_sess **ppcpu_path;
-
- /* Call cmpxchg() without sparse warnings */
- ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path;
- return sess == cmpxchg(ppcpu_path, sess, next);
-}
-
-static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
+static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path)
{
- struct rtrs_clt *clt = sess->clt;
- struct rtrs_clt_sess *next;
+ struct rtrs_clt_sess *clt = clt_path->clt;
+ struct rtrs_clt_path *next;
bool wait_for_grace = false;
int cpu;
mutex_lock(&clt->paths_mutex);
- list_del_rcu(&sess->s.entry);
+ list_del_rcu(&clt_path->s.entry);
/* Make sure everybody observes path removal. */
synchronize_rcu();
@@ -2255,8 +2269,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
* removed. If @sess is the last element, then @next is NULL.
*/
rcu_read_lock();
- next = list_next_or_null_rr_rcu(&clt->paths_list, &sess->s.entry,
- typeof(*next), s.entry);
+ next = rtrs_clt_get_next_path_or_null(&clt->paths_list, clt_path);
rcu_read_unlock();
/*
@@ -2264,11 +2277,11 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
* removed, so change the pointer manually.
*/
for_each_possible_cpu(cpu) {
- struct rtrs_clt_sess __rcu **ppcpu_path;
+ struct rtrs_clt_path __rcu **ppcpu_path;
ppcpu_path = per_cpu_ptr(clt->pcpu_path, cpu);
if (rcu_dereference_protected(*ppcpu_path,
- lockdep_is_held(&clt->paths_mutex)) != sess)
+ lockdep_is_held(&clt->paths_mutex)) != clt_path)
/*
* synchronize_rcu() was called just after deleting
* entry from the list, thus IO code path cannot
@@ -2281,7 +2294,8 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
* We race with IO code path, which also changes pointer,
* thus we have to be careful not to overwrite it.
*/
- if (xchg_sessions(ppcpu_path, sess, next))
+ if (try_cmpxchg((struct rtrs_clt_path **)ppcpu_path, &clt_path,
+ next))
/*
* @ppcpu_path was successfully replaced with @next,
* that means that someone could also pick up the
@@ -2296,29 +2310,30 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
mutex_unlock(&clt->paths_mutex);
}
-static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess)
+static void rtrs_clt_add_path_to_arr(struct rtrs_clt_path *clt_path)
{
- struct rtrs_clt *clt = sess->clt;
+ struct rtrs_clt_sess *clt = clt_path->clt;
mutex_lock(&clt->paths_mutex);
clt->paths_num++;
- list_add_tail_rcu(&sess->s.entry, &clt->paths_list);
+ list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list);
mutex_unlock(&clt->paths_mutex);
}
static void rtrs_clt_close_work(struct work_struct *work)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = container_of(work, struct rtrs_clt_sess, close_work);
+ clt_path = container_of(work, struct rtrs_clt_path, close_work);
- cancel_delayed_work_sync(&sess->reconnect_dwork);
- rtrs_clt_stop_and_destroy_conns(sess);
- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL);
+ cancel_work_sync(&clt_path->err_recovery_work);
+ cancel_delayed_work_sync(&clt_path->reconnect_dwork);
+ rtrs_clt_stop_and_destroy_conns(clt_path);
+ rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSED, NULL);
}
-static int init_conns(struct rtrs_clt_sess *sess)
+static int init_conns(struct rtrs_clt_path *clt_path)
{
unsigned int cid;
int err;
@@ -2328,31 +2343,31 @@ static int init_conns(struct rtrs_clt_sess *sess)
* to avoid clashes with previous sessions not yet closed
* sessions on a server side.
*/
- sess->s.recon_cnt++;
+ clt_path->s.recon_cnt++;
/* Establish all RDMA connections */
- for (cid = 0; cid < sess->s.con_num; cid++) {
- err = create_con(sess, cid);
+ for (cid = 0; cid < clt_path->s.con_num; cid++) {
+ err = create_con(clt_path, cid);
if (err)
goto destroy;
- err = create_cm(to_clt_con(sess->s.con[cid]));
+ err = create_cm(to_clt_con(clt_path->s.con[cid]));
if (err) {
- destroy_con(to_clt_con(sess->s.con[cid]));
+ destroy_con(to_clt_con(clt_path->s.con[cid]));
goto destroy;
}
}
- err = alloc_sess_reqs(sess);
+ err = alloc_path_reqs(clt_path);
if (err)
goto destroy;
- rtrs_start_hb(&sess->s);
+ rtrs_start_hb(&clt_path->s);
return 0;
destroy:
while (cid--) {
- struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]);
+ struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]);
stop_cm(con);
@@ -2367,7 +2382,7 @@ destroy:
* doing rdma_resolve_addr(), switch to CONNECTION_ERR state
* manually to keep reconnecting.
*/
- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
+ rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL);
return err;
}
@@ -2375,31 +2390,32 @@ destroy:
static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
struct rtrs_iu *iu;
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1);
if (wc->status != IB_WC_SUCCESS) {
- rtrs_err(sess->clt, "Sess info request send failed: %s\n",
+ rtrs_err(clt_path->clt, "Path info request send failed: %s\n",
ib_wc_status_msg(wc->status));
- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
+ rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL);
return;
}
rtrs_clt_update_wc_stats(con);
}
-static int process_info_rsp(struct rtrs_clt_sess *sess,
+static int process_info_rsp(struct rtrs_clt_path *clt_path,
const struct rtrs_msg_info_rsp *msg)
{
unsigned int sg_cnt, total_len;
int i, sgi;
sg_cnt = le16_to_cpu(msg->sg_cnt);
- if (!sg_cnt || (sess->queue_depth % sg_cnt)) {
- rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n",
+ if (!sg_cnt || (clt_path->queue_depth % sg_cnt)) {
+ rtrs_err(clt_path->clt,
+ "Incorrect sg_cnt %d, is not multiple\n",
sg_cnt);
return -EINVAL;
}
@@ -2408,15 +2424,15 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
* Check if IB immediate data size is enough to hold the mem_id and
* the offset inside the memory chunk.
*/
- if ((ilog2(sg_cnt - 1) + 1) + (ilog2(sess->chunk_size - 1) + 1) >
+ if ((ilog2(sg_cnt - 1) + 1) + (ilog2(clt_path->chunk_size - 1) + 1) >
MAX_IMM_PAYL_BITS) {
- rtrs_err(sess->clt,
+ rtrs_err(clt_path->clt,
"RDMA immediate size (%db) not enough to encode %d buffers of size %dB\n",
- MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size);
+ MAX_IMM_PAYL_BITS, sg_cnt, clt_path->chunk_size);
return -EINVAL;
}
total_len = 0;
- for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) {
+ for (sgi = 0, i = 0; sgi < sg_cnt && i < clt_path->queue_depth; sgi++) {
const struct rtrs_sg_desc *desc = &msg->desc[sgi];
u32 len, rkey;
u64 addr;
@@ -2427,26 +2443,28 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
total_len += len;
- if (!len || (len % sess->chunk_size)) {
- rtrs_err(sess->clt, "Incorrect [%d].len %d\n", sgi,
+ if (!len || (len % clt_path->chunk_size)) {
+ rtrs_err(clt_path->clt, "Incorrect [%d].len %d\n",
+ sgi,
len);
return -EINVAL;
}
- for ( ; len && i < sess->queue_depth; i++) {
- sess->rbufs[i].addr = addr;
- sess->rbufs[i].rkey = rkey;
+ for ( ; len && i < clt_path->queue_depth; i++) {
+ clt_path->rbufs[i].addr = addr;
+ clt_path->rbufs[i].rkey = rkey;
- len -= sess->chunk_size;
- addr += sess->chunk_size;
+ len -= clt_path->chunk_size;
+ addr += clt_path->chunk_size;
}
}
/* Sanity check */
- if (sgi != sg_cnt || i != sess->queue_depth) {
- rtrs_err(sess->clt, "Incorrect sg vector, not fully mapped\n");
+ if (sgi != sg_cnt || i != clt_path->queue_depth) {
+ rtrs_err(clt_path->clt,
+ "Incorrect sg vector, not fully mapped\n");
return -EINVAL;
}
- if (total_len != sess->chunk_size * sess->queue_depth) {
- rtrs_err(sess->clt, "Incorrect total_len %d\n", total_len);
+ if (total_len != clt_path->chunk_size * clt_path->queue_depth) {
+ rtrs_err(clt_path->clt, "Incorrect total_len %d\n", total_len);
return -EINVAL;
}
@@ -2456,7 +2474,7 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
struct rtrs_msg_info_rsp *msg;
enum rtrs_clt_state state;
struct rtrs_iu *iu;
@@ -2468,37 +2486,37 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(con->c.cid);
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
if (wc->status != IB_WC_SUCCESS) {
- rtrs_err(sess->clt, "Sess info response recv failed: %s\n",
+ rtrs_err(clt_path->clt, "Path info response recv failed: %s\n",
ib_wc_status_msg(wc->status));
goto out;
}
WARN_ON(wc->opcode != IB_WC_RECV);
if (wc->byte_len < sizeof(*msg)) {
- rtrs_err(sess->clt, "Sess info response is malformed: size %d\n",
+ rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n",
wc->byte_len);
goto out;
}
- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
+ ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr,
iu->size, DMA_FROM_DEVICE);
msg = iu->buf;
if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP) {
- rtrs_err(sess->clt, "Sess info response is malformed: type %d\n",
+ rtrs_err(clt_path->clt, "Path info response is malformed: type %d\n",
le16_to_cpu(msg->type));
goto out;
}
rx_sz = sizeof(*msg);
rx_sz += sizeof(msg->desc[0]) * le16_to_cpu(msg->sg_cnt);
if (wc->byte_len < rx_sz) {
- rtrs_err(sess->clt, "Sess info response is malformed: size %d\n",
+ rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n",
wc->byte_len);
goto out;
}
- err = process_info_rsp(sess, msg);
+ err = process_info_rsp(clt_path, msg);
if (err)
goto out;
- err = post_recv_sess(sess);
+ err = post_recv_path(clt_path);
if (err)
goto out;
@@ -2506,25 +2524,25 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
out:
rtrs_clt_update_wc_stats(con);
- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
- rtrs_clt_change_state_get_old(sess, state, NULL);
+ rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1);
+ rtrs_clt_change_state_get_old(clt_path, state, NULL);
}
-static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
+static int rtrs_send_path_info(struct rtrs_clt_path *clt_path)
{
- struct rtrs_clt_con *usr_con = to_clt_con(sess->s.con[0]);
+ struct rtrs_clt_con *usr_con = to_clt_con(clt_path->s.con[0]);
struct rtrs_msg_info_req *msg;
struct rtrs_iu *tx_iu, *rx_iu;
size_t rx_sz;
int err;
rx_sz = sizeof(struct rtrs_msg_info_rsp);
- rx_sz += sizeof(struct rtrs_sg_desc) * sess->queue_depth;
+ rx_sz += sizeof(struct rtrs_sg_desc) * clt_path->queue_depth;
tx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), GFP_KERNEL,
- sess->s.dev->ib_dev, DMA_TO_DEVICE,
+ clt_path->s.dev->ib_dev, DMA_TO_DEVICE,
rtrs_clt_info_req_done);
- rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, sess->s.dev->ib_dev,
+ rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, clt_path->s.dev->ib_dev,
DMA_FROM_DEVICE, rtrs_clt_info_rsp_done);
if (!tx_iu || !rx_iu) {
err = -ENOMEM;
@@ -2533,33 +2551,34 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
/* Prepare for getting info response */
err = rtrs_iu_post_recv(&usr_con->c, rx_iu);
if (err) {
- rtrs_err(sess->clt, "rtrs_iu_post_recv(), err: %d\n", err);
+ rtrs_err(clt_path->clt, "rtrs_iu_post_recv(), err: %d\n", err);
goto out;
}
rx_iu = NULL;
msg = tx_iu->buf;
msg->type = cpu_to_le16(RTRS_MSG_INFO_REQ);
- memcpy(msg->sessname, sess->s.sessname, sizeof(msg->sessname));
+ memcpy(msg->pathname, clt_path->s.sessname, sizeof(msg->pathname));
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr,
+ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev,
+ tx_iu->dma_addr,
tx_iu->size, DMA_TO_DEVICE);
/* Send info request */
err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL);
if (err) {
- rtrs_err(sess->clt, "rtrs_iu_post_send(), err: %d\n", err);
+ rtrs_err(clt_path->clt, "rtrs_iu_post_send(), err: %d\n", err);
goto out;
}
tx_iu = NULL;
/* Wait for state change */
- wait_event_interruptible_timeout(sess->state_wq,
- sess->state != RTRS_CLT_CONNECTING,
+ wait_event_interruptible_timeout(clt_path->state_wq,
+ clt_path->state != RTRS_CLT_CONNECTING,
msecs_to_jiffies(
RTRS_CONNECT_TIMEOUT_MS));
- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) {
- if (READ_ONCE(sess->state) == RTRS_CLT_CONNECTING_ERR)
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) {
+ if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTING_ERR)
err = -ECONNRESET;
else
err = -ETIMEDOUT;
@@ -2567,82 +2586,81 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
out:
if (tx_iu)
- rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(tx_iu, clt_path->s.dev->ib_dev, 1);
if (rx_iu)
- rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(rx_iu, clt_path->s.dev->ib_dev, 1);
if (err)
/* If we've never taken async path because of malloc problems */
- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
+ rtrs_clt_change_state_get_old(clt_path,
+ RTRS_CLT_CONNECTING_ERR, NULL);
return err;
}
/**
- * init_sess() - establishes all session connections and does handshake
- * @sess: client session.
+ * init_path() - establishes all path connections and does handshake
+ * @clt_path: client path.
* In case of error full close or reconnect procedure should be taken,
* because reconnect or close async works can be started.
*/
-static int init_sess(struct rtrs_clt_sess *sess)
+static int init_path(struct rtrs_clt_path *clt_path)
{
int err;
char str[NAME_MAX];
struct rtrs_addr path = {
- .src = &sess->s.src_addr,
- .dst = &sess->s.dst_addr,
+ .src = &clt_path->s.src_addr,
+ .dst = &clt_path->s.dst_addr,
};
rtrs_addr_to_str(&path, str, sizeof(str));
- mutex_lock(&sess->init_mutex);
- err = init_conns(sess);
+ mutex_lock(&clt_path->init_mutex);
+ err = init_conns(clt_path);
if (err) {
- rtrs_err(sess->clt,
+ rtrs_err(clt_path->clt,
"init_conns() failed: err=%d path=%s [%s:%u]\n", err,
- str, sess->hca_name, sess->hca_port);
+ str, clt_path->hca_name, clt_path->hca_port);
goto out;
}
- err = rtrs_send_sess_info(sess);
+ err = rtrs_send_path_info(clt_path);
if (err) {
- rtrs_err(
- sess->clt,
- "rtrs_send_sess_info() failed: err=%d path=%s [%s:%u]\n",
- err, str, sess->hca_name, sess->hca_port);
+ rtrs_err(clt_path->clt,
+ "rtrs_send_path_info() failed: err=%d path=%s [%s:%u]\n",
+ err, str, clt_path->hca_name, clt_path->hca_port);
goto out;
}
- rtrs_clt_sess_up(sess);
+ rtrs_clt_path_up(clt_path);
out:
- mutex_unlock(&sess->init_mutex);
+ mutex_unlock(&clt_path->init_mutex);
return err;
}
static void rtrs_clt_reconnect_work(struct work_struct *work)
{
- struct rtrs_clt_sess *sess;
- struct rtrs_clt *clt;
- unsigned int delay_ms;
+ struct rtrs_clt_path *clt_path;
+ struct rtrs_clt_sess *clt;
int err;
- sess = container_of(to_delayed_work(work), struct rtrs_clt_sess,
- reconnect_dwork);
- clt = sess->clt;
+ clt_path = container_of(to_delayed_work(work), struct rtrs_clt_path,
+ reconnect_dwork);
+ clt = clt_path->clt;
- if (READ_ONCE(sess->state) != RTRS_CLT_RECONNECTING)
+ trace_rtrs_clt_reconnect_work(clt_path);
+
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_RECONNECTING)
return;
- if (sess->reconnect_attempts >= clt->max_reconnect_attempts) {
- /* Close a session completely if max attempts is reached */
- rtrs_clt_close_conns(sess, false);
+ if (clt_path->reconnect_attempts >= clt->max_reconnect_attempts) {
+ /* Close a path completely if max attempts is reached */
+ rtrs_clt_close_conns(clt_path, false);
return;
}
- sess->reconnect_attempts++;
+ clt_path->reconnect_attempts++;
- /* Stop everything */
- rtrs_clt_stop_and_destroy_conns(sess);
msleep(RTRS_RECONNECT_BACKOFF);
- if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) {
- err = init_sess(sess);
+ if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING, NULL)) {
+ err = init_path(clt_path);
if (err)
goto reconnect_again;
}
@@ -2650,31 +2668,30 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
return;
reconnect_again:
- if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) {
- sess->stats->reconnects.fail_cnt++;
- delay_ms = clt->reconnect_delay_sec * 1000;
- queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
- msecs_to_jiffies(delay_ms +
- prandom_u32() %
- RTRS_RECONNECT_SEED));
+ if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_RECONNECTING, NULL)) {
+ clt_path->stats->reconnects.fail_cnt++;
+ queue_work(rtrs_wq, &clt_path->err_recovery_work);
}
}
static void rtrs_clt_dev_release(struct device *dev)
{
- struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev);
+ struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess,
+ dev);
+ mutex_destroy(&clt->paths_ev_mutex);
+ mutex_destroy(&clt->paths_mutex);
kfree(clt);
}
-static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
+static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num,
u16 port, size_t pdu_sz, void *priv,
void (*link_ev)(void *priv,
enum rtrs_clt_link_ev ev),
unsigned int reconnect_delay_sec,
unsigned int max_reconnect_attempts)
{
- struct rtrs_clt *clt;
+ struct rtrs_clt_sess *clt;
int err;
if (!paths_num || paths_num > MAX_PATHS_NUM)
@@ -2693,6 +2710,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
return ERR_PTR(-ENOMEM);
}
+ clt->dev.class = rtrs_clt_dev_class;
+ clt->dev.release = rtrs_clt_dev_release;
uuid_gen(&clt->paths_uuid);
INIT_LIST_HEAD_RCU(&clt->paths_list);
clt->paths_num = paths_num;
@@ -2709,60 +2728,58 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
init_waitqueue_head(&clt->permits_wait);
mutex_init(&clt->paths_ev_mutex);
mutex_init(&clt->paths_mutex);
+ device_initialize(&clt->dev);
- clt->dev.class = rtrs_clt_dev_class;
- clt->dev.release = rtrs_clt_dev_release;
err = dev_set_name(&clt->dev, "%s", sessname);
if (err)
- goto err;
+ goto err_put;
+
/*
* Suppress user space notification until
* sysfs files are created
*/
dev_set_uevent_suppress(&clt->dev, true);
- err = device_register(&clt->dev);
- if (err) {
- put_device(&clt->dev);
- goto err;
- }
+ err = device_add(&clt->dev);
+ if (err)
+ goto err_put;
clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj);
if (!clt->kobj_paths) {
err = -ENOMEM;
- goto err_dev;
+ goto err_del;
}
err = rtrs_clt_create_sysfs_root_files(clt);
if (err) {
kobject_del(clt->kobj_paths);
kobject_put(clt->kobj_paths);
- goto err_dev;
+ goto err_del;
}
dev_set_uevent_suppress(&clt->dev, false);
kobject_uevent(&clt->dev.kobj, KOBJ_ADD);
return clt;
-err_dev:
- device_unregister(&clt->dev);
-err:
+err_del:
+ device_del(&clt->dev);
+err_put:
free_percpu(clt->pcpu_path);
- kfree(clt);
+ put_device(&clt->dev);
return ERR_PTR(err);
}
-static void free_clt(struct rtrs_clt *clt)
+static void free_clt(struct rtrs_clt_sess *clt)
{
- free_permits(clt);
free_percpu(clt->pcpu_path);
- mutex_destroy(&clt->paths_ev_mutex);
- mutex_destroy(&clt->paths_mutex);
- /* release callback will free clt in last put */
+
+ /*
+ * release callback will free clt and destroy mutexes in last put
+ */
device_unregister(&clt->dev);
}
/**
- * rtrs_clt_open() - Open a session to an RTRS server
+ * rtrs_clt_open() - Open a path to an RTRS server
* @ops: holds the link event callback and the private pointer.
- * @sessname: name of the session
+ * @pathname: name of the path to an RTRS server
* @paths: Paths to be established defined by their src and dst addresses
* @paths_num: Number of elements in the @paths array
* @port: port to be used by the RTRS session
@@ -2777,24 +2794,24 @@ static void free_clt(struct rtrs_clt *clt)
*
* Return a valid pointer on success otherwise PTR_ERR.
*/
-struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
- const char *sessname,
+struct rtrs_clt_sess *rtrs_clt_open(struct rtrs_clt_ops *ops,
+ const char *pathname,
const struct rtrs_addr *paths,
size_t paths_num, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
s16 max_reconnect_attempts, u32 nr_poll_queues)
{
- struct rtrs_clt_sess *sess, *tmp;
- struct rtrs_clt *clt;
+ struct rtrs_clt_path *clt_path, *tmp;
+ struct rtrs_clt_sess *clt;
int err, i;
- if (strchr(sessname, '/') || strchr(sessname, '.')) {
- pr_err("sessname cannot contain / and .\n");
+ if (strchr(pathname, '/') || strchr(pathname, '.')) {
+ pr_err("pathname cannot contain / and .\n");
err = -EINVAL;
goto out;
}
- clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv,
+ clt = alloc_clt(pathname, paths_num, port, pdu_sz, ops->priv,
ops->link_ev,
reconnect_delay_sec,
max_reconnect_attempts);
@@ -2803,49 +2820,49 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
goto out;
}
for (i = 0; i < paths_num; i++) {
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
- sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
+ clt_path = alloc_path(clt, &paths[i], nr_cpu_ids,
nr_poll_queues);
- if (IS_ERR(sess)) {
- err = PTR_ERR(sess);
- goto close_all_sess;
+ if (IS_ERR(clt_path)) {
+ err = PTR_ERR(clt_path);
+ goto close_all_path;
}
if (!i)
- sess->for_new_clt = 1;
- list_add_tail_rcu(&sess->s.entry, &clt->paths_list);
+ clt_path->for_new_clt = 1;
+ list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list);
- err = init_sess(sess);
+ err = init_path(clt_path);
if (err) {
- list_del_rcu(&sess->s.entry);
- rtrs_clt_close_conns(sess, true);
- free_percpu(sess->stats->pcpu_stats);
- kfree(sess->stats);
- free_sess(sess);
- goto close_all_sess;
+ list_del_rcu(&clt_path->s.entry);
+ rtrs_clt_close_conns(clt_path, true);
+ free_percpu(clt_path->stats->pcpu_stats);
+ kfree(clt_path->stats);
+ free_path(clt_path);
+ goto close_all_path;
}
- err = rtrs_clt_create_sess_files(sess);
+ err = rtrs_clt_create_path_files(clt_path);
if (err) {
- list_del_rcu(&sess->s.entry);
- rtrs_clt_close_conns(sess, true);
- free_percpu(sess->stats->pcpu_stats);
- kfree(sess->stats);
- free_sess(sess);
- goto close_all_sess;
+ list_del_rcu(&clt_path->s.entry);
+ rtrs_clt_close_conns(clt_path, true);
+ free_percpu(clt_path->stats->pcpu_stats);
+ kfree(clt_path->stats);
+ free_path(clt_path);
+ goto close_all_path;
}
}
err = alloc_permits(clt);
if (err)
- goto close_all_sess;
+ goto close_all_path;
return clt;
-close_all_sess:
- list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
- rtrs_clt_destroy_sess_files(sess, NULL);
- rtrs_clt_close_conns(sess, true);
- kobject_put(&sess->kobj);
+close_all_path:
+ list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) {
+ rtrs_clt_destroy_path_files(clt_path, NULL);
+ rtrs_clt_close_conns(clt_path, true);
+ kobject_put(&clt_path->kobj);
}
rtrs_clt_destroy_sysfs_root(clt);
free_clt(clt);
@@ -2856,37 +2873,40 @@ out:
EXPORT_SYMBOL(rtrs_clt_open);
/**
- * rtrs_clt_close() - Close a session
+ * rtrs_clt_close() - Close a path
* @clt: Session handle. Session is freed upon return.
*/
-void rtrs_clt_close(struct rtrs_clt *clt)
+void rtrs_clt_close(struct rtrs_clt_sess *clt)
{
- struct rtrs_clt_sess *sess, *tmp;
+ struct rtrs_clt_path *clt_path, *tmp;
/* Firstly forbid sysfs access */
rtrs_clt_destroy_sysfs_root(clt);
/* Now it is safe to iterate over all paths without locks */
- list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
- rtrs_clt_close_conns(sess, true);
- rtrs_clt_destroy_sess_files(sess, NULL);
- kobject_put(&sess->kobj);
+ list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) {
+ rtrs_clt_close_conns(clt_path, true);
+ rtrs_clt_destroy_path_files(clt_path, NULL);
+ kobject_put(&clt_path->kobj);
}
+ free_permits(clt);
free_clt(clt);
}
EXPORT_SYMBOL(rtrs_clt_close);
-int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess)
+int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *clt_path)
{
enum rtrs_clt_state old_state;
int err = -EBUSY;
bool changed;
- changed = rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING,
+ changed = rtrs_clt_change_state_get_old(clt_path,
+ RTRS_CLT_RECONNECTING,
&old_state);
if (changed) {
- sess->reconnect_attempts = 0;
- queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 0);
+ clt_path->reconnect_attempts = 0;
+ rtrs_clt_stop_and_destroy_conns(clt_path);
+ queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, 0);
}
if (changed || old_state == RTRS_CLT_RECONNECTING) {
/*
@@ -2894,15 +2914,15 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess)
* execution, so do the flush if we have queued something
* right now or work is pending.
*/
- flush_delayed_work(&sess->reconnect_dwork);
- err = (READ_ONCE(sess->state) ==
+ flush_delayed_work(&clt_path->reconnect_dwork);
+ err = (READ_ONCE(clt_path->state) ==
RTRS_CLT_CONNECTED ? 0 : -ENOTCONN);
}
return err;
}
-int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess,
+int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *clt_path,
const struct attribute *sysfs_self)
{
enum rtrs_clt_state old_state;
@@ -2918,27 +2938,27 @@ int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess,
* removing the path.
*/
do {
- rtrs_clt_close_conns(sess, true);
- changed = rtrs_clt_change_state_get_old(sess,
+ rtrs_clt_close_conns(clt_path, true);
+ changed = rtrs_clt_change_state_get_old(clt_path,
RTRS_CLT_DEAD,
&old_state);
} while (!changed && old_state != RTRS_CLT_DEAD);
if (changed) {
- rtrs_clt_remove_path_from_arr(sess);
- rtrs_clt_destroy_sess_files(sess, sysfs_self);
- kobject_put(&sess->kobj);
+ rtrs_clt_remove_path_from_arr(clt_path);
+ rtrs_clt_destroy_path_files(clt_path, sysfs_self);
+ kobject_put(&clt_path->kobj);
}
return 0;
}
-void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value)
+void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt_sess *clt, int value)
{
clt->max_reconnect_attempts = (unsigned int)value;
}
-int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt)
+int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt_sess *clt)
{
return (int)clt->max_reconnect_attempts;
}
@@ -2968,12 +2988,12 @@ int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt)
* On dir=WRITE rtrs client will rdma write data in sg to server side.
*/
int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
- struct rtrs_clt *clt, struct rtrs_permit *permit,
- const struct kvec *vec, size_t nr, size_t data_len,
- struct scatterlist *sg, unsigned int sg_cnt)
+ struct rtrs_clt_sess *clt, struct rtrs_permit *permit,
+ const struct kvec *vec, size_t nr, size_t data_len,
+ struct scatterlist *sg, unsigned int sg_cnt)
{
struct rtrs_clt_io_req *req;
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
enum dma_data_direction dma_dir;
int err = -ECONNABORTED, i;
@@ -2995,19 +3015,19 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
rcu_read_lock();
for (path_it_init(&it, clt);
- (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
+ (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
continue;
- if (usr_len + hdr_len > sess->max_hdr_size) {
- rtrs_wrn_rl(sess->clt,
+ if (usr_len + hdr_len > clt_path->max_hdr_size) {
+ rtrs_wrn_rl(clt_path->clt,
"%s request failed, user message size is %zu and header length %zu, but max size is %u\n",
dir == READ ? "Read" : "Write",
- usr_len, hdr_len, sess->max_hdr_size);
+ usr_len, hdr_len, clt_path->max_hdr_size);
err = -EMSGSIZE;
break;
}
- req = rtrs_clt_get_req(sess, ops->conf_fn, permit, ops->priv,
+ req = rtrs_clt_get_req(clt_path, ops->conf_fn, permit, ops->priv,
vec, usr_len, sg, sg_cnt, data_len,
dma_dir);
if (dir == READ)
@@ -3028,21 +3048,21 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
}
EXPORT_SYMBOL(rtrs_clt_request);
-int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt_sess *clt, unsigned int index)
{
/* If no path, return -1 for block layer not to try again */
int cnt = -1;
struct rtrs_con *con;
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
struct path_it it;
rcu_read_lock();
for (path_it_init(&it, clt);
- (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
+ (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
continue;
- con = sess->s.con[index + 1];
+ con = clt_path->s.con[index + 1];
cnt = ib_process_cq_direct(con->cq, -1);
if (cnt)
break;
@@ -3062,7 +3082,7 @@ EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct);
* 0 on success
* -ECOMM no connection to the server
*/
-int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr)
+int rtrs_clt_query(struct rtrs_clt_sess *clt, struct rtrs_attrs *attr)
{
if (!rtrs_clt_is_connected(clt))
return -ECOMM;
@@ -3077,15 +3097,15 @@ int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr)
}
EXPORT_SYMBOL(rtrs_clt_query);
-int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
+int rtrs_clt_create_path_from_sysfs(struct rtrs_clt_sess *clt,
struct rtrs_addr *addr)
{
- struct rtrs_clt_sess *sess;
+ struct rtrs_clt_path *clt_path;
int err;
- sess = alloc_sess(clt, addr, nr_cpu_ids, 0);
- if (IS_ERR(sess))
- return PTR_ERR(sess);
+ clt_path = alloc_path(clt, addr, nr_cpu_ids, 0);
+ if (IS_ERR(clt_path))
+ return PTR_ERR(clt_path);
mutex_lock(&clt->paths_mutex);
if (clt->paths_num == 0) {
@@ -3094,7 +3114,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
* the addition of the first path is like a new session for
* the storage server
*/
- sess->for_new_clt = 1;
+ clt_path->for_new_clt = 1;
}
mutex_unlock(&clt->paths_mutex);
@@ -3104,24 +3124,24 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
* IO will never grab it. Also it is very important to add
* path before init, since init fires LINK_CONNECTED event.
*/
- rtrs_clt_add_path_to_arr(sess);
+ rtrs_clt_add_path_to_arr(clt_path);
- err = init_sess(sess);
+ err = init_path(clt_path);
if (err)
- goto close_sess;
+ goto close_path;
- err = rtrs_clt_create_sess_files(sess);
+ err = rtrs_clt_create_path_files(clt_path);
if (err)
- goto close_sess;
+ goto close_path;
return 0;
-close_sess:
- rtrs_clt_remove_path_from_arr(sess);
- rtrs_clt_close_conns(sess, true);
- free_percpu(sess->stats->pcpu_stats);
- kfree(sess->stats);
- free_sess(sess);
+close_path:
+ rtrs_clt_remove_path_from_arr(clt_path);
+ rtrs_clt_close_conns(clt_path, true);
+ free_percpu(clt_path->stats->pcpu_stats);
+ kfree(clt_path->stats);
+ free_path(clt_path);
return err;
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index 9afffccff973..f848c0392d98 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -124,9 +124,9 @@ struct rtrs_rbuf {
u32 rkey;
};
-struct rtrs_clt_sess {
- struct rtrs_sess s;
- struct rtrs_clt *clt;
+struct rtrs_clt_path {
+ struct rtrs_path s;
+ struct rtrs_clt_sess *clt;
wait_queue_head_t state_wq;
enum rtrs_clt_state state;
atomic_t connected_cnt;
@@ -134,6 +134,7 @@ struct rtrs_clt_sess {
struct rtrs_clt_io_req *reqs;
struct delayed_work reconnect_dwork;
struct work_struct close_work;
+ struct work_struct err_recovery_work;
unsigned int reconnect_attempts;
bool established;
struct rtrs_rbuf *rbufs;
@@ -153,10 +154,10 @@ struct rtrs_clt_sess {
*mp_skip_entry;
};
-struct rtrs_clt {
+struct rtrs_clt_sess {
struct list_head paths_list; /* rcu protected list */
size_t paths_num;
- struct rtrs_clt_sess
+ struct rtrs_clt_path
__rcu * __percpu *pcpu_path;
uuid_t paths_uuid;
int paths_up;
@@ -186,31 +187,32 @@ static inline struct rtrs_clt_con *to_clt_con(struct rtrs_con *c)
return container_of(c, struct rtrs_clt_con, c);
}
-static inline struct rtrs_clt_sess *to_clt_sess(struct rtrs_sess *s)
+static inline struct rtrs_clt_path *to_clt_path(struct rtrs_path *s)
{
- return container_of(s, struct rtrs_clt_sess, s);
+ return container_of(s, struct rtrs_clt_path, s);
}
-static inline int permit_size(struct rtrs_clt *clt)
+static inline int permit_size(struct rtrs_clt_sess *clt)
{
return sizeof(struct rtrs_permit) + clt->pdu_sz;
}
-static inline struct rtrs_permit *get_permit(struct rtrs_clt *clt, int idx)
+static inline struct rtrs_permit *get_permit(struct rtrs_clt_sess *clt,
+ int idx)
{
return (struct rtrs_permit *)(clt->permits + permit_size(clt) * idx);
}
-int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess);
-void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait);
-int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
+int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *path);
+void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait);
+int rtrs_clt_create_path_from_sysfs(struct rtrs_clt_sess *clt,
struct rtrs_addr *addr);
-int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess,
+int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *path,
const struct attribute *sysfs_self);
-void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value);
-int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt);
-void free_sess(struct rtrs_clt_sess *sess);
+void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt_sess *clt, int value);
+int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt_sess *clt);
+void free_path(struct rtrs_clt_path *clt_path);
/* rtrs-clt-stats.c */
@@ -239,11 +241,11 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats,
/* rtrs-clt-sysfs.c */
-int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt);
-void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt);
+int rtrs_clt_create_sysfs_root_files(struct rtrs_clt_sess *clt);
+void rtrs_clt_destroy_sysfs_root(struct rtrs_clt_sess *clt);
-int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess);
-void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess,
+int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path);
+void rtrs_clt_destroy_path_files(struct rtrs_clt_path *clt_path,
const struct attribute *sysfs_self);
#endif /* RTRS_CLT_H */
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 78eac9a4f703..a2420eecaf5a 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -23,6 +23,16 @@
#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
__stringify(RTRS_PROTO_VER_MINOR)
+/*
+ * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
+ * and the minimum chunk size is 4096 (2^12).
+ * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory
+ * since queue_depth in rtrs_msg_conn_rsp is defined as le16.
+ * Therefore the pratical max value of sess_queue_depth is
+ * somewhere between 1 and 65535 and it depends on the system.
+ */
+#define MAX_SESS_QUEUE_DEPTH 65535
+
enum rtrs_imm_const {
MAX_IMM_TYPE_BITS = 4,
MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
@@ -46,16 +56,6 @@ enum {
MAX_PATHS_NUM = 128,
- /*
- * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
- * and the minimum chunk size is 4096 (2^12).
- * So the maximum sess_queue_depth is 65536 (2^16) in theory.
- * But mempool_create, create_qp and ib_post_send fail with
- * "cannot allocate memory" error if sess_queue_depth is too big.
- * Therefore the pratical max value of sess_queue_depth is
- * somewhere between 1 and 65534 and it depends on the system.
- */
- MAX_SESS_QUEUE_DEPTH = 65535,
MIN_CHUNK_SIZE = 8192,
RTRS_HB_INTERVAL_MS = 5000,
@@ -90,7 +90,7 @@ struct rtrs_ib_dev {
};
struct rtrs_con {
- struct rtrs_sess *sess;
+ struct rtrs_path *path;
struct ib_qp *qp;
struct ib_cq *cq;
struct rdma_cm_id *cm_id;
@@ -100,7 +100,7 @@ struct rtrs_con {
atomic_t sq_wr_avail;
};
-struct rtrs_sess {
+struct rtrs_path {
struct list_head entry;
struct sockaddr_storage dst_addr;
struct sockaddr_storage src_addr;
@@ -229,11 +229,11 @@ struct rtrs_msg_conn_rsp {
/**
* struct rtrs_msg_info_req
* @type: @RTRS_MSG_INFO_REQ
- * @sessname: Session name chosen by client
+ * @pathname: Path name chosen by client
*/
struct rtrs_msg_info_req {
__le16 type;
- u8 sessname[NAME_MAX];
+ u8 pathname[NAME_MAX];
u8 reserved[15];
};
@@ -313,19 +313,19 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
-int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
+int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
u32 max_send_sge, int cq_vector, int nr_cqe,
u32 max_send_wr, u32 max_recv_wr,
enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);
-void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
+void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
unsigned int interval_ms, unsigned int missed_max,
void (*err_handler)(struct rtrs_con *con),
struct workqueue_struct *wq);
-void rtrs_start_hb(struct rtrs_sess *sess);
-void rtrs_stop_hb(struct rtrs_sess *sess);
-void rtrs_send_hb_ack(struct rtrs_sess *sess);
+void rtrs_start_hb(struct rtrs_path *path);
+void rtrs_stop_hb(struct rtrs_path *path);
+void rtrs_send_hb_ack(struct rtrs_path *path);
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
struct rtrs_rdma_dev_pd *pool);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
index 44b1c1652131..2aff1213a19d 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
@@ -14,9 +14,14 @@
int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable)
{
if (enable) {
- struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats;
+ int cpu;
+ struct rtrs_srv_stats_rdma_stats *r;
+
+ for_each_possible_cpu(cpu) {
+ r = per_cpu_ptr(stats->rdma_stats, cpu);
+ memset(r, 0, sizeof(*r));
+ }
- memset(r, 0, sizeof(*r));
return 0;
}
@@ -25,11 +30,22 @@ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable)
ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page)
{
- struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats;
+ int cpu;
+ struct rtrs_srv_stats_rdma_stats sum;
+ struct rtrs_srv_stats_rdma_stats *r;
+
+ memset(&sum, 0, sizeof(sum));
+
+ for_each_possible_cpu(cpu) {
+ r = per_cpu_ptr(stats->rdma_stats, cpu);
+
+ sum.dir[READ].cnt += r->dir[READ].cnt;
+ sum.dir[READ].size_total += r->dir[READ].size_total;
+ sum.dir[WRITE].cnt += r->dir[WRITE].cnt;
+ sum.dir[WRITE].size_total += r->dir[WRITE].size_total;
+ }
- return sysfs_emit(page, "%lld %lld %lld %lldn %u\n",
- (s64)atomic64_read(&r->dir[READ].cnt),
- (s64)atomic64_read(&r->dir[READ].size_total),
- (s64)atomic64_read(&r->dir[WRITE].cnt),
- (s64)atomic64_read(&r->dir[WRITE].size_total), 0);
+ return sysfs_emit(page, "%llu %llu %llu %llu\n",
+ sum.dir[READ].cnt, sum.dir[READ].size_total,
+ sum.dir[WRITE].cnt, sum.dir[WRITE].size_total);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index 9c43ce5ba1c1..2a3c9ac64a42 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -15,10 +15,10 @@
static void rtrs_srv_release(struct kobject *kobj)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
- kfree(sess);
+ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
+ kfree(srv_path);
}
static struct kobj_type ktype = {
@@ -36,24 +36,25 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- struct rtrs_srv_sess *sess;
- struct rtrs_sess *s;
+ struct rtrs_srv_path *srv_path;
+ struct rtrs_path *s;
char str[MAXHOSTNAMELEN];
- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
- s = &sess->s;
+ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
+ s = &srv_path->s;
if (!sysfs_streq(buf, "1")) {
rtrs_err(s, "%s: invalid value: '%s'\n",
attr->attr.name, buf);
return -EINVAL;
}
- sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str));
+ sockaddr_to_str((struct sockaddr *)&srv_path->s.dst_addr, str,
+ sizeof(str));
rtrs_info(s, "disconnect for path %s requested\n", str);
/* first remove sysfs itself to avoid deadlock */
- sysfs_remove_file_self(&sess->kobj, &attr->attr);
- close_sess(sess);
+ sysfs_remove_file_self(&srv_path->kobj, &attr->attr);
+ close_path(srv_path);
return count;
}
@@ -66,11 +67,11 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
struct rtrs_con *usr_con;
- sess = container_of(kobj, typeof(*sess), kobj);
- usr_con = sess->s.con[0];
+ srv_path = container_of(kobj, typeof(*srv_path), kobj);
+ usr_con = srv_path->s.con[0];
return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num);
}
@@ -82,11 +83,11 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
+ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
- return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name);
+ return sysfs_emit(page, "%s\n", srv_path->s.dev->ib_dev->name);
}
static struct kobj_attribute rtrs_srv_hca_name_attr =
@@ -96,11 +97,11 @@ static ssize_t rtrs_srv_src_addr_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
int cnt;
- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
- cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr,
+ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
+ cnt = sockaddr_to_str((struct sockaddr *)&srv_path->s.dst_addr,
page, PAGE_SIZE);
return cnt + sysfs_emit_at(page, cnt, "\n");
}
@@ -112,11 +113,11 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
int len;
- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
- len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page,
+ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
+ len = sockaddr_to_str((struct sockaddr *)&srv_path->s.src_addr, page,
PAGE_SIZE);
len += sysfs_emit_at(page, len, "\n");
return len;
@@ -125,7 +126,7 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj,
static struct kobj_attribute rtrs_srv_dst_addr_attr =
__ATTR(dst_addr, 0444, rtrs_srv_dst_addr_show, NULL);
-static struct attribute *rtrs_srv_sess_attrs[] = {
+static struct attribute *rtrs_srv_path_attrs[] = {
&rtrs_srv_hca_name_attr.attr,
&rtrs_srv_hca_port_attr.attr,
&rtrs_srv_src_addr_attr.attr,
@@ -134,8 +135,8 @@ static struct attribute *rtrs_srv_sess_attrs[] = {
NULL,
};
-static const struct attribute_group rtrs_srv_sess_attr_group = {
- .attrs = rtrs_srv_sess_attrs,
+static const struct attribute_group rtrs_srv_path_attr_group = {
+ .attrs = rtrs_srv_path_attrs,
};
STAT_ATTR(struct rtrs_srv_stats, rdma,
@@ -151,9 +152,9 @@ static const struct attribute_group rtrs_srv_stats_attr_group = {
.attrs = rtrs_srv_stats_attrs,
};
-static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
+static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
int err = 0;
mutex_lock(&srv->paths_mutex);
@@ -164,7 +165,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
goto unlock;
}
srv->dev.class = rtrs_dev_class;
- err = dev_set_name(&srv->dev, "%s", sess->s.sessname);
+ err = dev_set_name(&srv->dev, "%s", srv_path->s.sessname);
if (err)
goto unlock;
@@ -196,9 +197,9 @@ unlock:
}
static void
-rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
+rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
mutex_lock(&srv->paths_mutex);
if (!--srv->dev_ref) {
@@ -213,33 +214,35 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
}
}
-static void rtrs_srv_sess_stats_release(struct kobject *kobj)
+static void rtrs_srv_path_stats_release(struct kobject *kobj)
{
struct rtrs_srv_stats *stats;
stats = container_of(kobj, struct rtrs_srv_stats, kobj_stats);
+ free_percpu(stats->rdma_stats);
+
kfree(stats);
}
static struct kobj_type ktype_stats = {
.sysfs_ops = &kobj_sysfs_ops,
- .release = rtrs_srv_sess_stats_release,
+ .release = rtrs_srv_path_stats_release,
};
-static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess)
+static int rtrs_srv_create_stats_files(struct rtrs_srv_path *srv_path)
{
int err;
- struct rtrs_sess *s = &sess->s;
+ struct rtrs_path *s = &srv_path->s;
- err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats,
- &sess->kobj, "stats");
+ err = kobject_init_and_add(&srv_path->stats->kobj_stats, &ktype_stats,
+ &srv_path->kobj, "stats");
if (err) {
rtrs_err(s, "kobject_init_and_add(): %d\n", err);
- kobject_put(&sess->stats->kobj_stats);
+ kobject_put(&srv_path->stats->kobj_stats);
return err;
}
- err = sysfs_create_group(&sess->stats->kobj_stats,
+ err = sysfs_create_group(&srv_path->stats->kobj_stats,
&rtrs_srv_stats_attr_group);
if (err) {
rtrs_err(s, "sysfs_create_group(): %d\n", err);
@@ -249,64 +252,64 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess)
return 0;
err:
- kobject_del(&sess->stats->kobj_stats);
- kobject_put(&sess->stats->kobj_stats);
+ kobject_del(&srv_path->stats->kobj_stats);
+ kobject_put(&srv_path->stats->kobj_stats);
return err;
}
-int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess)
+int rtrs_srv_create_path_files(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
- struct rtrs_sess *s = &sess->s;
+ struct rtrs_srv_sess *srv = srv_path->srv;
+ struct rtrs_path *s = &srv_path->s;
char str[NAME_MAX];
int err;
struct rtrs_addr path = {
- .src = &sess->s.dst_addr,
- .dst = &sess->s.src_addr,
+ .src = &srv_path->s.dst_addr,
+ .dst = &srv_path->s.src_addr,
};
rtrs_addr_to_str(&path, str, sizeof(str));
- err = rtrs_srv_create_once_sysfs_root_folders(sess);
+ err = rtrs_srv_create_once_sysfs_root_folders(srv_path);
if (err)
return err;
- err = kobject_init_and_add(&sess->kobj, &ktype, srv->kobj_paths,
+ err = kobject_init_and_add(&srv_path->kobj, &ktype, srv->kobj_paths,
"%s", str);
if (err) {
rtrs_err(s, "kobject_init_and_add(): %d\n", err);
goto destroy_root;
}
- err = sysfs_create_group(&sess->kobj, &rtrs_srv_sess_attr_group);
+ err = sysfs_create_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
if (err) {
rtrs_err(s, "sysfs_create_group(): %d\n", err);
goto put_kobj;
}
- err = rtrs_srv_create_stats_files(sess);
+ err = rtrs_srv_create_stats_files(srv_path);
if (err)
goto remove_group;
return 0;
remove_group:
- sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group);
+ sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
put_kobj:
- kobject_del(&sess->kobj);
+ kobject_del(&srv_path->kobj);
destroy_root:
- kobject_put(&sess->kobj);
- rtrs_srv_destroy_once_sysfs_root_folders(sess);
+ kobject_put(&srv_path->kobj);
+ rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
return err;
}
-void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess)
+void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path)
{
- if (sess->kobj.state_in_sysfs) {
- kobject_del(&sess->stats->kobj_stats);
- kobject_put(&sess->stats->kobj_stats);
- sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group);
- kobject_put(&sess->kobj);
+ if (srv_path->kobj.state_in_sysfs) {
+ kobject_del(&srv_path->stats->kobj_stats);
+ kobject_put(&srv_path->stats->kobj_stats);
+ sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
+ kobject_put(&srv_path->kobj);
- rtrs_srv_destroy_once_sysfs_root_folders(sess);
+ rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
}
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c
new file mode 100644
index 000000000000..29ca59ceb0dd
--- /dev/null
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#include "rtrs.h"
+#include "rtrs-pri.h"
+#include "rtrs-srv.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "rtrs-srv-trace.h"
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h
new file mode 100644
index 000000000000..587d3e033081
--- /dev/null
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rtrs_srv
+
+#if !defined(_TRACE_RTRS_SRV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RTRS_SRV_H
+
+#include <linux/tracepoint.h>
+
+struct rtrs_srv_op;
+struct rtrs_srv_con;
+struct rtrs_srv_path;
+
+TRACE_DEFINE_ENUM(RTRS_SRV_CONNECTING);
+TRACE_DEFINE_ENUM(RTRS_SRV_CONNECTED);
+TRACE_DEFINE_ENUM(RTRS_SRV_CLOSING);
+TRACE_DEFINE_ENUM(RTRS_SRV_CLOSED);
+
+#define show_rtrs_srv_state(x) \
+ __print_symbolic(x, \
+ { RTRS_SRV_CONNECTING, "CONNECTING" }, \
+ { RTRS_SRV_CONNECTED, "CONNECTED" }, \
+ { RTRS_SRV_CLOSING, "CLOSING" }, \
+ { RTRS_SRV_CLOSED, "CLOSED" })
+
+TRACE_EVENT(send_io_resp_imm,
+ TP_PROTO(struct rtrs_srv_op *id,
+ bool need_inval,
+ bool always_invalidate,
+ int errno),
+
+ TP_ARGS(id, need_inval, always_invalidate, errno),
+
+ TP_STRUCT__entry(
+ __field(u8, dir)
+ __field(bool, need_inval)
+ __field(bool, always_invalidate)
+ __field(u32, msg_id)
+ __field(int, wr_cnt)
+ __field(u32, signal_interval)
+ __field(int, state)
+ __field(int, errno)
+ __array(char, sessname, NAME_MAX)
+ ),
+
+ TP_fast_assign(
+ struct rtrs_srv_con *con = id->con;
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
+
+ __entry->dir = id->dir;
+ __entry->state = srv_path->state;
+ __entry->errno = errno;
+ __entry->need_inval = need_inval;
+ __entry->always_invalidate = always_invalidate;
+ __entry->msg_id = id->msg_id;
+ __entry->wr_cnt = atomic_read(&con->c.wr_cnt);
+ __entry->signal_interval = s->signal_interval;
+ memcpy(__entry->sessname, kobject_name(&srv_path->kobj), NAME_MAX);
+ ),
+
+ TP_printk("sess='%s' state='%s' dir=%s err='%d' inval='%d' glob-inval='%d' msgid='%u' wrcnt='%d' sig-interval='%u'",
+ __entry->sessname,
+ show_rtrs_srv_state(__entry->state),
+ __print_symbolic(__entry->dir,
+ { READ, "READ" },
+ { WRITE, "WRITE" }),
+ __entry->errno,
+ __entry->need_inval,
+ __entry->always_invalidate,
+ __entry->msg_id,
+ __entry->wr_cnt,
+ __entry->signal_interval
+ )
+);
+
+#endif /* _TRACE_RTRS_SRV_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE rtrs-srv-trace
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 7df71f8cf149..22d7ba05e9fe 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -11,12 +11,12 @@
#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
#include <linux/module.h>
-#include <linux/mempool.h>
#include "rtrs-srv.h"
#include "rtrs-log.h"
#include <rdma/ib_cm.h>
#include <rdma/ib_verbs.h>
+#include "rtrs-srv-trace.h"
MODULE_DESCRIPTION("RDMA Transport Server");
MODULE_LICENSE("GPL");
@@ -26,11 +26,7 @@ MODULE_LICENSE("GPL");
#define DEFAULT_SESS_QUEUE_DEPTH 512
#define MAX_HDR_SIZE PAGE_SIZE
-/* We guarantee to serve 10 paths at least */
-#define CHUNK_POOL_SZ 10
-
static struct rtrs_rdma_dev_pd dev_pd;
-static mempool_t *chunk_pool;
struct class *rtrs_dev_class;
static struct rtrs_srv_ib_ctx ib_ctx;
@@ -62,19 +58,14 @@ static inline struct rtrs_srv_con *to_srv_con(struct rtrs_con *c)
return container_of(c, struct rtrs_srv_con, c);
}
-static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s)
-{
- return container_of(s, struct rtrs_srv_sess, s);
-}
-
-static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess,
+static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
enum rtrs_srv_state new_state)
{
enum rtrs_srv_state old_state;
bool changed = false;
- spin_lock_irq(&sess->state_lock);
- old_state = sess->state;
+ spin_lock_irq(&srv_path->state_lock);
+ old_state = srv_path->state;
switch (new_state) {
case RTRS_SRV_CONNECTED:
if (old_state == RTRS_SRV_CONNECTING)
@@ -93,8 +84,8 @@ static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess,
break;
}
if (changed)
- sess->state = new_state;
- spin_unlock_irq(&sess->state_lock);
+ srv_path->state = new_state;
+ spin_unlock_irq(&srv_path->state_lock);
return changed;
}
@@ -106,16 +97,16 @@ static void free_id(struct rtrs_srv_op *id)
kfree(id);
}
-static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess)
+static void rtrs_srv_free_ops_ids(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
int i;
- if (sess->ops_ids) {
+ if (srv_path->ops_ids) {
for (i = 0; i < srv->queue_depth; i++)
- free_id(sess->ops_ids[i]);
- kfree(sess->ops_ids);
- sess->ops_ids = NULL;
+ free_id(srv_path->ops_ids[i]);
+ kfree(srv_path->ops_ids);
+ srv_path->ops_ids = NULL;
}
}
@@ -127,21 +118,24 @@ static struct ib_cqe io_comp_cqe = {
static inline void rtrs_srv_inflight_ref_release(struct percpu_ref *ref)
{
- struct rtrs_srv_sess *sess = container_of(ref, struct rtrs_srv_sess, ids_inflight_ref);
+ struct rtrs_srv_path *srv_path = container_of(ref,
+ struct rtrs_srv_path,
+ ids_inflight_ref);
- percpu_ref_exit(&sess->ids_inflight_ref);
- complete(&sess->complete_done);
+ percpu_ref_exit(&srv_path->ids_inflight_ref);
+ complete(&srv_path->complete_done);
}
-static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess)
+static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_srv_op *id;
int i, ret;
- sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids),
- GFP_KERNEL);
- if (!sess->ops_ids)
+ srv_path->ops_ids = kcalloc(srv->queue_depth,
+ sizeof(*srv_path->ops_ids),
+ GFP_KERNEL);
+ if (!srv_path->ops_ids)
goto err;
for (i = 0; i < srv->queue_depth; ++i) {
@@ -149,44 +143,44 @@ static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess)
if (!id)
goto err;
- sess->ops_ids[i] = id;
+ srv_path->ops_ids[i] = id;
}
- ret = percpu_ref_init(&sess->ids_inflight_ref,
+ ret = percpu_ref_init(&srv_path->ids_inflight_ref,
rtrs_srv_inflight_ref_release, 0, GFP_KERNEL);
if (ret) {
pr_err("Percpu reference init failed\n");
goto err;
}
- init_completion(&sess->complete_done);
+ init_completion(&srv_path->complete_done);
return 0;
err:
- rtrs_srv_free_ops_ids(sess);
+ rtrs_srv_free_ops_ids(srv_path);
return -ENOMEM;
}
-static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess)
+static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_path *srv_path)
{
- percpu_ref_get(&sess->ids_inflight_ref);
+ percpu_ref_get(&srv_path->ids_inflight_ref);
}
-static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess)
+static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_path *srv_path)
{
- percpu_ref_put(&sess->ids_inflight_ref);
+ percpu_ref_put(&srv_path->ids_inflight_ref);
}
static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
if (wc->status != IB_WC_SUCCESS) {
rtrs_err(s, "REG MR failed: %s\n",
ib_wc_status_msg(wc->status));
- close_sess(sess);
+ close_path(srv_path);
return;
}
}
@@ -197,9 +191,9 @@ static struct ib_cqe local_reg_cqe = {
static int rdma_write_sg(struct rtrs_srv_op *id)
{
- struct rtrs_sess *s = id->con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
- dma_addr_t dma_addr = sess->dma_addr[id->msg_id];
+ struct rtrs_path *s = id->con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ dma_addr_t dma_addr = srv_path->dma_addr[id->msg_id];
struct rtrs_srv_mr *srv_mr;
struct ib_send_wr inv_wr;
struct ib_rdma_wr imm_wr;
@@ -233,7 +227,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
return -EINVAL;
}
- plist->lkey = sess->s.dev->ib_pd->local_dma_lkey;
+ plist->lkey = srv_path->s.dev->ib_pd->local_dma_lkey;
offset += plist->length;
wr->wr.sg_list = plist;
@@ -284,7 +278,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
if (always_invalidate) {
struct rtrs_msg_rkey_rsp *msg;
- srv_mr = &sess->mrs[id->msg_id];
+ srv_mr = &srv_path->mrs[id->msg_id];
rwr.wr.opcode = IB_WR_REG_MR;
rwr.wr.wr_cqe = &local_reg_cqe;
rwr.wr.num_sge = 0;
@@ -300,11 +294,11 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
list.addr = srv_mr->iu->dma_addr;
list.length = sizeof(*msg);
- list.lkey = sess->s.dev->ib_pd->local_dma_lkey;
+ list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey;
imm_wr.wr.sg_list = &list;
imm_wr.wr.num_sge = 1;
imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM;
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev,
+ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
srv_mr->iu->dma_addr,
srv_mr->iu->size, DMA_TO_DEVICE);
} else {
@@ -317,7 +311,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
0, need_inval));
imm_wr.wr.wr_cqe = &io_comp_cqe;
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr,
+ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, dma_addr,
offset, DMA_BIDIRECTIONAL);
err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL);
@@ -341,8 +335,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
int errno)
{
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
struct ib_send_wr inv_wr, *wr = NULL;
struct ib_rdma_wr imm_wr;
struct ib_reg_wr rwr;
@@ -377,6 +371,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
}
}
+ trace_send_io_resp_imm(id, need_inval, always_invalidate, errno);
+
if (need_inval && always_invalidate) {
wr = &inv_wr;
inv_wr.next = &rwr.wr;
@@ -402,7 +398,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
struct ib_sge list;
struct rtrs_msg_rkey_rsp *msg;
- srv_mr = &sess->mrs[id->msg_id];
+ srv_mr = &srv_path->mrs[id->msg_id];
rwr.wr.next = &imm_wr.wr;
rwr.wr.opcode = IB_WR_REG_MR;
rwr.wr.wr_cqe = &local_reg_cqe;
@@ -419,11 +415,11 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
list.addr = srv_mr->iu->dma_addr;
list.length = sizeof(*msg);
- list.lkey = sess->s.dev->ib_pd->local_dma_lkey;
+ list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey;
imm_wr.wr.sg_list = &list;
imm_wr.wr.num_sge = 1;
imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM;
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev,
+ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
srv_mr->iu->dma_addr,
srv_mr->iu->size, DMA_TO_DEVICE);
} else {
@@ -444,11 +440,11 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
return err;
}
-void close_sess(struct rtrs_srv_sess *sess)
+void close_path(struct rtrs_srv_path *srv_path)
{
- if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING))
- queue_work(rtrs_wq, &sess->close_work);
- WARN_ON(sess->state != RTRS_SRV_CLOSING);
+ if (rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSING))
+ queue_work(rtrs_wq, &srv_path->close_work);
+ WARN_ON(srv_path->state != RTRS_SRV_CLOSING);
}
static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state)
@@ -480,35 +476,35 @@ static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state)
*/
bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
struct rtrs_srv_con *con;
- struct rtrs_sess *s;
+ struct rtrs_path *s;
int err;
if (WARN_ON(!id))
return true;
con = id->con;
- s = con->c.sess;
- sess = to_srv_sess(s);
+ s = con->c.path;
+ srv_path = to_srv_path(s);
id->status = status;
- if (sess->state != RTRS_SRV_CONNECTED) {
+ if (srv_path->state != RTRS_SRV_CONNECTED) {
rtrs_err_rl(s,
- "Sending I/O response failed, session %s is disconnected, sess state %s\n",
- kobject_name(&sess->kobj),
- rtrs_srv_state_str(sess->state));
+ "Sending I/O response failed, server path %s is disconnected, path state %s\n",
+ kobject_name(&srv_path->kobj),
+ rtrs_srv_state_str(srv_path->state));
goto out;
}
if (always_invalidate) {
- struct rtrs_srv_mr *mr = &sess->mrs[id->msg_id];
+ struct rtrs_srv_mr *mr = &srv_path->mrs[id->msg_id];
ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
}
if (atomic_sub_return(1, &con->c.sq_wr_avail) < 0) {
- rtrs_err(s, "IB send queue full: sess=%s cid=%d\n",
- kobject_name(&sess->kobj),
+ rtrs_err(s, "IB send queue full: srv_path=%s cid=%d\n",
+ kobject_name(&srv_path->kobj),
con->c.cid);
atomic_add(1, &con->c.sq_wr_avail);
spin_lock(&con->rsp_wr_wait_lock);
@@ -523,12 +519,12 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
err = rdma_write_sg(id);
if (err) {
- rtrs_err_rl(s, "IO response failed: %d: sess=%s\n", err,
- kobject_name(&sess->kobj));
- close_sess(sess);
+ rtrs_err_rl(s, "IO response failed: %d: srv_path=%s\n", err,
+ kobject_name(&srv_path->kobj));
+ close_path(srv_path);
}
out:
- rtrs_srv_put_ops_ids(sess);
+ rtrs_srv_put_ops_ids(srv_path);
return true;
}
EXPORT_SYMBOL(rtrs_srv_resp_rdma);
@@ -538,33 +534,33 @@ EXPORT_SYMBOL(rtrs_srv_resp_rdma);
* @srv: Session pointer
* @priv: The private pointer that is associated with the session.
*/
-void rtrs_srv_set_sess_priv(struct rtrs_srv *srv, void *priv)
+void rtrs_srv_set_sess_priv(struct rtrs_srv_sess *srv, void *priv)
{
srv->priv = priv;
}
EXPORT_SYMBOL(rtrs_srv_set_sess_priv);
-static void unmap_cont_bufs(struct rtrs_srv_sess *sess)
+static void unmap_cont_bufs(struct rtrs_srv_path *srv_path)
{
int i;
- for (i = 0; i < sess->mrs_num; i++) {
+ for (i = 0; i < srv_path->mrs_num; i++) {
struct rtrs_srv_mr *srv_mr;
- srv_mr = &sess->mrs[i];
- rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
+ srv_mr = &srv_path->mrs[i];
+ rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
ib_dereg_mr(srv_mr->mr);
- ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl,
+ ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl,
srv_mr->sgt.nents, DMA_BIDIRECTIONAL);
sg_free_table(&srv_mr->sgt);
}
- kfree(sess->mrs);
+ kfree(srv_path->mrs);
}
-static int map_cont_bufs(struct rtrs_srv_sess *sess)
+static int map_cont_bufs(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
- struct rtrs_sess *ss = &sess->s;
+ struct rtrs_srv_sess *srv = srv_path->srv;
+ struct rtrs_path *ss = &srv_path->s;
int i, mri, err, mrs_num;
unsigned int chunk_bits;
int chunks_per_mr = 1;
@@ -581,23 +577,23 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
mrs_num = srv->queue_depth;
} else {
chunks_per_mr =
- sess->s.dev->ib_dev->attrs.max_fast_reg_page_list_len;
+ srv_path->s.dev->ib_dev->attrs.max_fast_reg_page_list_len;
mrs_num = DIV_ROUND_UP(srv->queue_depth, chunks_per_mr);
chunks_per_mr = DIV_ROUND_UP(srv->queue_depth, mrs_num);
}
- sess->mrs = kcalloc(mrs_num, sizeof(*sess->mrs), GFP_KERNEL);
- if (!sess->mrs)
+ srv_path->mrs = kcalloc(mrs_num, sizeof(*srv_path->mrs), GFP_KERNEL);
+ if (!srv_path->mrs)
return -ENOMEM;
- sess->mrs_num = mrs_num;
+ srv_path->mrs_num = mrs_num;
for (mri = 0; mri < mrs_num; mri++) {
- struct rtrs_srv_mr *srv_mr = &sess->mrs[mri];
+ struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri];
struct sg_table *sgt = &srv_mr->sgt;
struct scatterlist *s;
struct ib_mr *mr;
- int nr, chunks;
+ int nr, nr_sgt, chunks;
chunks = chunks_per_mr * mri;
if (!always_invalidate)
@@ -612,19 +608,19 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
sg_set_page(s, srv->chunks[chunks + i],
max_chunk_size, 0);
- nr = ib_dma_map_sg(sess->s.dev->ib_dev, sgt->sgl,
+ nr_sgt = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl,
sgt->nents, DMA_BIDIRECTIONAL);
- if (nr < sgt->nents) {
- err = nr < 0 ? nr : -EINVAL;
+ if (!nr_sgt) {
+ err = -EINVAL;
goto free_sg;
}
- mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
- sgt->nents);
+ mr = ib_alloc_mr(srv_path->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
+ nr_sgt);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
goto unmap_sg;
}
- nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents,
+ nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
NULL, max_chunk_size);
if (nr < 0 || nr < sgt->nents) {
err = nr < 0 ? nr : -EINVAL;
@@ -634,7 +630,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
if (always_invalidate) {
srv_mr->iu = rtrs_iu_alloc(1,
sizeof(struct rtrs_msg_rkey_rsp),
- GFP_KERNEL, sess->s.dev->ib_dev,
+ GFP_KERNEL, srv_path->s.dev->ib_dev,
DMA_TO_DEVICE, rtrs_srv_rdma_done);
if (!srv_mr->iu) {
err = -ENOMEM;
@@ -643,8 +639,8 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
}
}
/* Eventually dma addr for each chunk can be cached */
- for_each_sg(sgt->sgl, s, sgt->orig_nents, i)
- sess->dma_addr[chunks + i] = sg_dma_address(s);
+ for_each_sg(sgt->sgl, s, nr_sgt, i)
+ srv_path->dma_addr[chunks + i] = sg_dma_address(s);
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
srv_mr->mr = mr;
@@ -652,75 +648,75 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
continue;
err:
while (mri--) {
- srv_mr = &sess->mrs[mri];
+ srv_mr = &srv_path->mrs[mri];
sgt = &srv_mr->sgt;
mr = srv_mr->mr;
- rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
dereg_mr:
ib_dereg_mr(mr);
unmap_sg:
- ib_dma_unmap_sg(sess->s.dev->ib_dev, sgt->sgl,
+ ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
sgt->nents, DMA_BIDIRECTIONAL);
free_sg:
sg_free_table(sgt);
}
- kfree(sess->mrs);
+ kfree(srv_path->mrs);
return err;
}
chunk_bits = ilog2(srv->queue_depth - 1) + 1;
- sess->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits);
+ srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits);
return 0;
}
static void rtrs_srv_hb_err_handler(struct rtrs_con *c)
{
- close_sess(to_srv_sess(c->sess));
+ close_path(to_srv_path(c->path));
}
-static void rtrs_srv_init_hb(struct rtrs_srv_sess *sess)
+static void rtrs_srv_init_hb(struct rtrs_srv_path *srv_path)
{
- rtrs_init_hb(&sess->s, &io_comp_cqe,
+ rtrs_init_hb(&srv_path->s, &io_comp_cqe,
RTRS_HB_INTERVAL_MS,
RTRS_HB_MISSED_MAX,
rtrs_srv_hb_err_handler,
rtrs_wq);
}
-static void rtrs_srv_start_hb(struct rtrs_srv_sess *sess)
+static void rtrs_srv_start_hb(struct rtrs_srv_path *srv_path)
{
- rtrs_start_hb(&sess->s);
+ rtrs_start_hb(&srv_path->s);
}
-static void rtrs_srv_stop_hb(struct rtrs_srv_sess *sess)
+static void rtrs_srv_stop_hb(struct rtrs_srv_path *srv_path)
{
- rtrs_stop_hb(&sess->s);
+ rtrs_stop_hb(&srv_path->s);
}
static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
struct rtrs_iu *iu;
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1);
if (wc->status != IB_WC_SUCCESS) {
rtrs_err(s, "Sess info response send failed: %s\n",
ib_wc_status_msg(wc->status));
- close_sess(sess);
+ close_path(srv_path);
return;
}
WARN_ON(wc->opcode != IB_WC_SEND);
}
-static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess)
+static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_srv_ctx *ctx = srv->ctx;
int up;
@@ -731,18 +727,18 @@ static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess)
mutex_unlock(&srv->paths_ev_mutex);
/* Mark session as established */
- sess->established = true;
+ srv_path->established = true;
}
-static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess)
+static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_srv_ctx *ctx = srv->ctx;
- if (!sess->established)
+ if (!srv_path->established)
return;
- sess->established = false;
+ srv_path->established = false;
mutex_lock(&srv->paths_ev_mutex);
WARN_ON(!srv->paths_up);
if (--srv->paths_up == 0)
@@ -750,11 +746,11 @@ static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess)
mutex_unlock(&srv->paths_ev_mutex);
}
-static bool exist_sessname(struct rtrs_srv_ctx *ctx,
- const char *sessname, const uuid_t *path_uuid)
+static bool exist_pathname(struct rtrs_srv_ctx *ctx,
+ const char *pathname, const uuid_t *path_uuid)
{
- struct rtrs_srv *srv;
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_sess *srv;
+ struct rtrs_srv_path *srv_path;
bool found = false;
mutex_lock(&ctx->srv_mutex);
@@ -767,9 +763,9 @@ static bool exist_sessname(struct rtrs_srv_ctx *ctx,
continue;
}
- list_for_each_entry(sess, &srv->paths_list, s.entry) {
- if (strlen(sess->s.sessname) == strlen(sessname) &&
- !strcmp(sess->s.sessname, sessname)) {
+ list_for_each_entry(srv_path, &srv->paths_list, s.entry) {
+ if (strlen(srv_path->s.sessname) == strlen(pathname) &&
+ !strcmp(srv_path->s.sessname, pathname)) {
found = true;
break;
}
@@ -782,14 +778,14 @@ static bool exist_sessname(struct rtrs_srv_ctx *ctx,
return found;
}
-static int post_recv_sess(struct rtrs_srv_sess *sess);
+static int post_recv_path(struct rtrs_srv_path *srv_path);
static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno);
static int process_info_req(struct rtrs_srv_con *con,
struct rtrs_msg_info_req *msg)
{
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
struct ib_send_wr *reg_wr = NULL;
struct rtrs_msg_info_rsp *rsp;
struct rtrs_iu *tx_iu;
@@ -797,31 +793,32 @@ static int process_info_req(struct rtrs_srv_con *con,
int mri, err;
size_t tx_sz;
- err = post_recv_sess(sess);
+ err = post_recv_path(srv_path);
if (err) {
- rtrs_err(s, "post_recv_sess(), err: %d\n", err);
+ rtrs_err(s, "post_recv_path(), err: %d\n", err);
return err;
}
- if (strchr(msg->sessname, '/') || strchr(msg->sessname, '.')) {
- rtrs_err(s, "sessname cannot contain / and .\n");
+ if (strchr(msg->pathname, '/') || strchr(msg->pathname, '.')) {
+ rtrs_err(s, "pathname cannot contain / and .\n");
return -EINVAL;
}
- if (exist_sessname(sess->srv->ctx,
- msg->sessname, &sess->srv->paths_uuid)) {
- rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname);
+ if (exist_pathname(srv_path->srv->ctx,
+ msg->pathname, &srv_path->srv->paths_uuid)) {
+ rtrs_err(s, "pathname is duplicated: %s\n", msg->pathname);
return -EPERM;
}
- strscpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname));
+ strscpy(srv_path->s.sessname, msg->pathname,
+ sizeof(srv_path->s.sessname));
- rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL);
+ rwr = kcalloc(srv_path->mrs_num, sizeof(*rwr), GFP_KERNEL);
if (!rwr)
return -ENOMEM;
tx_sz = sizeof(*rsp);
- tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num;
- tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, sess->s.dev->ib_dev,
+ tx_sz += sizeof(rsp->desc[0]) * srv_path->mrs_num;
+ tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, srv_path->s.dev->ib_dev,
DMA_TO_DEVICE, rtrs_srv_info_rsp_done);
if (!tx_iu) {
err = -ENOMEM;
@@ -830,10 +827,10 @@ static int process_info_req(struct rtrs_srv_con *con,
rsp = tx_iu->buf;
rsp->type = cpu_to_le16(RTRS_MSG_INFO_RSP);
- rsp->sg_cnt = cpu_to_le16(sess->mrs_num);
+ rsp->sg_cnt = cpu_to_le16(srv_path->mrs_num);
- for (mri = 0; mri < sess->mrs_num; mri++) {
- struct ib_mr *mr = sess->mrs[mri].mr;
+ for (mri = 0; mri < srv_path->mrs_num; mri++) {
+ struct ib_mr *mr = srv_path->mrs[mri].mr;
rsp->desc[mri].addr = cpu_to_le64(mr->iova);
rsp->desc[mri].key = cpu_to_le32(mr->rkey);
@@ -854,13 +851,13 @@ static int process_info_req(struct rtrs_srv_con *con,
reg_wr = &rwr[mri].wr;
}
- err = rtrs_srv_create_sess_files(sess);
+ err = rtrs_srv_create_path_files(srv_path);
if (err)
goto iu_free;
- kobject_get(&sess->kobj);
- get_device(&sess->srv->dev);
- rtrs_srv_change_state(sess, RTRS_SRV_CONNECTED);
- rtrs_srv_start_hb(sess);
+ kobject_get(&srv_path->kobj);
+ get_device(&srv_path->srv->dev);
+ rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
+ rtrs_srv_start_hb(srv_path);
/*
* We do not account number of established connections at the current
@@ -868,9 +865,10 @@ static int process_info_req(struct rtrs_srv_con *con,
* all connections are successfully established. Thus, simply notify
* listener with a proper event if we are the first path.
*/
- rtrs_srv_sess_up(sess);
+ rtrs_srv_path_up(srv_path);
- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr,
+ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
+ tx_iu->dma_addr,
tx_iu->size, DMA_TO_DEVICE);
/* Send info response */
@@ -878,7 +876,7 @@ static int process_info_req(struct rtrs_srv_con *con,
if (err) {
rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err);
iu_free:
- rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(tx_iu, srv_path->s.dev->ib_dev, 1);
}
rwr_free:
kfree(rwr);
@@ -889,8 +887,8 @@ rwr_free:
static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
struct rtrs_msg_info_req *msg;
struct rtrs_iu *iu;
int err;
@@ -910,7 +908,7 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
wc->byte_len);
goto close;
}
- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
+ ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev, iu->dma_addr,
iu->size, DMA_FROM_DEVICE);
msg = iu->buf;
if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ) {
@@ -923,22 +921,22 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
goto close;
out:
- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1);
return;
close:
- close_sess(sess);
+ close_path(srv_path);
goto out;
}
static int post_recv_info_req(struct rtrs_srv_con *con)
{
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
struct rtrs_iu *rx_iu;
int err;
rx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req),
- GFP_KERNEL, sess->s.dev->ib_dev,
+ GFP_KERNEL, srv_path->s.dev->ib_dev,
DMA_FROM_DEVICE, rtrs_srv_info_req_done);
if (!rx_iu)
return -ENOMEM;
@@ -946,7 +944,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con)
err = rtrs_iu_post_recv(&con->c, rx_iu);
if (err) {
rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err);
- rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(rx_iu, srv_path->s.dev->ib_dev, 1);
return err;
}
@@ -966,20 +964,20 @@ static int post_recv_io(struct rtrs_srv_con *con, size_t q_size)
return 0;
}
-static int post_recv_sess(struct rtrs_srv_sess *sess)
+static int post_recv_path(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
- struct rtrs_sess *s = &sess->s;
+ struct rtrs_srv_sess *srv = srv_path->srv;
+ struct rtrs_path *s = &srv_path->s;
size_t q_size;
int err, cid;
- for (cid = 0; cid < sess->s.con_num; cid++) {
+ for (cid = 0; cid < srv_path->s.con_num; cid++) {
if (cid == 0)
q_size = SERVICE_CON_QUEUE_DEPTH;
else
q_size = srv->queue_depth;
- err = post_recv_io(to_srv_con(sess->s.con[cid]), q_size);
+ err = post_recv_io(to_srv_con(srv_path->s.con[cid]), q_size);
if (err) {
rtrs_err(s, "post_recv_io(), err: %d\n", err);
return err;
@@ -993,9 +991,9 @@ static void process_read(struct rtrs_srv_con *con,
struct rtrs_msg_rdma_read *msg,
u32 buf_id, u32 off)
{
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_srv_ctx *ctx = srv->ctx;
struct rtrs_srv_op *id;
@@ -1003,10 +1001,10 @@ static void process_read(struct rtrs_srv_con *con,
void *data;
int ret;
- if (sess->state != RTRS_SRV_CONNECTED) {
+ if (srv_path->state != RTRS_SRV_CONNECTED) {
rtrs_err_rl(s,
"Processing read request failed, session is disconnected, sess state %s\n",
- rtrs_srv_state_str(sess->state));
+ rtrs_srv_state_str(srv_path->state));
return;
}
if (msg->sg_cnt != 1 && msg->sg_cnt != 0) {
@@ -1014,9 +1012,9 @@ static void process_read(struct rtrs_srv_con *con,
"Processing read request failed, invalid message\n");
return;
}
- rtrs_srv_get_ops_ids(sess);
- rtrs_srv_update_rdma_stats(sess->stats, off, READ);
- id = sess->ops_ids[buf_id];
+ rtrs_srv_get_ops_ids(srv_path);
+ rtrs_srv_update_rdma_stats(srv_path->stats, off, READ);
+ id = srv_path->ops_ids[buf_id];
id->con = con;
id->dir = READ;
id->msg_id = buf_id;
@@ -1024,7 +1022,7 @@ static void process_read(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(msg->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len,
data + data_len, usr_len);
if (ret) {
@@ -1042,18 +1040,18 @@ send_err_msg:
rtrs_err_rl(s,
"Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n",
buf_id, ret);
- close_sess(sess);
+ close_path(srv_path);
}
- rtrs_srv_put_ops_ids(sess);
+ rtrs_srv_put_ops_ids(srv_path);
}
static void process_write(struct rtrs_srv_con *con,
struct rtrs_msg_rdma_write *req,
u32 buf_id, u32 off)
{
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_srv_ctx *ctx = srv->ctx;
struct rtrs_srv_op *id;
@@ -1061,15 +1059,15 @@ static void process_write(struct rtrs_srv_con *con,
void *data;
int ret;
- if (sess->state != RTRS_SRV_CONNECTED) {
+ if (srv_path->state != RTRS_SRV_CONNECTED) {
rtrs_err_rl(s,
"Processing write request failed, session is disconnected, sess state %s\n",
- rtrs_srv_state_str(sess->state));
+ rtrs_srv_state_str(srv_path->state));
return;
}
- rtrs_srv_get_ops_ids(sess);
- rtrs_srv_update_rdma_stats(sess->stats, off, WRITE);
- id = sess->ops_ids[buf_id];
+ rtrs_srv_get_ops_ids(srv_path);
+ rtrs_srv_update_rdma_stats(srv_path->stats, off, WRITE);
+ id = srv_path->ops_ids[buf_id];
id->con = con;
id->dir = WRITE;
id->msg_id = buf_id;
@@ -1077,7 +1075,7 @@ static void process_write(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(req->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len,
data + data_len, usr_len);
if (ret) {
rtrs_err_rl(s,
@@ -1094,20 +1092,21 @@ send_err_msg:
rtrs_err_rl(s,
"Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n",
buf_id, ret);
- close_sess(sess);
+ close_path(srv_path);
}
- rtrs_srv_put_ops_ids(sess);
+ rtrs_srv_put_ops_ids(srv_path);
}
static void process_io_req(struct rtrs_srv_con *con, void *msg,
u32 id, u32 off)
{
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
struct rtrs_msg_rdma_hdr *hdr;
unsigned int type;
- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, sess->dma_addr[id],
+ ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev,
+ srv_path->dma_addr[id],
max_chunk_size, DMA_BIDIRECTIONAL);
hdr = msg;
type = le16_to_cpu(hdr->type);
@@ -1129,7 +1128,7 @@ static void process_io_req(struct rtrs_srv_con *con, void *msg,
return;
err:
- close_sess(sess);
+ close_path(srv_path);
}
static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1137,16 +1136,16 @@ static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
struct rtrs_srv_mr *mr =
container_of(wc->wr_cqe, typeof(*mr), inv_cqe);
struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_srv_sess *srv = srv_path->srv;
u32 msg_id, off;
void *data;
if (wc->status != IB_WC_SUCCESS) {
rtrs_err(s, "Failed IB_WR_LOCAL_INV: %s\n",
ib_wc_status_msg(wc->status));
- close_sess(sess);
+ close_path(srv_path);
}
msg_id = mr->msg_id;
off = mr->msg_off;
@@ -1194,9 +1193,9 @@ static void rtrs_rdma_process_wr_wait_list(struct rtrs_srv_con *con)
static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
- struct rtrs_sess *s = con->c.sess;
- struct rtrs_srv_sess *sess = to_srv_sess(s);
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_path *s = con->c.path;
+ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_srv_sess *srv = srv_path->srv;
u32 imm_type, imm_payload;
int err;
@@ -1206,7 +1205,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
"%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n",
ib_wc_status_msg(wc->status), wc->wr_cqe,
wc->opcode, wc->vendor_err, wc->byte_len);
- close_sess(sess);
+ close_path(srv_path);
}
return;
}
@@ -1222,7 +1221,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
if (err) {
rtrs_err(s, "rtrs_post_recv(), err: %d\n", err);
- close_sess(sess);
+ close_path(srv_path);
break;
}
rtrs_from_imm(be32_to_cpu(wc->ex.imm_data),
@@ -1231,16 +1230,16 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
u32 msg_id, off;
void *data;
- msg_id = imm_payload >> sess->mem_bits;
- off = imm_payload & ((1 << sess->mem_bits) - 1);
+ msg_id = imm_payload >> srv_path->mem_bits;
+ off = imm_payload & ((1 << srv_path->mem_bits) - 1);
if (msg_id >= srv->queue_depth || off >= max_chunk_size) {
rtrs_err(s, "Wrong msg_id %u, off %u\n",
msg_id, off);
- close_sess(sess);
+ close_path(srv_path);
return;
}
if (always_invalidate) {
- struct rtrs_srv_mr *mr = &sess->mrs[msg_id];
+ struct rtrs_srv_mr *mr = &srv_path->mrs[msg_id];
mr->msg_off = off;
mr->msg_id = msg_id;
@@ -1248,7 +1247,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
if (err) {
rtrs_err(s, "rtrs_post_recv(), err: %d\n",
err);
- close_sess(sess);
+ close_path(srv_path);
break;
}
} else {
@@ -1257,10 +1256,10 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
}
} else if (imm_type == RTRS_HB_MSG_IMM) {
WARN_ON(con->c.cid);
- rtrs_send_hb_ack(&sess->s);
+ rtrs_send_hb_ack(&srv_path->s);
} else if (imm_type == RTRS_HB_ACK_IMM) {
WARN_ON(con->c.cid);
- sess->s.hb_missed_cnt = 0;
+ srv_path->s.hb_missed_cnt = 0;
} else {
rtrs_wrn(s, "Unknown IMM type %u\n", imm_type);
}
@@ -1284,22 +1283,23 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
}
/**
- * rtrs_srv_get_sess_name() - Get rtrs_srv peer hostname.
+ * rtrs_srv_get_path_name() - Get rtrs_srv peer hostname.
* @srv: Session
- * @sessname: Sessname buffer
+ * @pathname: Pathname buffer
* @len: Length of sessname buffer
*/
-int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len)
+int rtrs_srv_get_path_name(struct rtrs_srv_sess *srv, char *pathname,
+ size_t len)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
int err = -ENOTCONN;
mutex_lock(&srv->paths_mutex);
- list_for_each_entry(sess, &srv->paths_list, s.entry) {
- if (sess->state != RTRS_SRV_CONNECTED)
+ list_for_each_entry(srv_path, &srv->paths_list, s.entry) {
+ if (srv_path->state != RTRS_SRV_CONNECTED)
continue;
- strscpy(sessname, sess->s.sessname,
- min_t(size_t, sizeof(sess->s.sessname), len));
+ strscpy(pathname, srv_path->s.sessname,
+ min_t(size_t, sizeof(srv_path->s.sessname), len));
err = 0;
break;
}
@@ -1307,50 +1307,51 @@ int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len)
return err;
}
-EXPORT_SYMBOL(rtrs_srv_get_sess_name);
+EXPORT_SYMBOL(rtrs_srv_get_path_name);
/**
* rtrs_srv_get_queue_depth() - Get rtrs_srv qdepth.
* @srv: Session
*/
-int rtrs_srv_get_queue_depth(struct rtrs_srv *srv)
+int rtrs_srv_get_queue_depth(struct rtrs_srv_sess *srv)
{
return srv->queue_depth;
}
EXPORT_SYMBOL(rtrs_srv_get_queue_depth);
-static int find_next_bit_ring(struct rtrs_srv_sess *sess)
+static int find_next_bit_ring(struct rtrs_srv_path *srv_path)
{
- struct ib_device *ib_dev = sess->s.dev->ib_dev;
+ struct ib_device *ib_dev = srv_path->s.dev->ib_dev;
int v;
- v = cpumask_next(sess->cur_cq_vector, &cq_affinity_mask);
+ v = cpumask_next(srv_path->cur_cq_vector, &cq_affinity_mask);
if (v >= nr_cpu_ids || v >= ib_dev->num_comp_vectors)
v = cpumask_first(&cq_affinity_mask);
return v;
}
-static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_sess *sess)
+static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_path *srv_path)
{
- sess->cur_cq_vector = find_next_bit_ring(sess);
+ srv_path->cur_cq_vector = find_next_bit_ring(srv_path);
- return sess->cur_cq_vector;
+ return srv_path->cur_cq_vector;
}
static void rtrs_srv_dev_release(struct device *dev)
{
- struct rtrs_srv *srv = container_of(dev, struct rtrs_srv, dev);
+ struct rtrs_srv_sess *srv = container_of(dev, struct rtrs_srv_sess,
+ dev);
kfree(srv);
}
-static void free_srv(struct rtrs_srv *srv)
+static void free_srv(struct rtrs_srv_sess *srv)
{
int i;
WARN_ON(refcount_read(&srv->refcount));
for (i = 0; i < srv->queue_depth; i++)
- mempool_free(srv->chunks[i], chunk_pool);
+ __free_pages(srv->chunks[i], get_order(max_chunk_size));
kfree(srv->chunks);
mutex_destroy(&srv->paths_mutex);
mutex_destroy(&srv->paths_ev_mutex);
@@ -1358,11 +1359,11 @@ static void free_srv(struct rtrs_srv *srv)
put_device(&srv->dev);
}
-static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
+static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx,
const uuid_t *paths_uuid,
bool first_conn)
{
- struct rtrs_srv *srv;
+ struct rtrs_srv_sess *srv;
int i;
mutex_lock(&ctx->srv_mutex);
@@ -1403,7 +1404,8 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
goto err_free_srv;
for (i = 0; i < srv->queue_depth; i++) {
- srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL);
+ srv->chunks[i] = alloc_pages(GFP_KERNEL,
+ get_order(max_chunk_size));
if (!srv->chunks[i])
goto err_free_chunks;
}
@@ -1416,7 +1418,7 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
err_free_chunks:
while (i--)
- mempool_free(srv->chunks[i], chunk_pool);
+ __free_pages(srv->chunks[i], get_order(max_chunk_size));
kfree(srv->chunks);
err_free_srv:
@@ -1424,7 +1426,7 @@ err_free_srv:
return ERR_PTR(-ENOMEM);
}
-static void put_srv(struct rtrs_srv *srv)
+static void put_srv(struct rtrs_srv_sess *srv)
{
if (refcount_dec_and_test(&srv->refcount)) {
struct rtrs_srv_ctx *ctx = srv->ctx;
@@ -1438,23 +1440,23 @@ static void put_srv(struct rtrs_srv *srv)
}
}
-static void __add_path_to_srv(struct rtrs_srv *srv,
- struct rtrs_srv_sess *sess)
+static void __add_path_to_srv(struct rtrs_srv_sess *srv,
+ struct rtrs_srv_path *srv_path)
{
- list_add_tail(&sess->s.entry, &srv->paths_list);
+ list_add_tail(&srv_path->s.entry, &srv->paths_list);
srv->paths_num++;
WARN_ON(srv->paths_num >= MAX_PATHS_NUM);
}
-static void del_path_from_srv(struct rtrs_srv_sess *sess)
+static void del_path_from_srv(struct rtrs_srv_path *srv_path)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
if (WARN_ON(!srv))
return;
mutex_lock(&srv->paths_mutex);
- list_del(&sess->s.entry);
+ list_del(&srv_path->s.entry);
WARN_ON(!srv->paths_num);
srv->paths_num--;
mutex_unlock(&srv->paths_mutex);
@@ -1484,47 +1486,48 @@ static int sockaddr_cmp(const struct sockaddr *a, const struct sockaddr *b)
}
}
-static bool __is_path_w_addr_exists(struct rtrs_srv *srv,
+static bool __is_path_w_addr_exists(struct rtrs_srv_sess *srv,
struct rdma_addr *addr)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
- list_for_each_entry(sess, &srv->paths_list, s.entry)
- if (!sockaddr_cmp((struct sockaddr *)&sess->s.dst_addr,
+ list_for_each_entry(srv_path, &srv->paths_list, s.entry)
+ if (!sockaddr_cmp((struct sockaddr *)&srv_path->s.dst_addr,
(struct sockaddr *)&addr->dst_addr) &&
- !sockaddr_cmp((struct sockaddr *)&sess->s.src_addr,
+ !sockaddr_cmp((struct sockaddr *)&srv_path->s.src_addr,
(struct sockaddr *)&addr->src_addr))
return true;
return false;
}
-static void free_sess(struct rtrs_srv_sess *sess)
+static void free_path(struct rtrs_srv_path *srv_path)
{
- if (sess->kobj.state_in_sysfs) {
- kobject_del(&sess->kobj);
- kobject_put(&sess->kobj);
+ if (srv_path->kobj.state_in_sysfs) {
+ kobject_del(&srv_path->kobj);
+ kobject_put(&srv_path->kobj);
} else {
- kfree(sess->stats);
- kfree(sess);
+ free_percpu(srv_path->stats->rdma_stats);
+ kfree(srv_path->stats);
+ kfree(srv_path);
}
}
static void rtrs_srv_close_work(struct work_struct *work)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
struct rtrs_srv_con *con;
int i;
- sess = container_of(work, typeof(*sess), close_work);
+ srv_path = container_of(work, typeof(*srv_path), close_work);
- rtrs_srv_destroy_sess_files(sess);
- rtrs_srv_stop_hb(sess);
+ rtrs_srv_destroy_path_files(srv_path);
+ rtrs_srv_stop_hb(srv_path);
- for (i = 0; i < sess->s.con_num; i++) {
- if (!sess->s.con[i])
+ for (i = 0; i < srv_path->s.con_num; i++) {
+ if (!srv_path->s.con[i])
continue;
- con = to_srv_con(sess->s.con[i]);
+ con = to_srv_con(srv_path->s.con[i]);
rdma_disconnect(con->c.cm_id);
ib_drain_qp(con->c.qp);
}
@@ -1533,41 +1536,41 @@ static void rtrs_srv_close_work(struct work_struct *work)
* Degrade ref count to the usual model with a single shared
* atomic_t counter
*/
- percpu_ref_kill(&sess->ids_inflight_ref);
+ percpu_ref_kill(&srv_path->ids_inflight_ref);
/* Wait for all completion */
- wait_for_completion(&sess->complete_done);
+ wait_for_completion(&srv_path->complete_done);
/* Notify upper layer if we are the last path */
- rtrs_srv_sess_down(sess);
+ rtrs_srv_path_down(srv_path);
- unmap_cont_bufs(sess);
- rtrs_srv_free_ops_ids(sess);
+ unmap_cont_bufs(srv_path);
+ rtrs_srv_free_ops_ids(srv_path);
- for (i = 0; i < sess->s.con_num; i++) {
- if (!sess->s.con[i])
+ for (i = 0; i < srv_path->s.con_num; i++) {
+ if (!srv_path->s.con[i])
continue;
- con = to_srv_con(sess->s.con[i]);
+ con = to_srv_con(srv_path->s.con[i]);
rtrs_cq_qp_destroy(&con->c);
rdma_destroy_id(con->c.cm_id);
kfree(con);
}
- rtrs_ib_dev_put(sess->s.dev);
+ rtrs_ib_dev_put(srv_path->s.dev);
- del_path_from_srv(sess);
- put_srv(sess->srv);
- sess->srv = NULL;
- rtrs_srv_change_state(sess, RTRS_SRV_CLOSED);
+ del_path_from_srv(srv_path);
+ put_srv(srv_path->srv);
+ srv_path->srv = NULL;
+ rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSED);
- kfree(sess->dma_addr);
- kfree(sess->s.con);
- free_sess(sess);
+ kfree(srv_path->dma_addr);
+ kfree(srv_path->s.con);
+ free_path(srv_path);
}
-static int rtrs_rdma_do_accept(struct rtrs_srv_sess *sess,
+static int rtrs_rdma_do_accept(struct rtrs_srv_path *srv_path,
struct rdma_cm_id *cm_id)
{
- struct rtrs_srv *srv = sess->srv;
+ struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_msg_conn_rsp msg;
struct rdma_conn_param param;
int err;
@@ -1615,25 +1618,25 @@ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno)
return errno;
}
-static struct rtrs_srv_sess *
-__find_sess(struct rtrs_srv *srv, const uuid_t *sess_uuid)
+static struct rtrs_srv_path *
+__find_path(struct rtrs_srv_sess *srv, const uuid_t *sess_uuid)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
- list_for_each_entry(sess, &srv->paths_list, s.entry) {
- if (uuid_equal(&sess->s.uuid, sess_uuid))
- return sess;
+ list_for_each_entry(srv_path, &srv->paths_list, s.entry) {
+ if (uuid_equal(&srv_path->s.uuid, sess_uuid))
+ return srv_path;
}
return NULL;
}
-static int create_con(struct rtrs_srv_sess *sess,
+static int create_con(struct rtrs_srv_path *srv_path,
struct rdma_cm_id *cm_id,
unsigned int cid)
{
- struct rtrs_srv *srv = sess->srv;
- struct rtrs_sess *s = &sess->s;
+ struct rtrs_srv_sess *srv = srv_path->srv;
+ struct rtrs_path *s = &srv_path->s;
struct rtrs_srv_con *con;
u32 cq_num, max_send_wr, max_recv_wr, wr_limit;
@@ -1648,10 +1651,10 @@ static int create_con(struct rtrs_srv_sess *sess,
spin_lock_init(&con->rsp_wr_wait_lock);
INIT_LIST_HEAD(&con->rsp_wr_wait_list);
con->c.cm_id = cm_id;
- con->c.sess = &sess->s;
+ con->c.path = &srv_path->s;
con->c.cid = cid;
atomic_set(&con->c.wr_cnt, 1);
- wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr;
+ wr_limit = srv_path->s.dev->ib_dev->attrs.max_qp_wr;
if (con->c.cid == 0) {
/*
@@ -1684,10 +1687,10 @@ static int create_con(struct rtrs_srv_sess *sess,
}
cq_num = max_send_wr + max_recv_wr;
atomic_set(&con->c.sq_wr_avail, max_send_wr);
- cq_vector = rtrs_srv_get_next_cq_vector(sess);
+ cq_vector = rtrs_srv_get_next_cq_vector(srv_path);
/* TODO: SOFTIRQ can be faster, but be careful with softirq context */
- err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_num,
+ err = rtrs_cq_qp_create(&srv_path->s, &con->c, 1, cq_vector, cq_num,
max_send_wr, max_recv_wr,
IB_POLL_WORKQUEUE);
if (err) {
@@ -1699,8 +1702,8 @@ static int create_con(struct rtrs_srv_sess *sess,
if (err)
goto free_cqqp;
}
- WARN_ON(sess->s.con[cid]);
- sess->s.con[cid] = &con->c;
+ WARN_ON(srv_path->s.con[cid]);
+ srv_path->s.con[cid] = &con->c;
/*
* Change context from server to current connection. The other
@@ -1719,13 +1722,13 @@ err:
return err;
}
-static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv,
+static struct rtrs_srv_path *__alloc_path(struct rtrs_srv_sess *srv,
struct rdma_cm_id *cm_id,
unsigned int con_num,
unsigned int recon_cnt,
const uuid_t *uuid)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
int err = -ENOMEM;
char str[NAME_MAX];
struct rtrs_addr path;
@@ -1739,74 +1742,82 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv,
pr_err("Path with same addr exists\n");
goto err;
}
- sess = kzalloc(sizeof(*sess), GFP_KERNEL);
- if (!sess)
+ srv_path = kzalloc(sizeof(*srv_path), GFP_KERNEL);
+ if (!srv_path)
goto err;
- sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
- if (!sess->stats)
+ srv_path->stats = kzalloc(sizeof(*srv_path->stats), GFP_KERNEL);
+ if (!srv_path->stats)
goto err_free_sess;
- sess->stats->sess = sess;
-
- sess->dma_addr = kcalloc(srv->queue_depth, sizeof(*sess->dma_addr),
- GFP_KERNEL);
- if (!sess->dma_addr)
+ srv_path->stats->rdma_stats = alloc_percpu(struct rtrs_srv_stats_rdma_stats);
+ if (!srv_path->stats->rdma_stats)
goto err_free_stats;
- sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
- if (!sess->s.con)
+ srv_path->stats->srv_path = srv_path;
+
+ srv_path->dma_addr = kcalloc(srv->queue_depth,
+ sizeof(*srv_path->dma_addr),
+ GFP_KERNEL);
+ if (!srv_path->dma_addr)
+ goto err_free_percpu;
+
+ srv_path->s.con = kcalloc(con_num, sizeof(*srv_path->s.con),
+ GFP_KERNEL);
+ if (!srv_path->s.con)
goto err_free_dma_addr;
- sess->state = RTRS_SRV_CONNECTING;
- sess->srv = srv;
- sess->cur_cq_vector = -1;
- sess->s.dst_addr = cm_id->route.addr.dst_addr;
- sess->s.src_addr = cm_id->route.addr.src_addr;
+ srv_path->state = RTRS_SRV_CONNECTING;
+ srv_path->srv = srv;
+ srv_path->cur_cq_vector = -1;
+ srv_path->s.dst_addr = cm_id->route.addr.dst_addr;
+ srv_path->s.src_addr = cm_id->route.addr.src_addr;
/* temporary until receiving session-name from client */
- path.src = &sess->s.src_addr;
- path.dst = &sess->s.dst_addr;
+ path.src = &srv_path->s.src_addr;
+ path.dst = &srv_path->s.dst_addr;
rtrs_addr_to_str(&path, str, sizeof(str));
- strscpy(sess->s.sessname, str, sizeof(sess->s.sessname));
-
- sess->s.con_num = con_num;
- sess->s.irq_con_num = con_num;
- sess->s.recon_cnt = recon_cnt;
- uuid_copy(&sess->s.uuid, uuid);
- spin_lock_init(&sess->state_lock);
- INIT_WORK(&sess->close_work, rtrs_srv_close_work);
- rtrs_srv_init_hb(sess);
-
- sess->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd);
- if (!sess->s.dev) {
+ strscpy(srv_path->s.sessname, str, sizeof(srv_path->s.sessname));
+
+ srv_path->s.con_num = con_num;
+ srv_path->s.irq_con_num = con_num;
+ srv_path->s.recon_cnt = recon_cnt;
+ uuid_copy(&srv_path->s.uuid, uuid);
+ spin_lock_init(&srv_path->state_lock);
+ INIT_WORK(&srv_path->close_work, rtrs_srv_close_work);
+ rtrs_srv_init_hb(srv_path);
+
+ srv_path->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd);
+ if (!srv_path->s.dev) {
err = -ENOMEM;
goto err_free_con;
}
- err = map_cont_bufs(sess);
+ err = map_cont_bufs(srv_path);
if (err)
goto err_put_dev;
- err = rtrs_srv_alloc_ops_ids(sess);
+ err = rtrs_srv_alloc_ops_ids(srv_path);
if (err)
goto err_unmap_bufs;
- __add_path_to_srv(srv, sess);
+ __add_path_to_srv(srv, srv_path);
- return sess;
+ return srv_path;
err_unmap_bufs:
- unmap_cont_bufs(sess);
+ unmap_cont_bufs(srv_path);
err_put_dev:
- rtrs_ib_dev_put(sess->s.dev);
+ rtrs_ib_dev_put(srv_path->s.dev);
err_free_con:
- kfree(sess->s.con);
+ kfree(srv_path->s.con);
err_free_dma_addr:
- kfree(sess->dma_addr);
+ kfree(srv_path->dma_addr);
+err_free_percpu:
+ free_percpu(srv_path->stats->rdma_stats);
err_free_stats:
- kfree(sess->stats);
+ kfree(srv_path->stats);
err_free_sess:
- kfree(sess);
+ kfree(srv_path);
err:
return ERR_PTR(err);
}
@@ -1816,8 +1827,8 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
size_t len)
{
struct rtrs_srv_ctx *ctx = cm_id->context;
- struct rtrs_srv_sess *sess;
- struct rtrs_srv *srv;
+ struct rtrs_srv_path *srv_path;
+ struct rtrs_srv_sess *srv;
u16 version, con_num, cid;
u16 recon_cnt;
@@ -1857,16 +1868,16 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
goto reject_w_err;
}
mutex_lock(&srv->paths_mutex);
- sess = __find_sess(srv, &msg->sess_uuid);
- if (sess) {
- struct rtrs_sess *s = &sess->s;
+ srv_path = __find_path(srv, &msg->sess_uuid);
+ if (srv_path) {
+ struct rtrs_path *s = &srv_path->s;
/* Session already holds a reference */
put_srv(srv);
- if (sess->state != RTRS_SRV_CONNECTING) {
+ if (srv_path->state != RTRS_SRV_CONNECTING) {
rtrs_err(s, "Session in wrong state: %s\n",
- rtrs_srv_state_str(sess->state));
+ rtrs_srv_state_str(srv_path->state));
mutex_unlock(&srv->paths_mutex);
goto reject_w_err;
}
@@ -1886,19 +1897,19 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
goto reject_w_err;
}
} else {
- sess = __alloc_sess(srv, cm_id, con_num, recon_cnt,
+ srv_path = __alloc_path(srv, cm_id, con_num, recon_cnt,
&msg->sess_uuid);
- if (IS_ERR(sess)) {
+ if (IS_ERR(srv_path)) {
mutex_unlock(&srv->paths_mutex);
put_srv(srv);
- err = PTR_ERR(sess);
+ err = PTR_ERR(srv_path);
pr_err("RTRS server session allocation failed: %d\n", err);
goto reject_w_err;
}
}
- err = create_con(sess, cm_id, cid);
+ err = create_con(srv_path, cm_id, cid);
if (err) {
- rtrs_err((&sess->s), "create_con(), error %d\n", err);
+ rtrs_err((&srv_path->s), "create_con(), error %d\n", err);
rtrs_rdma_do_reject(cm_id, err);
/*
* Since session has other connections we follow normal way
@@ -1907,9 +1918,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
*/
goto close_and_return_err;
}
- err = rtrs_rdma_do_accept(sess, cm_id);
+ err = rtrs_rdma_do_accept(srv_path, cm_id);
if (err) {
- rtrs_err((&sess->s), "rtrs_rdma_do_accept(), error %d\n", err);
+ rtrs_err((&srv_path->s), "rtrs_rdma_do_accept(), error %d\n", err);
rtrs_rdma_do_reject(cm_id, err);
/*
* Since current connection was successfully added to the
@@ -1929,7 +1940,7 @@ reject_w_err:
close_and_return_err:
mutex_unlock(&srv->paths_mutex);
- close_sess(sess);
+ close_path(srv_path);
return err;
}
@@ -1937,14 +1948,14 @@ close_and_return_err:
static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *ev)
{
- struct rtrs_srv_sess *sess = NULL;
- struct rtrs_sess *s = NULL;
+ struct rtrs_srv_path *srv_path = NULL;
+ struct rtrs_path *s = NULL;
if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
struct rtrs_con *c = cm_id->context;
- s = c->sess;
- sess = to_srv_sess(s);
+ s = c->path;
+ srv_path = to_srv_path(s);
}
switch (ev->event) {
@@ -1968,7 +1979,7 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
- close_sess(sess);
+ close_path(srv_path);
break;
default:
pr_err("Ignoring unexpected CM event %s, err %d\n",
@@ -2176,23 +2187,23 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
}
EXPORT_SYMBOL(rtrs_srv_open);
-static void close_sessions(struct rtrs_srv *srv)
+static void close_paths(struct rtrs_srv_sess *srv)
{
- struct rtrs_srv_sess *sess;
+ struct rtrs_srv_path *srv_path;
mutex_lock(&srv->paths_mutex);
- list_for_each_entry(sess, &srv->paths_list, s.entry)
- close_sess(sess);
+ list_for_each_entry(srv_path, &srv->paths_list, s.entry)
+ close_path(srv_path);
mutex_unlock(&srv->paths_mutex);
}
static void close_ctx(struct rtrs_srv_ctx *ctx)
{
- struct rtrs_srv *srv;
+ struct rtrs_srv_sess *srv;
mutex_lock(&ctx->srv_mutex);
list_for_each_entry(srv, &ctx->srv_list, ctx_list)
- close_sessions(srv);
+ close_paths(srv);
mutex_unlock(&ctx->srv_mutex);
flush_workqueue(rtrs_wq);
}
@@ -2256,14 +2267,10 @@ static int __init rtrs_server_init(void)
err);
return err;
}
- chunk_pool = mempool_create_page_pool(sess_queue_depth * CHUNK_POOL_SZ,
- get_order(max_chunk_size));
- if (!chunk_pool)
- return -ENOMEM;
rtrs_dev_class = class_create(THIS_MODULE, "rtrs-server");
if (IS_ERR(rtrs_dev_class)) {
err = PTR_ERR(rtrs_dev_class);
- goto out_chunk_pool;
+ goto out_err;
}
rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0);
if (!rtrs_wq) {
@@ -2275,9 +2282,7 @@ static int __init rtrs_server_init(void)
out_dev_class:
class_destroy(rtrs_dev_class);
-out_chunk_pool:
- mempool_destroy(chunk_pool);
-
+out_err:
return err;
}
@@ -2285,7 +2290,6 @@ static void __exit rtrs_server_exit(void)
{
destroy_workqueue(rtrs_wq);
class_destroy(rtrs_dev_class);
- mempool_destroy(chunk_pool);
rtrs_rdma_dev_pd_deinit(&dev_pd);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
index 7d403c12faf3..2f8a638e36fa 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
@@ -12,6 +12,7 @@
#include <linux/device.h>
#include <linux/refcount.h>
+#include <linux/percpu.h>
#include "rtrs-pri.h"
/*
@@ -29,15 +30,15 @@ enum rtrs_srv_state {
*/
struct rtrs_srv_stats_rdma_stats {
struct {
- atomic64_t cnt;
- atomic64_t size_total;
+ u64 cnt;
+ u64 size_total;
} dir[2];
};
struct rtrs_srv_stats {
- struct kobject kobj_stats;
- struct rtrs_srv_stats_rdma_stats rdma_stats;
- struct rtrs_srv_sess *sess;
+ struct kobject kobj_stats;
+ struct rtrs_srv_stats_rdma_stats __percpu *rdma_stats;
+ struct rtrs_srv_path *srv_path;
};
struct rtrs_srv_con {
@@ -71,9 +72,9 @@ struct rtrs_srv_mr {
struct rtrs_iu *iu; /* send buffer for new rkey msg */
};
-struct rtrs_srv_sess {
- struct rtrs_sess s;
- struct rtrs_srv *srv;
+struct rtrs_srv_path {
+ struct rtrs_path s;
+ struct rtrs_srv_sess *srv;
struct work_struct close_work;
enum rtrs_srv_state state;
spinlock_t state_lock;
@@ -90,7 +91,12 @@ struct rtrs_srv_sess {
struct rtrs_srv_stats *stats;
};
-struct rtrs_srv {
+static inline struct rtrs_srv_path *to_srv_path(struct rtrs_path *s)
+{
+ return container_of(s, struct rtrs_srv_path, s);
+}
+
+struct rtrs_srv_sess {
struct list_head paths_list;
int paths_up;
struct mutex paths_ev_mutex;
@@ -125,13 +131,13 @@ struct rtrs_srv_ib_ctx {
extern struct class *rtrs_dev_class;
-void close_sess(struct rtrs_srv_sess *sess);
+void close_path(struct rtrs_srv_path *srv_path);
static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s,
size_t size, int d)
{
- atomic64_inc(&s->rdma_stats.dir[d].cnt);
- atomic64_add(size, &s->rdma_stats.dir[d].size_total);
+ this_cpu_inc(s->rdma_stats->dir[d].cnt);
+ this_cpu_add(s->rdma_stats->dir[d].size_total, size);
}
/* functions which are implemented in rtrs-srv-stats.c */
@@ -142,7 +148,7 @@ ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats,
char *page, size_t len);
/* functions which are implemented in rtrs-srv-sysfs.c */
-int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess);
-void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess);
+int rtrs_srv_create_path_files(struct rtrs_srv_path *srv_path);
+void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path);
#endif /* RTRS_SRV_H */
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index 37952c8e768c..ed324b47d93a 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -69,16 +69,16 @@ EXPORT_SYMBOL_GPL(rtrs_iu_free);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu)
{
- struct rtrs_sess *sess = con->sess;
+ struct rtrs_path *path = con->path;
struct ib_recv_wr wr;
struct ib_sge list;
list.addr = iu->dma_addr;
list.length = iu->size;
- list.lkey = sess->dev->ib_pd->local_dma_lkey;
+ list.lkey = path->dev->ib_pd->local_dma_lkey;
if (list.length == 0) {
- rtrs_wrn(con->sess,
+ rtrs_wrn(con->path,
"Posting receive work request failed, sg list is empty\n");
return -EINVAL;
}
@@ -126,7 +126,7 @@ static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head,
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
struct ib_send_wr *head)
{
- struct rtrs_sess *sess = con->sess;
+ struct rtrs_path *path = con->path;
struct ib_send_wr wr;
struct ib_sge list;
@@ -135,7 +135,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
list.addr = iu->dma_addr;
list.length = size;
- list.lkey = sess->dev->ib_pd->local_dma_lkey;
+ list.lkey = path->dev->ib_pd->local_dma_lkey;
wr = (struct ib_send_wr) {
.wr_cqe = &iu->cqe,
@@ -175,7 +175,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
* length error
*/
for (i = 0; i < num_sge; i++)
- if (WARN_ON(sge[i].length == 0))
+ if (WARN_ONCE(sge[i].length == 0, "sg %d is zero length\n", i))
return -EINVAL;
return rtrs_post_send(con->qp, head, &wr.wr, tail);
@@ -188,11 +188,11 @@ static int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con,
struct ib_send_wr *head)
{
struct ib_rdma_wr wr;
- struct rtrs_sess *sess = con->sess;
+ struct rtrs_path *path = con->path;
enum ib_send_flags sflags;
atomic_dec_if_positive(&con->sq_wr_avail);
- sflags = (atomic_inc_return(&con->wr_cnt) % sess->signal_interval) ?
+ sflags = (atomic_inc_return(&con->wr_cnt) % path->signal_interval) ?
0 : IB_SEND_SIGNALED;
wr = (struct ib_rdma_wr) {
@@ -211,12 +211,12 @@ static void qp_event_handler(struct ib_event *ev, void *ctx)
switch (ev->event) {
case IB_EVENT_COMM_EST:
- rtrs_info(con->sess, "QP event %s (%d) received\n",
+ rtrs_info(con->path, "QP event %s (%d) received\n",
ib_event_msg(ev->event), ev->event);
rdma_notify(con->cm_id, IB_EVENT_COMM_EST);
break;
default:
- rtrs_info(con->sess, "Unhandled QP event %s (%d) received\n",
+ rtrs_info(con->path, "Unhandled QP event %s (%d) received\n",
ib_event_msg(ev->event), ev->event);
break;
}
@@ -224,7 +224,7 @@ static void qp_event_handler(struct ib_event *ev, void *ctx)
static bool is_pollqueue(struct rtrs_con *con)
{
- return con->cid >= con->sess->irq_con_num;
+ return con->cid >= con->path->irq_con_num;
}
static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe,
@@ -240,7 +240,7 @@ static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe,
cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx);
if (IS_ERR(cq)) {
- rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n",
+ rtrs_err(con->path, "Creating completion queue failed, errno: %ld\n",
PTR_ERR(cq));
return PTR_ERR(cq);
}
@@ -271,7 +271,7 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
ret = rdma_create_qp(cm_id, pd, &init_attr);
if (ret) {
- rtrs_err(con->sess, "Creating QP failed, err: %d\n", ret);
+ rtrs_err(con->path, "Creating QP failed, err: %d\n", ret);
return ret;
}
con->qp = cm_id->qp;
@@ -290,7 +290,7 @@ static void destroy_cq(struct rtrs_con *con)
con->cq = NULL;
}
-int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
+int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
u32 max_send_sge, int cq_vector, int nr_cqe,
u32 max_send_wr, u32 max_recv_wr,
enum ib_poll_context poll_ctx)
@@ -301,13 +301,13 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
if (err)
return err;
- err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr,
+ err = create_qp(con, path->dev->ib_pd, max_send_wr, max_recv_wr,
max_send_sge);
if (err) {
destroy_cq(con);
return err;
}
- con->sess = sess;
+ con->path = path;
return 0;
}
@@ -323,24 +323,24 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con)
}
EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy);
-static void schedule_hb(struct rtrs_sess *sess)
+static void schedule_hb(struct rtrs_path *path)
{
- queue_delayed_work(sess->hb_wq, &sess->hb_dwork,
- msecs_to_jiffies(sess->hb_interval_ms));
+ queue_delayed_work(path->hb_wq, &path->hb_dwork,
+ msecs_to_jiffies(path->hb_interval_ms));
}
-void rtrs_send_hb_ack(struct rtrs_sess *sess)
+void rtrs_send_hb_ack(struct rtrs_path *path)
{
- struct rtrs_con *usr_con = sess->con[0];
+ struct rtrs_con *usr_con = path->con[0];
u32 imm;
int err;
imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
- err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
+ err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
NULL);
if (err) {
- rtrs_err(sess, "send HB ACK failed, errno: %d\n", err);
- sess->hb_err_handler(usr_con);
+ rtrs_err(path, "send HB ACK failed, errno: %d\n", err);
+ path->hb_err_handler(usr_con);
return;
}
}
@@ -349,63 +349,63 @@ EXPORT_SYMBOL_GPL(rtrs_send_hb_ack);
static void hb_work(struct work_struct *work)
{
struct rtrs_con *usr_con;
- struct rtrs_sess *sess;
+ struct rtrs_path *path;
u32 imm;
int err;
- sess = container_of(to_delayed_work(work), typeof(*sess), hb_dwork);
- usr_con = sess->con[0];
+ path = container_of(to_delayed_work(work), typeof(*path), hb_dwork);
+ usr_con = path->con[0];
- if (sess->hb_missed_cnt > sess->hb_missed_max) {
- rtrs_err(sess, "HB missed max reached.\n");
- sess->hb_err_handler(usr_con);
+ if (path->hb_missed_cnt > path->hb_missed_max) {
+ rtrs_err(path, "HB missed max reached.\n");
+ path->hb_err_handler(usr_con);
return;
}
- if (sess->hb_missed_cnt++) {
+ if (path->hb_missed_cnt++) {
/* Reschedule work without sending hb */
- schedule_hb(sess);
+ schedule_hb(path);
return;
}
- sess->hb_last_sent = ktime_get();
+ path->hb_last_sent = ktime_get();
imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
- err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
+ err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
NULL);
if (err) {
- rtrs_err(sess, "HB send failed, errno: %d\n", err);
- sess->hb_err_handler(usr_con);
+ rtrs_err(path, "HB send failed, errno: %d\n", err);
+ path->hb_err_handler(usr_con);
return;
}
- schedule_hb(sess);
+ schedule_hb(path);
}
-void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
+void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
unsigned int interval_ms, unsigned int missed_max,
void (*err_handler)(struct rtrs_con *con),
struct workqueue_struct *wq)
{
- sess->hb_cqe = cqe;
- sess->hb_interval_ms = interval_ms;
- sess->hb_err_handler = err_handler;
- sess->hb_wq = wq;
- sess->hb_missed_max = missed_max;
- sess->hb_missed_cnt = 0;
- INIT_DELAYED_WORK(&sess->hb_dwork, hb_work);
+ path->hb_cqe = cqe;
+ path->hb_interval_ms = interval_ms;
+ path->hb_err_handler = err_handler;
+ path->hb_wq = wq;
+ path->hb_missed_max = missed_max;
+ path->hb_missed_cnt = 0;
+ INIT_DELAYED_WORK(&path->hb_dwork, hb_work);
}
EXPORT_SYMBOL_GPL(rtrs_init_hb);
-void rtrs_start_hb(struct rtrs_sess *sess)
+void rtrs_start_hb(struct rtrs_path *path)
{
- schedule_hb(sess);
+ schedule_hb(path);
}
EXPORT_SYMBOL_GPL(rtrs_start_hb);
-void rtrs_stop_hb(struct rtrs_sess *sess)
+void rtrs_stop_hb(struct rtrs_path *path)
{
- cancel_delayed_work_sync(&sess->hb_dwork);
- sess->hb_missed_cnt = 0;
+ cancel_delayed_work_sync(&path->hb_dwork);
+ path->hb_missed_cnt = 0;
}
EXPORT_SYMBOL_GPL(rtrs_stop_hb);
@@ -479,7 +479,6 @@ static int rtrs_str_to_sockaddr(const char *addr, size_t len,
*/
int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len)
{
-
switch (addr->sa_family) {
case AF_IB:
return scnprintf(buf, len, "gid:%pI6",
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index 859c79685daf..b48b53a7c143 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -13,9 +13,9 @@
#include <linux/scatterlist.h>
struct rtrs_permit;
-struct rtrs_clt;
+struct rtrs_clt_sess;
struct rtrs_srv_ctx;
-struct rtrs_srv;
+struct rtrs_srv_sess;
struct rtrs_srv_op;
/*
@@ -52,14 +52,14 @@ struct rtrs_clt_ops {
void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev);
};
-struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
- const char *sessname,
+struct rtrs_clt_sess *rtrs_clt_open(struct rtrs_clt_ops *ops,
+ const char *pathname,
const struct rtrs_addr *paths,
size_t path_cnt, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
s16 max_reconnect_attempts, u32 nr_poll_queues);
-void rtrs_clt_close(struct rtrs_clt *sess);
+void rtrs_clt_close(struct rtrs_clt_sess *clt);
enum wait_type {
RTRS_PERMIT_NOWAIT = 0,
@@ -77,11 +77,12 @@ enum rtrs_clt_con_type {
RTRS_IO_CON
};
-struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *sess,
- enum rtrs_clt_con_type con_type,
- enum wait_type wait);
+struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt_sess *sess,
+ enum rtrs_clt_con_type con_type,
+ enum wait_type wait);
-void rtrs_clt_put_permit(struct rtrs_clt *sess, struct rtrs_permit *permit);
+void rtrs_clt_put_permit(struct rtrs_clt_sess *sess,
+ struct rtrs_permit *permit);
/**
* rtrs_clt_req_ops - it holds the request confirmation callback
@@ -98,10 +99,10 @@ struct rtrs_clt_req_ops {
};
int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
- struct rtrs_clt *sess, struct rtrs_permit *permit,
+ struct rtrs_clt_sess *sess, struct rtrs_permit *permit,
const struct kvec *vec, size_t nr, size_t len,
struct scatterlist *sg, unsigned int sg_cnt);
-int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index);
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt_sess *clt, unsigned int index);
/**
* rtrs_attrs - RTRS session attributes
@@ -112,7 +113,7 @@ struct rtrs_attrs {
u32 max_segments;
};
-int rtrs_clt_query(struct rtrs_clt *sess, struct rtrs_attrs *attr);
+int rtrs_clt_query(struct rtrs_clt_sess *sess, struct rtrs_attrs *attr);
/*
* Here goes RTRS server API
@@ -138,7 +139,6 @@ struct rtrs_srv_ops {
* @priv: Private data set by rtrs_srv_set_sess_priv()
* @id: internal RTRS operation id
- * @dir: READ/WRITE
* @data: Pointer to (bidirectional) rdma memory area:
* - in case of %RTRS_SRV_RDMA_EV_RECV contains
* data sent by the client
@@ -150,7 +150,7 @@ struct rtrs_srv_ops {
* @usrlen: Size of the user message
*/
int (*rdma_ev)(void *priv,
- struct rtrs_srv_op *id, int dir,
+ struct rtrs_srv_op *id,
void *data, size_t datalen, const void *usr,
size_t usrlen);
/**
@@ -163,7 +163,7 @@ struct rtrs_srv_ops {
* @priv: Private data from user if previously set with
* rtrs_srv_set_sess_priv()
*/
- int (*link_ev)(struct rtrs_srv *sess, enum rtrs_srv_link_ev ev,
+ int (*link_ev)(struct rtrs_srv_sess *sess, enum rtrs_srv_link_ev ev,
void *priv);
};
@@ -173,11 +173,12 @@ void rtrs_srv_close(struct rtrs_srv_ctx *ctx);
bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int errno);
-void rtrs_srv_set_sess_priv(struct rtrs_srv *sess, void *priv);
+void rtrs_srv_set_sess_priv(struct rtrs_srv_sess *sess, void *priv);
-int rtrs_srv_get_sess_name(struct rtrs_srv *sess, char *sessname, size_t len);
+int rtrs_srv_get_path_name(struct rtrs_srv_sess *sess, char *pathname,
+ size_t len);
-int rtrs_srv_get_queue_depth(struct rtrs_srv *sess);
+int rtrs_srv_get_queue_depth(struct rtrs_srv_sess *sess);
int rtrs_addr_to_sockaddr(const char *str, size_t len, u16 port,
struct rtrs_addr *addr);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e174e853f8a4..1075c2ac8fe2 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -430,7 +430,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
- if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ if (device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
@@ -699,7 +699,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
static void srp_path_rec_completion(int status,
struct sa_path_rec *pathrec,
- void *ch_ptr)
+ int num_paths, void *ch_ptr)
{
struct srp_rdma_ch *ch = ch_ptr;
struct srp_target_port *target = ch->target;
@@ -1282,8 +1282,7 @@ struct srp_terminate_context {
int scsi_result;
};
-static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr,
- bool reserved)
+static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr)
{
struct srp_terminate_context *context = context_ptr;
struct srp_target_port *target = context->srp_target;
@@ -1962,7 +1961,8 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
if (scmnd) {
req = scsi_cmd_priv(scmnd);
scmnd = srp_claim_req(ch, req, NULL, scmnd);
- } else {
+ }
+ if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
"Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
rsp->tag, ch - target->ch, ch->qp->qp_num);
@@ -2789,7 +2789,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
static int srp_abort(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
- struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
+ struct srp_request *req = scsi_cmd_priv(scmnd);
u32 tag;
u16 ch_idx;
struct srp_rdma_ch *ch;
@@ -2797,8 +2797,6 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
- if (!req)
- return SUCCESS;
tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd));
ch_idx = blk_mq_unique_tag_to_hwq(tag);
if (WARN_ON_ONCE(ch_idx >= target->ch_count))
@@ -2991,7 +2989,7 @@ static ssize_t local_ib_port_show(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sysfs_emit(buf, "%d\n", target->srp_host->port);
+ return sysfs_emit(buf, "%u\n", target->srp_host->port);
}
static DEVICE_ATTR_RO(local_ib_port);
@@ -3179,11 +3177,16 @@ static void srp_release_dev(struct device *dev)
struct srp_host *host =
container_of(dev, struct srp_host, dev);
- complete(&host->released);
+ kfree(host);
}
+static struct attribute *srp_class_attrs[];
+
+ATTRIBUTE_GROUPS(srp_class);
+
static struct class srp_class = {
.name = "infiniband_srp",
+ .dev_groups = srp_class_groups,
.dev_release = srp_release_dev
};
@@ -3650,7 +3653,7 @@ static ssize_t add_target_store(struct device *dev,
target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
- if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
target = host_to_target(target_host);
@@ -3706,8 +3709,8 @@ static ssize_t add_target_store(struct device *dev,
}
if (srp_dev->use_fast_reg) {
- bool gaps_reg = (ibdev->attrs.device_cap_flags &
- IB_DEVICE_SG_GAPS_REG);
+ bool gaps_reg = ibdev->attrs.kernel_cap_flags &
+ IBK_SG_GAPS_REG;
max_sectors_per_mr = srp_dev->max_pages_per_mr <<
(ilog2(srp_dev->mr_page_size) - 9);
@@ -3884,12 +3887,19 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr,
{
struct srp_host *host = container_of(dev, struct srp_host, dev);
- return sysfs_emit(buf, "%d\n", host->port);
+ return sysfs_emit(buf, "%u\n", host->port);
}
static DEVICE_ATTR_RO(port);
-static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
+static struct attribute *srp_class_attrs[] = {
+ &dev_attr_add_target.attr,
+ &dev_attr_ibdev.attr,
+ &dev_attr_port.attr,
+ NULL
+};
+
+static struct srp_host *srp_add_port(struct srp_device *device, u32 port)
{
struct srp_host *host;
@@ -3899,33 +3909,24 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
INIT_LIST_HEAD(&host->target_list);
spin_lock_init(&host->target_lock);
- init_completion(&host->released);
mutex_init(&host->add_target_mutex);
host->srp_dev = device;
host->port = port;
+ device_initialize(&host->dev);
host->dev.class = &srp_class;
host->dev.parent = device->dev->dev.parent;
- dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
- port);
-
- if (device_register(&host->dev))
- goto free_host;
- if (device_create_file(&host->dev, &dev_attr_add_target))
- goto err_class;
- if (device_create_file(&host->dev, &dev_attr_ibdev))
- goto err_class;
- if (device_create_file(&host->dev, &dev_attr_port))
- goto err_class;
+ if (dev_set_name(&host->dev, "srp-%s-%u", dev_name(&device->dev->dev),
+ port))
+ goto put_host;
+ if (device_add(&host->dev))
+ goto put_host;
return host;
-err_class:
- device_unregister(&host->dev);
-
-free_host:
- kfree(host);
-
+put_host:
+ device_del(&host->dev);
+ put_device(&host->dev);
return NULL;
}
@@ -3937,7 +3938,7 @@ static void srp_rename_dev(struct ib_device *device, void *client_data)
list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
char name[IB_DEVICE_NAME_MAX + 8];
- snprintf(name, sizeof(name), "srp-%s-%d",
+ snprintf(name, sizeof(name), "srp-%s-%u",
dev_name(&device->dev), host->port);
device_rename(&host->dev, name);
}
@@ -3949,7 +3950,7 @@ static int srp_add_one(struct ib_device *device)
struct ib_device_attr *attr = &device->attrs;
struct srp_host *host;
int mr_page_shift;
- unsigned int p;
+ u32 p;
u64 max_pages_per_mr;
unsigned int flags = 0;
@@ -4031,12 +4032,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data)
srp_dev = client_data;
list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
- device_unregister(&host->dev);
/*
- * Wait for the sysfs entry to go away, so that no new
- * target ports can be created.
+ * Remove the add_target sysfs entry so that no new target ports
+ * can be created.
*/
- wait_for_completion(&host->released);
+ device_del(&host->dev);
/*
* Remove all target ports.
@@ -4047,12 +4047,14 @@ static void srp_remove_one(struct ib_device *device, void *client_data)
spin_unlock(&host->target_lock);
/*
- * Wait for tl_err and target port removal tasks.
+ * srp_queue_remove_work() queues a call to
+ * srp_remove_target(). The latter function cancels
+ * target->tl_err_work so waiting for the remove works to
+ * finish is sufficient.
*/
- flush_workqueue(system_long_wq);
flush_workqueue(srp_remove_wq);
- kfree(host);
+ put_device(&host->dev);
}
ib_dealloc_pd(srp_dev->pd);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index abccddeea1e3..00b0068fda20 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -92,6 +92,9 @@ enum srp_iu_type {
};
/*
+ * RDMA adapter in the initiator system.
+ *
+ * @dev_list: List of RDMA ports associated with this RDMA adapter (srp_host).
* @mr_page_mask: HCA memory registration page mask.
* @mr_page_size: HCA memory registration page size.
* @mr_max_size: Maximum size in bytes of a single FR registration request.
@@ -109,13 +112,18 @@ struct srp_device {
bool use_fast_reg;
};
+/*
+ * One port of an RDMA adapter in the initiator system.
+ *
+ * @target_list: List of connected target ports (struct srp_target_port).
+ * @target_lock: Protects @target_list.
+ */
struct srp_host {
struct srp_device *srp_dev;
- u8 port;
+ u32 port;
struct device dev;
struct list_head target_list;
spinlock_t target_lock;
- struct completion released;
struct list_head list;
struct mutex add_target_mutex;
};
@@ -183,7 +191,7 @@ struct srp_rdma_ch {
};
/**
- * struct srp_target_port
+ * struct srp_target_port - RDMA port in the SRP target system
* @comp_vector: Completion vector used by the first RDMA channel created for
* this target port.
*/
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index f86ee1c4b970..3c3fae738c3e 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -565,12 +565,9 @@ static int srpt_refresh_port(struct srpt_port *sport)
if (ret)
return ret;
- sport->port_guid_id.wwn.priv = sport;
- srpt_format_guid(sport->port_guid_id.name,
- sizeof(sport->port_guid_id.name),
+ srpt_format_guid(sport->guid_name, ARRAY_SIZE(sport->guid_name),
&sport->gid.global.interface_id);
- sport->port_gid_id.wwn.priv = sport;
- snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name),
+ snprintf(sport->gid_name, ARRAY_SIZE(sport->gid_name),
"0x%016llx%016llx",
be64_to_cpu(sport->gid.global.subnet_prefix),
be64_to_cpu(sport->gid.global.interface_id));
@@ -1424,7 +1421,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
- memcpy(srp_rsp + 1, sense_data, sense_data_len);
+ memcpy(srp_rsp->data, sense_data, sense_data_len);
}
return sizeof(*srp_rsp) + sense_data_len;
@@ -2221,13 +2218,13 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
ch->zw_cqe.done = srpt_zerolength_write_done;
INIT_WORK(&ch->release_work, srpt_release_channel_work);
ch->sport = sport;
- if (ib_cm_id) {
- ch->ib_cm.cm_id = ib_cm_id;
- ib_cm_id->context = ch;
- } else {
+ if (rdma_cm_id) {
ch->using_rdma_cm = true;
ch->rdma_cm.cm_id = rdma_cm_id;
rdma_cm_id->context = ch;
+ } else {
+ ch->ib_cm.cm_id = ib_cm_id;
+ ib_cm_id->context = ch;
}
/*
* ch->rq_size should be at least as large as the initiator queue
@@ -2303,7 +2300,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
goto free_recv_ring;
}
- strlcpy(ch->sess_name, src_addr, sizeof(ch->sess_name));
+ strscpy(ch->sess_name, src_addr, sizeof(ch->sess_name));
snprintf(i_port_id, sizeof(i_port_id), "0x%016llx%016llx",
be64_to_cpu(*(__be64 *)nexus->i_port_id),
be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8)));
@@ -2314,31 +2311,35 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
tag_num = ch->rq_size;
tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */
- mutex_lock(&sport->port_guid_id.mutex);
- list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) {
- if (!IS_ERR_OR_NULL(ch->sess))
- break;
- ch->sess = target_setup_session(&stpg->tpg, tag_num,
+ if (sport->guid_id) {
+ mutex_lock(&sport->guid_id->mutex);
+ list_for_each_entry(stpg, &sport->guid_id->tpg_list, entry) {
+ if (!IS_ERR_OR_NULL(ch->sess))
+ break;
+ ch->sess = target_setup_session(&stpg->tpg, tag_num,
tag_size, TARGET_PROT_NORMAL,
ch->sess_name, ch, NULL);
+ }
+ mutex_unlock(&sport->guid_id->mutex);
}
- mutex_unlock(&sport->port_guid_id.mutex);
- mutex_lock(&sport->port_gid_id.mutex);
- list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) {
- if (!IS_ERR_OR_NULL(ch->sess))
- break;
- ch->sess = target_setup_session(&stpg->tpg, tag_num,
+ if (sport->gid_id) {
+ mutex_lock(&sport->gid_id->mutex);
+ list_for_each_entry(stpg, &sport->gid_id->tpg_list, entry) {
+ if (!IS_ERR_OR_NULL(ch->sess))
+ break;
+ ch->sess = target_setup_session(&stpg->tpg, tag_num,
tag_size, TARGET_PROT_NORMAL, i_port_id,
ch, NULL);
- if (!IS_ERR_OR_NULL(ch->sess))
- break;
- /* Retry without leading "0x" */
- ch->sess = target_setup_session(&stpg->tpg, tag_num,
+ if (!IS_ERR_OR_NULL(ch->sess))
+ break;
+ /* Retry without leading "0x" */
+ ch->sess = target_setup_session(&stpg->tpg, tag_num,
tag_size, TARGET_PROT_NORMAL,
i_port_id + 2, ch, NULL);
+ }
+ mutex_unlock(&sport->gid_id->mutex);
}
- mutex_unlock(&sport->port_gid_id.mutex);
if (IS_ERR_OR_NULL(ch->sess)) {
WARN_ON_ONCE(ch->sess == NULL);
@@ -2983,7 +2984,12 @@ static int srpt_release_sport(struct srpt_port *sport)
return 0;
}
-static struct se_wwn *__srpt_lookup_wwn(const char *name)
+struct port_and_port_id {
+ struct srpt_port *sport;
+ struct srpt_port_id **port_id;
+};
+
+static struct port_and_port_id __srpt_lookup_port(const char *name)
{
struct ib_device *dev;
struct srpt_device *sdev;
@@ -2998,25 +3004,38 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name)
for (i = 0; i < dev->phys_port_cnt; i++) {
sport = &sdev->port[i];
- if (strcmp(sport->port_guid_id.name, name) == 0)
- return &sport->port_guid_id.wwn;
- if (strcmp(sport->port_gid_id.name, name) == 0)
- return &sport->port_gid_id.wwn;
+ if (strcmp(sport->guid_name, name) == 0) {
+ kref_get(&sdev->refcnt);
+ return (struct port_and_port_id){
+ sport, &sport->guid_id};
+ }
+ if (strcmp(sport->gid_name, name) == 0) {
+ kref_get(&sdev->refcnt);
+ return (struct port_and_port_id){
+ sport, &sport->gid_id};
+ }
}
}
- return NULL;
+ return (struct port_and_port_id){};
}
-static struct se_wwn *srpt_lookup_wwn(const char *name)
+/**
+ * srpt_lookup_port() - Look up an RDMA port by name
+ * @name: ASCII port name
+ *
+ * Increments the RDMA port reference count if an RDMA port pointer is returned.
+ * The caller must drop that reference count by calling srpt_port_put_ref().
+ */
+static struct port_and_port_id srpt_lookup_port(const char *name)
{
- struct se_wwn *wwn;
+ struct port_and_port_id papi;
spin_lock(&srpt_dev_lock);
- wwn = __srpt_lookup_wwn(name);
+ papi = __srpt_lookup_port(name);
spin_unlock(&srpt_dev_lock);
- return wwn;
+ return papi;
}
static void srpt_free_srq(struct srpt_device *sdev)
@@ -3101,6 +3120,18 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
return ret;
}
+static void srpt_free_sdev(struct kref *refcnt)
+{
+ struct srpt_device *sdev = container_of(refcnt, typeof(*sdev), refcnt);
+
+ kfree(sdev);
+}
+
+static void srpt_sdev_put(struct srpt_device *sdev)
+{
+ kref_put(&sdev->refcnt, srpt_free_sdev);
+}
+
/**
* srpt_add_one - InfiniBand device addition callback function
* @device: Describes a HCA.
@@ -3119,6 +3150,7 @@ static int srpt_add_one(struct ib_device *device)
if (!sdev)
return -ENOMEM;
+ kref_init(&sdev->refcnt);
sdev->device = device;
mutex_init(&sdev->sdev_mutex);
@@ -3159,7 +3191,7 @@ static int srpt_add_one(struct ib_device *device)
* if this HCA is gone bad and replaced by different HCA
*/
ret = sdev->cm_id ?
- ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0) :
+ ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid)) :
0;
if (ret < 0) {
pr_err("ib_cm_listen() failed: %d (cm_id state = %d)\n", ret,
@@ -3182,10 +3214,6 @@ static int srpt_add_one(struct ib_device *device)
sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
sport->port_attrib.use_srq = false;
INIT_WORK(&sport->work, srpt_refresh_port_work);
- mutex_init(&sport->port_guid_id.mutex);
- INIT_LIST_HEAD(&sport->port_guid_id.tpg_list);
- mutex_init(&sport->port_gid_id.mutex);
- INIT_LIST_HEAD(&sport->port_gid_id.tpg_list);
ret = srpt_refresh_port(sport);
if (ret) {
@@ -3214,7 +3242,7 @@ err_ring:
srpt_free_srq(sdev);
ib_dealloc_pd(sdev->pd);
free_dev:
- kfree(sdev);
+ srpt_sdev_put(sdev);
pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev));
return ret;
}
@@ -3258,7 +3286,7 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
ib_dealloc_pd(sdev->pd);
- kfree(sdev);
+ srpt_sdev_put(sdev);
}
static struct ib_client srpt_client = {
@@ -3286,10 +3314,10 @@ static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn)
{
struct srpt_port *sport = wwn->priv;
- if (wwn == &sport->port_guid_id.wwn)
- return &sport->port_guid_id;
- if (wwn == &sport->port_gid_id.wwn)
- return &sport->port_gid_id;
+ if (sport->guid_id && &sport->guid_id->wwn == wwn)
+ return sport->guid_id;
+ if (sport->gid_id && &sport->gid_id->wwn == wwn)
+ return sport->gid_id;
WARN_ON_ONCE(true);
return NULL;
}
@@ -3774,7 +3802,31 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
struct config_group *group,
const char *name)
{
- return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL);
+ struct port_and_port_id papi = srpt_lookup_port(name);
+ struct srpt_port *sport = papi.sport;
+ struct srpt_port_id *port_id;
+
+ if (!papi.port_id)
+ return ERR_PTR(-EINVAL);
+ if (*papi.port_id) {
+ /* Attempt to create a directory that already exists. */
+ WARN_ON_ONCE(true);
+ return &(*papi.port_id)->wwn;
+ }
+ port_id = kzalloc(sizeof(*port_id), GFP_KERNEL);
+ if (!port_id) {
+ srpt_sdev_put(sport->sdev);
+ return ERR_PTR(-ENOMEM);
+ }
+ mutex_init(&port_id->mutex);
+ INIT_LIST_HEAD(&port_id->tpg_list);
+ port_id->wwn.priv = sport;
+ memcpy(port_id->name, port_id == sport->guid_id ? sport->guid_name :
+ sport->gid_name, ARRAY_SIZE(port_id->name));
+
+ *papi.port_id = port_id;
+
+ return &port_id->wwn;
}
/**
@@ -3783,6 +3835,18 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
*/
static void srpt_drop_tport(struct se_wwn *wwn)
{
+ struct srpt_port_id *port_id = container_of(wwn, typeof(*port_id), wwn);
+ struct srpt_port *sport = wwn->priv;
+
+ if (sport->guid_id == port_id)
+ sport->guid_id = NULL;
+ else if (sport->gid_id == port_id)
+ sport->gid_id = NULL;
+ else
+ WARN_ON_ONCE(true);
+
+ srpt_sdev_put(sport->sdev);
+ kfree(port_id);
}
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 76e66f630c17..4c46b301eea1 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -376,7 +376,7 @@ struct srpt_tpg {
};
/**
- * struct srpt_port_id - information about an RDMA port name
+ * struct srpt_port_id - LIO RDMA port information
* @mutex: Protects @tpg_list changes.
* @tpg_list: TPGs associated with the RDMA port name.
* @wwn: WWN associated with the RDMA port name.
@@ -393,7 +393,7 @@ struct srpt_port_id {
};
/**
- * struct srpt_port - information associated by SRPT with a single IB port
+ * struct srpt_port - SRPT RDMA port information
* @sdev: backpointer to the HCA information.
* @mad_agent: per-port management datagram processing information.
* @enabled: Whether or not this target port is enabled.
@@ -402,8 +402,10 @@ struct srpt_port_id {
* @lid: cached value of the port's lid.
* @gid: cached value of the port's gid.
* @work: work structure for refreshing the aforementioned cached values.
- * @port_guid_id: target port GUID
- * @port_gid_id: target port GID
+ * @guid_name: port name in GUID format.
+ * @guid_id: LIO target port information for the port name in GUID format.
+ * @gid_name: port name in GID format.
+ * @gid_id: LIO target port information for the port name in GID format.
* @port_attrib: Port attributes that can be accessed through configfs.
* @refcount: Number of objects associated with this port.
* @freed_channels: Completion that will be signaled once @refcount becomes 0.
@@ -419,8 +421,10 @@ struct srpt_port {
u32 lid;
union ib_gid gid;
struct work_struct work;
- struct srpt_port_id port_guid_id;
- struct srpt_port_id port_gid_id;
+ char guid_name[64];
+ struct srpt_port_id *guid_id;
+ char gid_name[64];
+ struct srpt_port_id *gid_id;
struct srpt_port_attrib port_attrib;
atomic_t refcount;
struct completion *freed_channels;
@@ -430,6 +434,7 @@ struct srpt_port {
/**
* struct srpt_device - information associated by SRPT with a single HCA
+ * @refcnt: Reference count for this device.
* @device: Backpointer to the struct ib_device managed by the IB core.
* @pd: IB protection domain.
* @lkey: L_Key (local key) with write access to all local memory.
@@ -445,6 +450,7 @@ struct srpt_port {
* @port: Information about the ports owned by this HCA.
*/
struct srpt_device {
+ struct kref refcnt;
struct ib_device *device;
struct ib_pd *pd;
u32 lkey;