Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig | 43
-rw-r--r--  drivers/infiniband/core/Makefile | 13
-rw-r--r--  drivers/infiniband/core/addr.c | 20
-rw-r--r--  drivers/infiniband/core/cache.c | 294
-rw-r--r--  drivers/infiniband/core/cm.c | 2117
-rw-r--r--  drivers/infiniband/core/cm_msgs.h | 4
-rw-r--r--  drivers/infiniband/core/cm_trace.c | 15
-rw-r--r--  drivers/infiniband/core/cm_trace.h | 414
-rw-r--r--  drivers/infiniband/core/cma.c | 1904
-rw-r--r--  drivers/infiniband/core/cma_configfs.c | 57
-rw-r--r--  drivers/infiniband/core/cma_priv.h | 30
-rw-r--r--  drivers/infiniband/core/cma_trace.h | 60
-rw-r--r--  drivers/infiniband/core/core_priv.h | 103
-rw-r--r--  drivers/infiniband/core/counters.c | 323
-rw-r--r--  drivers/infiniband/core/cq.c | 221
-rw-r--r--  drivers/infiniband/core/device.c | 342
-rw-r--r--  drivers/infiniband/core/fmr_pool.c | 494
-rw-r--r--  drivers/infiniband/core/iwcm.c | 28
-rw-r--r--  drivers/infiniband/core/iwcm.h | 2
-rw-r--r--  drivers/infiniband/core/iwpm_msg.c | 69
-rw-r--r--  drivers/infiniband/core/iwpm_util.c | 88
-rw-r--r--  drivers/infiniband/core/iwpm_util.h | 23
-rw-r--r--  drivers/infiniband/core/lag.c | 137
-rw-r--r--  drivers/infiniband/core/mad.c | 394
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 7
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c | 37
-rw-r--r--  drivers/infiniband/core/multicast.c | 43
-rw-r--r--  drivers/infiniband/core/netlink.c | 2
-rw-r--r--  drivers/infiniband/core/nldev.c | 708
-rw-r--r--  drivers/infiniband/core/opa_smi.h | 4
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 198
-rw-r--r--  drivers/infiniband/core/rdma_core.h | 7
-rw-r--r--  drivers/infiniband/core/restrack.c | 186
-rw-r--r--  drivers/infiniband/core/restrack.h | 10
-rw-r--r--  drivers/infiniband/core/roce_gid_mgmt.c | 70
-rw-r--r--  drivers/infiniband/core/rw.c | 107
-rw-r--r--  drivers/infiniband/core/sa.h | 2
-rw-r--r--  drivers/infiniband/core/sa_query.c | 584
-rw-r--r--  drivers/infiniband/core/security.c | 28
-rw-r--r--  drivers/infiniband/core/smi.c | 12
-rw-r--r--  drivers/infiniband/core/smi.h | 4
-rw-r--r--  drivers/infiniband/core/sysfs.c | 1288
-rw-r--r--  drivers/infiniband/core/trace.c | 2
-rw-r--r--  drivers/infiniband/core/ucma.c | 740
-rw-r--r--  drivers/infiniband/core/ud_header.c | 10
-rw-r--r--  drivers/infiniband/core/umem.c | 207
-rw-r--r--  drivers/infiniband/core/umem_dmabuf.c | 234
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 305
-rw-r--r--  drivers/infiniband/core/user_mad.c | 110
-rw-r--r--  drivers/infiniband/core/uverbs.h | 27
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 688
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 71
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 92
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c | 2
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c | 126
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_async_fd.c | 33
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_counters.c | 22
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_cq.c | 32
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_device.c | 245
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_dm.c | 6
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_flow_action.c | 389
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_mr.c | 180
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_qp.c | 380
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_srq.c | 234
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_wq.c | 194
-rw-r--r--  drivers/infiniband/core/uverbs_uapi.c | 13
-rw-r--r--  drivers/infiniband/core/verbs.c | 828
-rw-r--r--  drivers/infiniband/hw/Makefile | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/Kconfig | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 50
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 389
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.h | 36
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1274
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 46
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 645
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 1559
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 266
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 554
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 184
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 575
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 264
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 247
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h | 49
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 192
-rw-r--r--  drivers/infiniband/hw/cxgb4/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 52
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c | 22
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb4/ev.c | 10
-rw-r--r--  drivers/infiniband/hw/cxgb4/id_table.c | 21
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 42
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 105
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 128
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 55
-rw-r--r--  drivers/infiniband/hw/cxgb4/resource.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb4/restrack.c | 26
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h | 33
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 8
-rw-r--r--  drivers/infiniband/hw/efa/efa.h | 64
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 281
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_defs.h | 47
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.c | 358
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.h | 39
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.c | 111
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.h | 40
-rw-r--r--  drivers/infiniband/hw/efa/efa_common_defs.h | 13
-rw-r--r--  drivers/infiniband/hw/efa/efa_io_defs.h | 289
-rw-r--r--  drivers/infiniband/hw/efa/efa_main.c | 296
-rw-r--r--  drivers/infiniband/hw/efa/efa_regs_defs.h | 30
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c | 695
-rw-r--r--  drivers/infiniband/hw/erdma/Kconfig | 12
-rw-r--r--  drivers/infiniband/hw/erdma/Makefile | 4
-rw-r--r--  drivers/infiniband/hw/erdma/erdma.h | 289
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cm.c | 1422
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cm.h | 167
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cmdq.c | 487
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cq.c | 201
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_eq.c | 320
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_hw.h | 514
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_main.c | 605
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_qp.c | 555
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.c | 1459
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.h | 335
-rw-r--r--  drivers/infiniband/hw/hfi1/Kconfig | 12
-rw-r--r--  drivers/infiniband/hw/hfi1/Makefile | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c | 89
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.h | 48
-rw-r--r--  drivers/infiniband/hw/hfi1/aspm.h | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 438
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 58
-rw-r--r--  drivers/infiniband/hw/hfi1/chip_registers.h | 50
-rw-r--r--  drivers/infiniband/hw/hfi1/common.h | 112
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.c | 78
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.h | 49
-rw-r--r--  drivers/infiniband/hw/hfi1/device.c | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/device.h | 49
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 286
-rw-r--r--  drivers/infiniband/hw/hfi1/efivar.c | 58
-rw-r--r--  drivers/infiniband/hw/hfi1/efivar.h | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/eprom.c | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/eprom.h | 44
-rw-r--r--  drivers/infiniband/hw/hfi1/exp_rcv.c | 58
-rw-r--r--  drivers/infiniband/hw/hfi1/exp_rcv.h | 48
-rw-r--r--  drivers/infiniband/hw/hfi1/fault.c | 50
-rw-r--r--  drivers/infiniband/hw/hfi1/fault.h | 50
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 81
-rw-r--r--  drivers/infiniband/hw/hfi1/firmware.c | 64
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 122
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 193
-rw-r--r--  drivers/infiniband/hw/hfi1/intr.c | 60
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.h | 53
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib.h | 171
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib_main.c | 264
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib_rx.c | 92
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib_tx.c | 864
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 189
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.h | 49
-rw-r--r--  drivers/infiniband/hw/hfi1/mmu_rb.c | 121
-rw-r--r--  drivers/infiniband/hw/hfi1/mmu_rb.h | 61
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.c | 89
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.h | 51
-rw-r--r--  drivers/infiniband/hw/hfi1/netdev.h | 105
-rw-r--r--  drivers/infiniband/hw/hfi1/netdev_rx.c | 482
-rw-r--r--  drivers/infiniband/hw/hfi1/opa_compat.h | 48
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c | 77
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c | 56
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.h | 54
-rw-r--r--  drivers/infiniband/hw/hfi1/pio_copy.c | 60
-rw-r--r--  drivers/infiniband/hw/hfi1/platform.c | 54
-rw-r--r--  drivers/infiniband/hw/hfi1/platform.h | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c | 86
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.h | 62
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 52
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.h | 44
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c | 76
-rw-r--r--  drivers/infiniband/hw/hfi1/ruc.c | 51
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 120
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.h | 71
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma_txreq.h | 44
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c | 639
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.c | 74
-rw-r--r--  drivers/infiniband/hw/hfi1/trace.c | 91
-rw-r--r--  drivers/infiniband/hw/hfi1/trace.h | 44
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ctxts.h | 56
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_dbg.h | 53
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_misc.h | 49
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_mmu.h | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rc.h | 49
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rx.h | 45
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_tid.h | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_tx.h | 290
-rw-r--r--  drivers/infiniband/hw/hfi1/uc.c | 60
-rw-r--r--  drivers/infiniband/hw/hfi1/ud.c | 52
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.c | 91
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.h | 57
-rw-r--r--  drivers/infiniband/hw/hfi1/user_pages.c | 44
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c | 99
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.h | 57
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 209
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.h | 51
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs_txreq.c | 44
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs_txreq.h | 51
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic.h | 55
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_main.c | 371
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_sdma.c | 44
-rw-r--r--  drivers/infiniband/hw/hns/Kconfig | 21
-rw-r--r--  drivers/infiniband/hw/hns/Makefile | 7
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 77
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_alloc.c | 361
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.c | 202
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.h | 12
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_common.h | 253
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 588
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_db.c | 16
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 924
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 1003
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.h | 32
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4714
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 1099
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 8395
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2204
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c | 35
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 523
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 1869
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_pd.c | 146
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 1586
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_restrack.c | 247
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c | 541
-rw-r--r--  drivers/infiniband/hw/i40iw/Kconfig | 9
-rw-r--r--  drivers/infiniband/hw/i40iw/Makefile | 10
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw.h | 602
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c | 4422
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.h | 462
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 5198
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_d.h | 1737
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_hmc.c | 821
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_hmc.h | 241
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_hw.c | 852
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_main.c | 2070
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_osdep.h | 217
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_p.h | 128
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_pble.c | 612
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_pble.h | 131
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_puda.c | 1493
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_puda.h | 188
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_register.h | 1030
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_status.h | 101
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_type.h | 1363
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_uk.c | 1232
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_user.h | 430
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_utils.c | 1557
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2789
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.h | 179
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_vf.c | 85
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_vf.h | 62
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 756
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_virtchnl.h | 124
-rw-r--r--  drivers/infiniband/hw/irdma/Kconfig | 12
-rw-r--r--  drivers/infiniband/hw/irdma/Makefile | 27
-rw-r--r--  drivers/infiniband/hw/irdma/cm.c | 4369
-rw-r--r--  drivers/infiniband/hw/irdma/cm.h | 416
-rw-r--r--  drivers/infiniband/hw/irdma/ctrl.c | 5589
-rw-r--r--  drivers/infiniband/hw/irdma/defs.h | 1156
-rw-r--r--  drivers/infiniband/hw/irdma/hmc.c | 697
-rw-r--r--  drivers/infiniband/hw/irdma/hmc.h | 169
-rw-r--r--  drivers/infiniband/hw/irdma/hw.c | 2719
-rw-r--r--  drivers/infiniband/hw/irdma/i40iw_hw.c | 216
-rw-r--r--  drivers/infiniband/hw/irdma/i40iw_hw.h | 160
-rw-r--r--  drivers/infiniband/hw/irdma/i40iw_if.c | 217
-rw-r--r--  drivers/infiniband/hw/irdma/icrdma_hw.c | 150
-rw-r--r--  drivers/infiniband/hw/irdma/icrdma_hw.h | 71
-rw-r--r--  drivers/infiniband/hw/irdma/irdma.h | 154
-rw-r--r--  drivers/infiniband/hw/irdma/main.c | 369
-rw-r--r--  drivers/infiniband/hw/irdma/main.h | 550
-rw-r--r--  drivers/infiniband/hw/irdma/osdep.h | 82
-rw-r--r--  drivers/infiniband/hw/irdma/pble.c | 509
-rw-r--r--  drivers/infiniband/hw/irdma/pble.h | 132
-rw-r--r--  drivers/infiniband/hw/irdma/protos.h | 100
-rw-r--r--  drivers/infiniband/hw/irdma/puda.c | 1733
-rw-r--r--  drivers/infiniband/hw/irdma/puda.h | 185
-rw-r--r--  drivers/infiniband/hw/irdma/trace.c | 112
-rw-r--r--  drivers/infiniband/hw/irdma/trace.h | 3
-rw-r--r--  drivers/infiniband/hw/irdma/trace_cm.h | 460
-rw-r--r--  drivers/infiniband/hw/irdma/type.h | 1535
-rw-r--r--  drivers/infiniband/hw/irdma/uda.c | 265
-rw-r--r--  drivers/infiniband/hw/irdma/uda.h | 87
-rw-r--r--  drivers/infiniband/hw/irdma/uda_d.h | 128
-rw-r--r--  drivers/infiniband/hw/irdma/uk.c | 1617
-rw-r--r--  drivers/infiniband/hw/irdma/user.h | 424
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 2651
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.c | 4643
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.h | 243
-rw-r--r--  drivers/infiniband/hw/irdma/ws.c | 406
-rw-r--r--  drivers/infiniband/hw/irdma/ws.h | 41
-rw-r--r--  drivers/infiniband/hw/mlx4/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c | 16
-rw-r--r--  drivers/infiniband/hw/mlx4/alias_GUID.c | 21
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c | 171
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 16
-rw-r--r--  drivers/infiniband/hw/mlx4/doorbell.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 207
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 355
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c | 84
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 126
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 156
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 398
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c | 13
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c | 70
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 32
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 44
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 268
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 16
-rw-r--r--  drivers/infiniband/hw/mlx5/cong.c | 35
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 930
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.h | 17
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 194
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 625
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.h | 45
-rw-r--r--  drivers/infiniband/hw/mlx5/dm.c | 607
-rw-r--r--  drivers/infiniband/hw/mlx5/dm.h | 68
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c | 14
-rw-r--r--  drivers/infiniband/hw/mlx5/flow.c | 722
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 2635
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.h | 29
-rw-r--r--  drivers/infiniband/hw/mlx5/gsi.c | 203
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c | 186
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.h | 51
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_virt.c | 19
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c | 92
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 4460
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 197
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 842
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 2019
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 661
-rw-r--r--  drivers/infiniband/hw/mlx5/qos.c | 133
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 4548
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.h | 47
-rw-r--r--  drivers/infiniband/hw/mlx5/qpc.c | 648
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.c | 121
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.h | 13
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 74
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.h | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/srq_cmd.c | 219
-rw-r--r--  drivers/infiniband/hw/mlx5/std_types.c | 222
-rw-r--r--  drivers/infiniband/hw/mlx5/umr.c | 761
-rw-r--r--  drivers/infiniband/hw/mlx5/umr.h | 97
-rw-r--r--  drivers/infiniband/hw/mlx5/wr.c | 1284
-rw-r--r--  drivers/infiniband/hw/mlx5/wr.h | 136
-rw-r--r--  drivers/infiniband/hw/mthca/Kconfig | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_allocator.c | 15
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c | 8
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c | 12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h | 21
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c | 21
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 21
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 27
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c | 287
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 298
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h | 50
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 90
-rw-r--r--  drivers/infiniband/hw/ocrdma/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 8
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 6
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 24
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 70
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 108
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 21
-rw-r--r--  drivers/infiniband/hw/qedr/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/qedr/main.c | 110
-rw-r--r--  drivers/infiniband/hw/qedr/qedr.h | 41
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_iw_cm.c | 26
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_roce_cm.c | 17
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_roce_cm.h | 5
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 726
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.h | 31
-rw-r--r--  drivers/infiniband/hw/qib/Kconfig | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib.h | 52
-rw-r--r--  drivers/infiniband/hw/qib/qib_common.h | 7
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c | 7
-rw-r--r--  drivers/infiniband/hw/qib/qib_eeprom.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 27
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 69
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c | 26
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c | 32
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 87
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 11
-rw-r--r--  drivers/infiniband/hw/qib/qib_intr.c | 16
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c | 74
-rw-r--r--  drivers/infiniband/hw/qib/qib_pcie.c | 15
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c | 16
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c | 23
-rw-r--r--  drivers/infiniband/hw/qib/qib_sd7220.c | 3
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c | 12
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c | 699
-rw-r--r--  drivers/infiniband/hw/qib/qib_twsi.c | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_tx.c | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_uc.c | 9
-rw-r--r--  drivers/infiniband/hw/qib/qib_ud.c | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_pages.c | 20
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.c | 33
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 27
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 7
-rw-r--r--  drivers/infiniband/hw/usnic/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_debugfs.c | 1
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_fwd.c | 6
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_fwd.h | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib.h | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 45
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 35
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 10
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 103
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 131
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 17
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_transport.c | 1
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 24
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.h | 5
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c | 3
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_vnic.c | 1
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 24
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 12
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c | 10
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 70
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c | 9
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 5
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 67
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 9
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 32
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 62
-rw-r--r--  drivers/infiniband/sw/rdmavt/Kconfig | 6
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.c | 64
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.h | 56
-rw-r--r--  drivers/infiniband/sw/rdmavt/cq.c | 57
-rw-r--r--  drivers/infiniband/sw/rdmavt/cq.h | 52
-rw-r--r--  drivers/infiniband/sw/rdmavt/mad.c | 56
-rw-r--r--  drivers/infiniband/sw/rdmavt/mad.h | 52
-rw-r--r--  drivers/infiniband/sw/rdmavt/mcast.c | 58
-rw-r--r--  drivers/infiniband/sw/rdmavt/mcast.h | 50
-rw-r--r--  drivers/infiniband/sw/rdmavt/mmap.c | 49
-rw-r--r--  drivers/infiniband/sw/rdmavt/mmap.h | 50
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c | 236
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.h | 67
-rw-r--r--  drivers/infiniband/sw/rdmavt/pd.c | 47
-rw-r--r--  drivers/infiniband/sw/rdmavt/pd.h | 52
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c | 276
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.h | 55
-rw-r--r--  drivers/infiniband/sw/rdmavt/rc.c | 48
-rw-r--r--  drivers/infiniband/sw/rdmavt/srq.c | 58
-rw-r--r--  drivers/infiniband/sw/rdmavt/srq.h | 52
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace.c | 44
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace.h | 44
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_cq.h | 48
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_mr.h | 46
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_qp.h | 48
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_rc.h | 46
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_rvt.h | 44
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_tx.h | 48
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.c | 146
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.h | 61
-rw-r--r--  drivers/infiniband/sw/rxe/Kconfig | 6
-rw-r--r--  drivers/infiniband/sw/rxe/Makefile | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 195
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h | 52
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_av.c | 101
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c | 222
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c | 70
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hdr.h | 207
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.c | 80
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.h | 35
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_icrc.c | 143
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 196
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mcast.c | 551
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mmap.c | 33
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 498
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mw.c | 332
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 199
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.h | 29
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.c | 781
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.h | 38
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 79
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c | 640
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.h | 142
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 266
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.c | 92
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.h | 284
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c | 286
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 403
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 642
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_srq.c | 162
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_sysfs.c | 135
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.c | 76
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.h | 36
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 546
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 184
-rw-r--r--  drivers/infiniband/sw/siw/Kconfig | 7
-rw-r--r--  drivers/infiniband/sw/siw/iwarp.h | 13
-rw-r--r--  drivers/infiniband/sw/siw/siw.h | 15
-rw-r--r--  drivers/infiniband/sw/siw/siw_cm.c | 218
-rw-r--r--  drivers/infiniband/sw/siw/siw_main.c | 73
-rw-r--r--  drivers/infiniband/sw/siw/siw_mem.c | 13
-rw-r--r--  drivers/infiniband/sw/siw/siw_mem.h | 5
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp.c | 275
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_rx.c | 80
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_tx.c | 85
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.c | 147
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.h | 24
-rw-r--r--  drivers/infiniband/ulp/Makefile | 1
-rw-r--r--  drivers/infiniband/ulp/ipoib/Kconfig | 8
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 22
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 53
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 21
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c | 50
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 91
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 195
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 42
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 13
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 11
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 12
-rw-r--r--  drivers/infiniband/ulp/iser/Kconfig | 2
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c | 160
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 143
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 188
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 313
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 389
-rw-r--r--  drivers/infiniband/ulp/isert/Kconfig | 2
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 498
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.h | 69
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/Kconfig | 6
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/Makefile | 3
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 31
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 6
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h | 1
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 1
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 27
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 2
-rw-r--r--  drivers/infiniband/ulp/rtrs/Kconfig | 27
-rw-r--r--  drivers/infiniband/ulp/rtrs/Makefile | 21
-rw-r--r--  drivers/infiniband/ulp/rtrs/README | 213
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 198
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 514
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c | 15
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h | 86
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt.c | 3190
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt.h | 251
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-log.h | 28
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-pri.h | 409
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c | 51
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 315
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c | 16
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h | 88
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2297
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv.h | 154
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs.c | 654
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs.h | 188
-rw-r--r--  drivers/infiniband/ulp/srp/Kconfig | 2
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 797
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h | 45
-rw-r--r--  drivers/infiniband/ulp/srpt/Kconfig | 2
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 333
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h | 27
568 files changed, 96335 insertions, 81595 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index ade86388434f..aa36ac618e72 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -8,7 +8,7 @@ menuconfig INFINIBAND
depends on !ALPHA
select IRQ_POLL
select DIMLIB
- ---help---
+ help
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
InfiniBand hardware.
@@ -18,7 +18,7 @@ if INFINIBAND
config INFINIBAND_USER_MAD
tristate "InfiniBand userspace MAD support"
depends on INFINIBAND
- ---help---
+ help
Userspace InfiniBand Management Datagram (MAD) support. This
is the kernel side of the userspace MAD support, which allows
userspace processes to send and receive MADs. You will also
@@ -28,7 +28,7 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
depends on MMU
- ---help---
+ help
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
communication manager (CM). This allows userspace processes
@@ -37,18 +37,11 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
-config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
- bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
- depends on INFINIBAND_USER_ACCESS
- ---help---
- IOCTL based uAPI support for Infiniband is enabled by default for
- new verbs only. This allows userspace to invoke the IOCTL based uAPI
- for current legacy verbs too.
-
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
depends on MMU
+ select DMA_SHARED_BUFFER
default y
config INFINIBAND_ON_DEMAND_PAGING
@@ -56,8 +49,9 @@ config INFINIBAND_ON_DEMAND_PAGING
depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER
select INTERVAL_TREE
+ select HMM_MIRROR
default y
- ---help---
+ help
On demand paging support for the InfiniBand subsystem.
Together with driver support this allows registration of
memory regions without pinning their pages, fetching the
@@ -67,7 +61,7 @@ config INFINIBAND_ADDR_TRANS
bool "RDMA/CM"
depends on INFINIBAND
default y
- ---help---
+ help
Support for RDMA communication manager (CM).
This allows for a generic connection abstraction over RDMA.
@@ -75,26 +69,30 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS
bool
depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
default y
- ---help---
+ help
ConfigFS support for RDMA communication manager (CM).
This allows the user to config the default GID type that the CM
uses for each device, when initiaing new connections.
+config INFINIBAND_VIRT_DMA
+ def_bool !HIGHMEM
+
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
-source "drivers/infiniband/hw/mthca/Kconfig"
-source "drivers/infiniband/hw/qib/Kconfig"
+source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
-source "drivers/infiniband/hw/i40iw/Kconfig"
+source "drivers/infiniband/hw/erdma/Kconfig"
+source "drivers/infiniband/hw/hfi1/Kconfig"
+source "drivers/infiniband/hw/hns/Kconfig"
+source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
+source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
-source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
-source "drivers/infiniband/hw/usnic/Kconfig"
-source "drivers/infiniband/hw/hns/Kconfig"
-source "drivers/infiniband/hw/bnxt_re/Kconfig"
-source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
+source "drivers/infiniband/hw/qib/Kconfig"
+source "drivers/infiniband/hw/usnic/Kconfig"
+source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/sw/siw/Kconfig"
@@ -107,6 +105,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
+source "drivers/infiniband/ulp/rtrs/Kconfig"
source "drivers/infiniband/ulp/opa_vnic/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d1b14887960e..8ab4eea5a0a5 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,16 +8,16 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
- device.o fmr_pool.o cache.o netlink.o \
+ device.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
nldev.o restrack.o counters.o ib_core_uverbs.o \
- trace.o
+ trace.o lag.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
-ib_cm-y := cm.o
+ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
@@ -36,6 +36,9 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
uverbs_uapi.o uverbs_std_types_device.o \
- uverbs_std_types_async_fd.o
-ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ uverbs_std_types_async_fd.o \
+ uverbs_std_types_srq.o \
+ uverbs_std_types_wq.o \
+ uverbs_std_types_qp.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1753a9801b70..f253295795f0 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -37,7 +37,6 @@
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -76,7 +75,9 @@ static struct workqueue_struct *addr_wq;
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
- .len = sizeof(struct rdma_nla_ls_gid)},
+ .len = sizeof(struct rdma_nla_ls_gid),
+ .validation_type = NLA_VALIDATE_MIN,
+ .min = sizeof(struct rdma_nla_ls_gid)},
};
static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
@@ -371,6 +372,8 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
+ might_sleep();
+
/* If we have a gateway in IB mode then it must be an IB network */
if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
@@ -645,13 +648,12 @@ static void process_one_req(struct work_struct *_work)
req->callback = NULL;
spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) {
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list.
- */
- cancel_delayed_work(&req->work);
list_del_init(&req->list);
kfree(req);
}
@@ -727,6 +729,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
struct rdma_dev_addr dev_addr = {};
int ret;
+ might_sleep();
+
if (rec->roce.route_resolved)
return 0;
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 17bfedd24cc3..4084d05a4510 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -33,7 +33,7 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/if_vlan.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
@@ -46,7 +46,7 @@
struct ib_pkey_cache {
int table_len;
- u16 table[0];
+ u16 table[];
};
struct ib_update_work {
@@ -121,7 +121,7 @@ struct ib_gid_table {
u32 default_gid_indices;
};
-static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u32 port)
{
struct ib_event event;
@@ -133,7 +133,11 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
}
static const char * const gid_type_str[] = {
+ /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+ * user space compatibility reasons.
+ */
[IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE] = "IB/RoCE v1",
[IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
};
@@ -193,7 +197,7 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
+static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u32 port)
{
return device->port_data[port].cache.gid;
}
@@ -233,10 +237,10 @@ static void put_gid_ndev(struct rcu_head *head)
static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
struct ib_device *device = entry->attr.device;
- u8 port_num = entry->attr.port_num;
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
- dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
+ dev_dbg(&device->dev, "%s port=%u index=%u gid %pI6\n", __func__,
port_num, entry->attr.index, entry->attr.gid.raw);
write_lock_irq(&table->rwlock);
@@ -278,7 +282,7 @@ static void free_gid_work(struct work_struct *work)
struct ib_gid_table_entry *entry =
container_of(work, struct ib_gid_table_entry, del_work);
struct ib_device *device = entry->attr.device;
- u8 port_num = entry->attr.port_num;
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
mutex_lock(&table->lock);
@@ -319,7 +323,7 @@ static void store_gid_entry(struct ib_gid_table *table,
{
entry->state = GID_TABLE_ENTRY_VALID;
- dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
+ dev_dbg(&entry->attr.device->dev, "%s port=%u index=%u gid %pI6\n",
__func__, entry->attr.port_num, entry->attr.index,
entry->attr.gid.raw);
@@ -350,7 +354,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
int ret;
if (!attr->ndev) {
- dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
+ dev_err(&attr->device->dev, "%s NULL netdev port=%u index=%u\n",
__func__, attr->port_num, attr->index);
return -EINVAL;
}
@@ -358,7 +362,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
ret = attr->device->ops.add_gid(attr, &entry->context);
if (ret) {
dev_err(&attr->device->dev,
- "%s GID add failed port=%d index=%d\n",
+ "%s GID add failed port=%u index=%u\n",
__func__, attr->port_num, attr->index);
return ret;
}
@@ -375,7 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
* @ix: GID entry index to delete
*
*/
-static void del_gid(struct ib_device *ib_dev, u8 port,
+static void del_gid(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table, int ix)
{
struct roce_gid_ndev_storage *ndev_storage;
@@ -383,7 +387,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
lockdep_assert_held(&table->lock);
- dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
+ dev_dbg(&ib_dev->dev, "%s port=%u index=%d gid %pI6\n", __func__, port,
ix, table->data_vec[ix]->attr.gid.raw);
write_lock_irq(&table->rwlock);
@@ -539,7 +543,7 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr,
unsigned long mask, bool default_gid)
{
@@ -583,7 +587,7 @@ out_unlock:
return ret;
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
unsigned long mask = GID_ATTR_FIND_MASK_GID |
@@ -594,7 +598,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
}
static int
-_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+_ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr,
unsigned long mask, bool default_gid)
{
@@ -623,7 +627,7 @@ out_unlock:
return ret;
}
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
unsigned long mask = GID_ATTR_FIND_MASK_GID |
@@ -634,7 +638,7 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev)
{
struct ib_gid_table *table;
@@ -665,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
* rdma_find_gid_by_port - Returns the GID entry attributes when it finds
* a valid GID entry for given search parameters. It searches for the specified
* GID value in the local software cache.
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
- * @port_num: The port number of the device where the GID value should be
- * searched.
+ * @port: The port number of the device where the GID value should be searched.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
*
* Returns sgid attributes if the GID is found with valid reference or
@@ -680,7 +683,7 @@ const struct ib_gid_attr *
rdma_find_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
enum ib_gid_type gid_type,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
int local_index;
struct ib_gid_table *table;
@@ -715,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
* rdma_find_gid_by_filter - Returns the GID table attribute where a
* specified GID value occurs
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @port: The port number of the device where the GID value could be
* searched.
@@ -724,13 +727,14 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
+ * @context: Private data to pass into the call-back.
*
* rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
*
*/
const struct ib_gid_attr *rdma_find_gid_by_filter(
- struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
+ struct ib_device *ib_dev, const union ib_gid *gid, u32 port,
bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
void *),
void *context)
@@ -801,7 +805,7 @@ static void release_gid_table(struct ib_device *device,
continue;
if (kref_read(&table->data_vec[i]->kref) > 1) {
dev_err(&device->dev,
- "GID entry ref leak for index %d ref=%d\n", i,
+ "GID entry ref leak for index %d ref=%u\n", i,
kref_read(&table->data_vec[i]->kref));
leak = true;
}
@@ -814,7 +818,7 @@ static void release_gid_table(struct ib_device *device,
kfree(table);
}
-static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
+static void cleanup_gid_table_port(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table)
{
int i;
@@ -830,7 +834,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
mutex_unlock(&table->lock);
}
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
@@ -863,7 +867,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
}
}
-static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
+static void gid_table_reserve_default(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table)
{
unsigned int i;
@@ -880,7 +884,7 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static void gid_table_release_one(struct ib_device *ib_dev)
{
- unsigned int p;
+ u32 p;
rdma_for_each_port (ib_dev, p) {
release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
@@ -891,7 +895,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
- unsigned int rdma_port;
+ u32 rdma_port;
rdma_for_each_port (ib_dev, rdma_port) {
table = alloc_gid_table(
@@ -911,7 +915,7 @@ rollback_table_setup:
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- unsigned int p;
+ u32 p;
rdma_for_each_port (ib_dev, p)
cleanup_gid_table_port(ib_dev, p,
@@ -946,12 +950,12 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
* Returns 0 on success or appropriate error code.
*
*/
-int rdma_query_gid(struct ib_device *device, u8 port_num,
+int rdma_query_gid(struct ib_device *device, u32 port_num,
int index, union ib_gid *gid)
{
struct ib_gid_table *table;
unsigned long flags;
- int res = -EINVAL;
+ int res;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
@@ -959,9 +963,15 @@ int rdma_query_gid(struct ib_device *device, u8 port_num,
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
- if (index < 0 || index >= table->sz ||
- !is_gid_entry_valid(table->data_vec[index]))
+ if (index < 0 || index >= table->sz) {
+ res = -EINVAL;
goto done;
+ }
+
+ if (!is_gid_entry_valid(table->data_vec[index])) {
+ res = -ENOENT;
+ goto done;
+ }
memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
res = 0;
@@ -973,6 +983,23 @@ done:
EXPORT_SYMBOL(rdma_query_gid);
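
A minimal caller-side sketch of the error split introduced above: rdma_query_gid() now reports -EINVAL for an invalid port or out-of-range index and -ENOENT for a slot that holds no valid GID. The wrapper below and its name are illustrative only, and the header where the declaration lives is assumed.

#include <linux/printk.h>
#include <rdma/ib_cache.h>

/* Sketch: print the first GID of a port, distinguishing the two error cases. */
static void show_first_gid(struct ib_device *device, u32 port_num)
{
        union ib_gid gid;
        int ret;

        ret = rdma_query_gid(device, port_num, 0, &gid);
        if (ret == -EINVAL)     /* invalid port or index out of range */
                return;
        if (ret == -ENOENT)     /* slot exists but no valid GID is stored */
                return;
        if (!ret)
                pr_info("port %u index 0 gid %pI6\n", port_num, gid.raw);
}
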
/**
+ * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
+ * @attr: Potinter to the GID attribute
+ *
+ * rdma_read_gid_hw_context() reads the drivers GID HW context corresponding
+ * to the SGID attr. Callers are required to already be holding the reference
+ * to an existing GID entry.
+ *
+ * Returns the HW GID context
+ *
+ */
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
+{
+ return container_of(attr, struct ib_gid_table_entry, attr)->context;
+}
+EXPORT_SYMBOL(rdma_read_gid_hw_context);
+
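
A purely illustrative driver-side sketch of the helper exported above: per the kernel-doc, the caller must already hold a reference on the GID entry (for example an sgid_attr handed to a driver verb). The function name below is hypothetical.

#include <rdma/ib_cache.h>

/* Sketch: recover the HW context a driver stored from its ->add_gid() hook. */
static void use_hw_gid_context(const struct ib_gid_attr *sgid_attr)
{
        void *hw_ctx = rdma_read_gid_hw_context(sgid_attr);

        (void)hw_ctx;   /* driver-specific use of the context goes here */
}
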
+/**
* rdma_find_gid - Returns SGID attributes if the matching GID is found.
* @device: The device to query.
* @gid: The GID value to search for.
@@ -993,7 +1020,7 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
- unsigned int p;
+ u32 p;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -1022,7 +1049,7 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
EXPORT_SYMBOL(rdma_find_gid);
int ib_get_cached_pkey(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
int index,
u16 *pkey)
{
@@ -1037,7 +1064,7 @@ int ib_get_cached_pkey(struct ib_device *device,
cache = device->port_data[port_num].cache.pkey;
- if (index < 0 || index >= cache->table_len)
+ if (!cache || index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
@@ -1048,27 +1075,19 @@ int ib_get_cached_pkey(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_pkey);
-int ib_get_cached_subnet_prefix(struct ib_device *device,
- u8 port_num,
- u64 *sn_pfx)
+void ib_get_cached_subnet_prefix(struct ib_device *device, u32 port_num,
+ u64 *sn_pfx)
{
unsigned long flags;
- if (!rdma_is_port_valid(device, port_num))
- return -EINVAL;
-
read_lock_irqsave(&device->cache_lock, flags);
*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
read_unlock_irqrestore(&device->cache_lock, flags);
-
- return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
-int ib_find_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
+int ib_find_cached_pkey(struct ib_device *device, u32 port_num,
+ u16 pkey, u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
@@ -1082,6 +1101,10 @@ int ib_find_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -1091,8 +1114,9 @@ int ib_find_cached_pkey(struct ib_device *device,
*index = i;
ret = 0;
break;
- } else
+ } else {
partial_ix = i;
+ }
}
if (ret && partial_ix >= 0) {
@@ -1100,16 +1124,15 @@ int ib_find_cached_pkey(struct ib_device *device,
ret = 0;
}
+err:
read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
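
A short usage sketch for the reworked lookup above, assuming the declaration follows the new u32 port type; 0xffff is the conventional full-membership default P_Key, and the helper name here is hypothetical.

#include <rdma/ib_cache.h>

/* Sketch: resolve the cached table index of the default P_Key on a port. */
static int default_pkey_index(struct ib_device *device, u32 port_num)
{
        u16 index;
        int ret;

        /* Negative errno if the port has no pkey cache or no match exists. */
        ret = ib_find_cached_pkey(device, port_num, 0xffff, &index);
        return ret ? ret : index;
}
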
-int ib_find_exact_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
+int ib_find_exact_cached_pkey(struct ib_device *device, u32 port_num,
+ u16 pkey, u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
@@ -1122,6 +1145,10 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -1132,15 +1159,14 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
break;
}
+err:
read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
-int ib_get_cached_lmc(struct ib_device *device,
- u8 port_num,
- u8 *lmc)
+int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc)
{
unsigned long flags;
int ret = 0;
@@ -1156,8 +1182,7 @@ int ib_get_cached_lmc(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_lmc);
-int ib_get_cached_port_state(struct ib_device *device,
- u8 port_num,
+int ib_get_cached_port_state(struct ib_device *device, u32 port_num,
enum ib_port_state *port_state)
{
unsigned long flags;
@@ -1191,9 +1216,9 @@ EXPORT_SYMBOL(ib_get_cached_port_state);
* code.
*/
const struct ib_gid_attr *
-rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
+rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index)
{
- const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+ const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
struct ib_gid_table *table;
unsigned long flags;
@@ -1217,6 +1242,63 @@ done:
EXPORT_SYMBOL(rdma_get_gid_attr);
/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries)
+{
+ const struct ib_gid_attr *gid_attr;
+ ssize_t num_entries = 0, ret;
+ struct ib_gid_table *table;
+ u32 port_num, i;
+ struct net_device *ndev;
+ unsigned long flags;
+
+ rdma_for_each_port(device, port_num) {
+ table = rdma_gid_table(device, port_num);
+ read_lock_irqsave(&table->rwlock, flags);
+ for (i = 0; i < table->sz; i++) {
+ if (!is_gid_entry_valid(table->data_vec[i]))
+ continue;
+ if (num_entries >= max_entries) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ gid_attr = &table->data_vec[i]->attr;
+
+ memcpy(&entries->gid, &gid_attr->gid,
+ sizeof(gid_attr->gid));
+ entries->gid_index = gid_attr->index;
+ entries->port_num = gid_attr->port_num;
+ entries->gid_type = gid_attr->gid_type;
+ ndev = rcu_dereference_protected(
+ gid_attr->ndev,
+ lockdep_is_held(&table->rwlock));
+ if (ndev)
+ entries->netdev_ifindex = ndev->ifindex;
+
+ num_entries++;
+ entries++;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
+
+ return num_entries;
+err:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
+
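
A minimal consumer sketch of the new rdma_query_gid_table() export, assuming the caller chooses max_entries itself and that struct ib_uverbs_gid_entry is visible through the usual rdma headers; the function below is illustrative, not part of the patch.

#include <linux/slab.h>
#include <rdma/ib_cache.h>

/* Sketch: snapshot all valid GID entries of a device into a temporary array. */
static ssize_t snapshot_gids(struct ib_device *device, size_t max_entries)
{
        struct ib_uverbs_gid_entry *entries;
        ssize_t n;

        entries = kcalloc(max_entries, sizeof(*entries), GFP_KERNEL);
        if (!entries)
                return -ENOMEM;

        /* Returns the number of entries copied, or -EINVAL when the array is
         * too small to hold every valid entry across all ports.
         */
        n = rdma_query_gid_table(device, entries, max_entries);

        kfree(entries);
        return n;
}
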
+/**
* rdma_put_gid_attr - Release reference to the GID attribute
* @attr: Pointer to the GID attribute whose reference
* needs to be released.
@@ -1272,8 +1354,8 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
struct ib_gid_table_entry *entry =
container_of(attr, struct ib_gid_table_entry, attr);
struct ib_device *device = entry->attr.device;
- struct net_device *ndev = ERR_PTR(-ENODEV);
- u8 port_num = entry->attr.port_num;
+ struct net_device *ndev = ERR_PTR(-EINVAL);
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table;
unsigned long flags;
bool valid;
@@ -1284,8 +1366,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid) {
ndev = rcu_dereference(attr->ndev);
- if (!ndev ||
- (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+ if (!ndev)
ndev = ERR_PTR(-ENODEV);
}
read_unlock_irqrestore(&table->rwlock, flags);
@@ -1293,9 +1374,10 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
-static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
+static int get_lower_dev_vlan(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
{
- u16 *vlan_id = data;
+ u16 *vlan_id = (u16 *)priv->data;
if (is_vlan_dev(lower_dev))
*vlan_id = vlan_dev_vlan_id(lower_dev);
@@ -1321,6 +1403,9 @@ static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
u16 *vlan_id, u8 *smac)
{
+ struct netdev_nested_priv priv = {
+ .data = (void *)vlan_id,
+ };
struct net_device *ndev;
rcu_read_lock();
@@ -1341,7 +1426,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
* the lower vlan device for this gid entry.
*/
netdev_walk_all_lower_dev_rcu(attr->ndev,
- get_lower_dev_vlan, vlan_id);
+ get_lower_dev_vlan, &priv);
}
}
rcu_read_unlock();
@@ -1350,7 +1435,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
EXPORT_SYMBOL(rdma_read_gid_l2_fields);
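
The hunk above adapts to the netdev core convention in which lower-device walkers receive a struct netdev_nested_priv rather than a bare void *. A generic, hypothetical callback following the same pattern as get_lower_dev_vlan() might look like this:

#include <linux/if_vlan.h>
#include <linux/netdevice.h>

/* Sketch: count VLAN lower devices; per-walk state travels in priv->data. */
static int count_vlan_lowers(struct net_device *lower_dev,
                             struct netdev_nested_priv *priv)
{
        unsigned int *count = (unsigned int *)priv->data;

        if (is_vlan_dev(lower_dev))
                (*count)++;

        return 0;       /* zero keeps the walk going */
}

static unsigned int vlan_lower_count(struct net_device *ndev)
{
        unsigned int count = 0;
        struct netdev_nested_priv priv = { .data = (void *)&count };

        rcu_read_lock();
        netdev_walk_all_lower_dev_rcu(ndev, count_vlan_lowers, &priv);
        rcu_read_unlock();

        return count;
}
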
static int config_non_roce_gid_cache(struct ib_device *device,
- u8 port, int gid_tbl_len)
+ u32 port, struct ib_port_attr *tprops)
{
struct ib_gid_attr gid_attr = {};
struct ib_gid_table *table;
@@ -1362,7 +1447,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
table = rdma_gid_table(device, port);
mutex_lock(&table->lock);
- for (i = 0; i < gid_tbl_len; ++i) {
+ for (i = 0; i < tprops->gid_tbl_len; ++i) {
if (!device->ops.query_gid)
continue;
ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
@@ -1373,6 +1458,8 @@ static int config_non_roce_gid_cache(struct ib_device *device,
goto err;
}
gid_attr.index = i;
+ tprops->subnet_prefix =
+ be64_to_cpu(gid_attr.gid.global.subnet_prefix);
add_modify_gid(table, &gid_attr);
}
err:
@@ -1381,10 +1468,12 @@ err:
}
static int
-ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
+ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
+ bool update_pkeys, bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
- struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
+ struct ib_pkey_cache *pkey_cache = NULL;
+ struct ib_pkey_cache *old_pkey_cache = NULL;
int i;
int ret;
@@ -1401,38 +1490,44 @@ ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
goto err;
}
- if (!rdma_protocol_roce(device, port)) {
+ if (!rdma_protocol_roce(device, port) && update_gids) {
ret = config_non_roce_gid_cache(device, port,
- tprops->gid_tbl_len);
+ tprops);
if (ret)
goto err;
}
- pkey_cache = kmalloc(struct_size(pkey_cache, table,
- tprops->pkey_tbl_len),
- GFP_KERNEL);
- if (!pkey_cache) {
- ret = -ENOMEM;
- goto err;
- }
-
- pkey_cache->table_len = tprops->pkey_tbl_len;
+ update_pkeys &= !!tprops->pkey_tbl_len;
- for (i = 0; i < pkey_cache->table_len; ++i) {
- ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
- if (ret) {
- dev_warn(&device->dev,
- "ib_query_pkey failed (%d) for index %d\n",
- ret, i);
+ if (update_pkeys) {
+ pkey_cache = kmalloc(struct_size(pkey_cache, table,
+ tprops->pkey_tbl_len),
+ GFP_KERNEL);
+ if (!pkey_cache) {
+ ret = -ENOMEM;
goto err;
}
+
+ pkey_cache->table_len = tprops->pkey_tbl_len;
+
+ for (i = 0; i < pkey_cache->table_len; ++i) {
+ ret = ib_query_pkey(device, port, i,
+ pkey_cache->table + i);
+ if (ret) {
+ dev_warn(&device->dev,
+ "ib_query_pkey failed (%d) for index %d\n",
+ ret, i);
+ goto err;
+ }
+ }
}
write_lock_irq(&device->cache_lock);
- old_pkey_cache = device->port_data[port].cache.pkey;
-
- device->port_data[port].cache.pkey = pkey_cache;
+ if (update_pkeys) {
+ old_pkey_cache = device->port_data[port].cache.pkey;
+ device->port_data[port].cache.pkey = pkey_cache;
+ }
device->port_data[port].cache.lmc = tprops->lmc;
device->port_data[port].cache.port_state = tprops->state;
@@ -1464,6 +1559,8 @@ static void ib_cache_event_task(struct work_struct *_work)
* the cache.
*/
ret = ib_cache_update(work->event.device, work->event.element.port_num,
+ work->event.event == IB_EVENT_GID_CHANGE,
+ work->event.event == IB_EVENT_PKEY_CHANGE,
work->enforce_security);
/* GID event is notified already for individual GID entries by
@@ -1527,24 +1624,25 @@ EXPORT_SYMBOL(ib_dispatch_event);
int ib_cache_setup_one(struct ib_device *device)
{
- unsigned int p;
+ u32 p;
int err;
- rwlock_init(&device->cache_lock);
-
err = gid_table_setup_one(device);
if (err)
return err;
- rdma_for_each_port (device, p)
- ib_cache_update(device, p, true);
+ rdma_for_each_port (device, p) {
+ err = ib_cache_update(device, p, true, true, true);
+ if (err)
+ return err;
+ }
return 0;
}
void ib_cache_release_one(struct ib_device *device)
{
- unsigned int p;
+ u32 p;
/*
* The release function frees all the cache elements.
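Before the cm.c hunks: the reworked ib_cache_update() above takes separate update_gids/update_pkeys flags, chosen from the triggering event as seen in the ib_cache_event_task() hunk. A minimal sketch of that mapping, assuming a hypothetical helper in cache.c (the helper name and the work-struct plumbing around it are illustrative, not part of this patch):

	/*
	 * Illustrative only: select which cache tables to refresh from the
	 * async event type, mirroring the ib_cache_event_task() hunk above.
	 */
	static int cache_update_for_event(struct ib_event *event,
					  bool enforce_security)
	{
		return ib_cache_update(event->device, event->element.port_num,
				       event->event == IB_EVENT_GID_CHANGE,
				       event->event == IB_EVENT_PKEY_CHANGE,
				       enforce_security);
	}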
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 15e99a888427..1f9938a2c475 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -25,8 +25,10 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
+#include <rdma/ib_sysfs.h>
#include "cm_msgs.h"
#include "core_priv.h"
+#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@@ -66,6 +68,8 @@ static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
[IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
[IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+ [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] =
+ "vendor option is not supported",
};
const char *__attribute_const__ ibcm_reject_msg(int reason)
@@ -80,8 +84,22 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
}
EXPORT_SYMBOL(ibcm_reject_msg);
-static void cm_add_one(struct ib_device *device);
+struct cm_id_private;
+struct cm_work;
+static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+ struct cm_work *work);
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param);
+static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
+ const void *private_data, u8 private_data_len);
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len);
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len);
static struct ib_client cm_client = {
.name = "cm",
@@ -104,8 +122,6 @@ static struct ib_cm {
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
- /* Sync on cm change port state */
- spinlock_t state_lock;
} cm;
/* Counter indexes ordered by attribute ID */
@@ -133,77 +149,33 @@ enum {
CM_COUNTER_GROUPS
};
-static char const counter_group_names[CM_COUNTER_GROUPS]
- [sizeof("cm_rx_duplicates")] = {
- "cm_tx_msgs", "cm_tx_retries",
- "cm_rx_msgs", "cm_rx_duplicates"
-};
-
-struct cm_counter_group {
- struct kobject obj;
- atomic_long_t counter[CM_ATTR_COUNT];
-};
-
struct cm_counter_attribute {
- struct attribute attr;
- int index;
-};
-
-#define CM_COUNTER_ATTR(_name, _index) \
-struct cm_counter_attribute cm_##_name##_counter_attr = { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
- .index = _index \
-}
-
-static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
-static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
-static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
-static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
-static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
-static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
-static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
-static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
-static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
-static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
-static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
-
-static struct attribute *cm_counter_default_attrs[] = {
- &cm_req_counter_attr.attr,
- &cm_mra_counter_attr.attr,
- &cm_rej_counter_attr.attr,
- &cm_rep_counter_attr.attr,
- &cm_rtu_counter_attr.attr,
- &cm_dreq_counter_attr.attr,
- &cm_drep_counter_attr.attr,
- &cm_sidr_req_counter_attr.attr,
- &cm_sidr_rep_counter_attr.attr,
- &cm_lap_counter_attr.attr,
- &cm_apr_counter_attr.attr,
- NULL
+ struct ib_port_attribute attr;
+ unsigned short group;
+ unsigned short index;
};
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
- struct kobject port_obj;
- u8 port_num;
- struct list_head cm_priv_prim_list;
- struct list_head cm_priv_altr_list;
- struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
+ u32 port_num;
+ atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
};
struct cm_device {
+ struct kref kref;
struct list_head list;
+ spinlock_t mad_agent_lock;
struct ib_device *ib_device;
u8 ack_delay;
int going_down;
- struct cm_port *port[0];
+ struct cm_port *port[];
};
struct cm_av {
struct cm_port *port;
- union ib_gid dgid;
struct rdma_ah_attr ah_attr;
+ u16 dlid_datapath;
u16 pkey_index;
u8 timeout;
};
@@ -216,7 +188,7 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct sa_path_rec path[0];
+ struct sa_path_rec path[];
};
struct cm_timewait_info {
@@ -235,11 +207,13 @@ struct cm_id_private {
struct rb_node service_node;
struct rb_node sidr_id_node;
+ u32 sidr_slid;
spinlock_t lock; /* Do not acquire inside cm.lock */
struct completion comp;
refcount_t refcount;
/* Number of clients sharing this ib_cm_id. Only valid for listeners.
- * Protected by the cm.lock spinlock. */
+ * Protected by the cm.lock spinlock.
+ */
int listen_sharecount;
struct rcu_head rcu;
@@ -261,7 +235,6 @@ struct cm_id_private {
__be16 pkey;
u8 private_data_len;
u8 max_cm_retries;
- u8 peer_to_peer;
u8 responder_resources;
u8 initiator_depth;
u8 retry_count;
@@ -269,16 +242,28 @@ struct cm_id_private {
u8 service_timeout;
u8 target_ack_delay;
- struct list_head prim_list;
- struct list_head altr_list;
- /* Indicates that the send port mad is registered and av is set */
- int prim_send_port_not_ready;
- int altr_send_port_not_ready;
-
struct list_head work_list;
atomic_t work_count;
+
+ struct rdma_ucm_ece ece;
};
+static void cm_dev_release(struct kref *kref)
+{
+ struct cm_device *cm_dev = container_of(kref, struct cm_device, kref);
+ u32 i;
+
+ rdma_for_each_port(cm_dev->ib_device, i)
+ kfree(cm_dev->port[i - 1]);
+
+ kfree(cm_dev);
+}
+
+static void cm_device_put(struct cm_device *cm_dev)
+{
+ kref_put(&cm_dev->kref, cm_dev_release);
+}
+
static void cm_work_handler(struct work_struct *work);
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
@@ -287,52 +272,37 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
complete(&cm_id_priv->comp);
}
-static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
{
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
- struct cm_av *av;
- unsigned long flags, flags2;
- int ret = 0;
- /* don't let the port to be released till the agent is down */
- spin_lock_irqsave(&cm.state_lock, flags2);
- spin_lock_irqsave(&cm.lock, flags);
- if (!cm_id_priv->prim_send_port_not_ready)
- av = &cm_id_priv->av;
- else if (!cm_id_priv->altr_send_port_not_ready &&
- (cm_id_priv->alt_av.port))
- av = &cm_id_priv->alt_av;
- else {
- pr_info("%s: not valid CM id\n", __func__);
- ret = -ENODEV;
- spin_unlock_irqrestore(&cm.lock, flags);
- goto out;
- }
- spin_unlock_irqrestore(&cm.lock, flags);
- /* Make sure the port haven't released the mad yet */
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!cm_id_priv->av.port)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
mad_agent = cm_id_priv->av.port->mad_agent;
if (!mad_agent) {
- pr_info("%s: not a valid MAD agent\n", __func__);
- ret = -ENODEV;
+ m = ERR_PTR(-EINVAL);
goto out;
}
- ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
+
+ ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
if (IS_ERR(ah)) {
- ret = PTR_ERR(ah);
+ m = ERR_CAST(ah);
goto out;
}
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
- av->pkey_index,
+ cm_id_priv->av.pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
rdma_destroy_ah(ah, 0);
- ret = PTR_ERR(m);
goto out;
}
@@ -342,11 +312,49 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
refcount_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
- *msg = m;
out:
- spin_unlock_irqrestore(&cm.state_lock, flags2);
- return ret;
+ spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ return m;
+}
+
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+ struct cm_id_private *cm_id_priv = msg->context[0];
+
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ cm_deref_id(cm_id_priv);
+ ib_free_send_mad(msg);
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
+{
+ struct ib_mad_send_buf *msg;
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return msg;
+ cm_id_priv->msg = msg;
+ return msg;
+}
+
+static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
+{
+ struct cm_id_private *cm_id_priv = msg->context[0];
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!WARN_ON(cm_id_priv->msg != msg))
+ cm_id_priv->msg = NULL;
+
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ cm_deref_id(cm_id_priv);
+ ib_free_send_mad(msg);
}
static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
@@ -373,15 +381,6 @@ static int cm_create_response_msg_ah(struct cm_port *port,
return 0;
}
-static void cm_free_msg(struct ib_mad_send_buf *msg)
-{
- if (msg->ah)
- rdma_destroy_ah(msg->ah, 0);
- if (msg->context[0])
- cm_deref_id(msg->context[0]);
- ib_free_send_mad(msg);
-}
-
static int cm_alloc_response_msg(struct cm_port *port,
struct ib_mad_recv_wc *mad_recv_wc,
struct ib_mad_send_buf **msg)
@@ -395,7 +394,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
if (ret) {
- cm_free_msg(m);
+ ib_free_send_mad(m);
return ret;
}
@@ -403,8 +402,14 @@ static int cm_alloc_response_msg(struct cm_port *port,
return 0;
}
-static void * cm_copy_private_data(const void *private_data,
- u8 private_data_len)
+static void cm_free_response_msg(struct ib_mad_send_buf *msg)
+{
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ ib_free_send_mad(msg);
+}
+
+static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
{
void *data;
@@ -428,62 +433,38 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
-static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
- struct ib_grh *grh, struct cm_av *av)
+static void cm_set_av_port(struct cm_av *av, struct cm_port *port)
{
- struct rdma_ah_attr new_ah_attr;
- int ret;
+ struct cm_port *old_port = av->port;
- av->port = port;
- av->pkey_index = wc->pkey_index;
+ if (old_port == port)
+ return;
- /*
- * av->ah_attr might be initialized based on past wc during incoming
- * connect request or while sending out connect request. So initialize
- * a new ah_attr on stack. If initialization fails, old ah_attr is
- * used for sending any responses. If initialization is successful,
- * than new ah_attr is used by overwriting old one.
- */
- ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
- port->port_num, wc,
- grh, &new_ah_attr);
- if (ret)
- return ret;
+ av->port = port;
+ if (old_port)
+ cm_device_put(old_port->cm_dev);
+ if (port)
+ kref_get(&port->cm_dev->kref);
+}
- rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
- return 0;
+static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
+ struct rdma_ah_attr *ah_attr, struct cm_av *av)
+{
+ cm_set_av_port(av, port);
+ av->pkey_index = wc->pkey_index;
+ rdma_move_ah_attr(&av->ah_attr, ah_attr);
}
static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
- av->port = port;
+ cm_set_av_port(av, port);
av->pkey_index = wc->pkey_index;
return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
port->port_num, wc,
grh, &av->ah_attr);
}
-static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
- struct cm_av *av,
- struct cm_port *port)
-{
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&cm.lock, flags);
-
- if (&cm_id_priv->av == av)
- list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
- else if (&cm_id_priv->alt_av == av)
- list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
- else
- ret = -EINVAL;
-
- spin_unlock_irqrestore(&cm.lock, flags);
- return ret;
-}
-
static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
@@ -527,8 +508,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
static int cm_init_av_by_path(struct sa_path_rec *path,
const struct ib_gid_attr *sgid_attr,
- struct cm_av *av,
- struct cm_id_private *cm_id_priv)
+ struct cm_av *av)
{
struct rdma_ah_attr new_ah_attr;
struct cm_device *cm_dev;
@@ -545,7 +525,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
if (ret)
return ret;
- av->port = port;
+ cm_set_av_port(av, port);
/*
* av->ah_attr might be initialized based on wc or during
@@ -562,36 +542,29 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
return ret;
av->timeout = path->packet_life_time + 1;
-
- ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- if (ret) {
- rdma_destroy_ah_attr(&new_ah_attr);
- return ret;
- }
rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
-static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */
+static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src)
{
- int err;
- u32 id;
-
- err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
- xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
-
- cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
- return err;
+ cm_set_av_port(dest, src->port);
+ cm_set_av_port(src, NULL);
+ dest->pkey_index = src->pkey_index;
+ rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr);
+ dest->timeout = src->timeout;
}
-static u32 cm_local_id(__be32 local_id)
+static void cm_destroy_av(struct cm_av *av)
{
- return (__force u32) (local_id ^ cm.random_id_operand);
+ rdma_destroy_ah_attr(&av->ah_attr);
+ cm_set_av_port(av, NULL);
}
-static void cm_free_id(__be32 local_id)
+static u32 cm_local_id(__be32 local_id)
{
- xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
+ return (__force u32) (local_id ^ cm.random_id_operand);
}
static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
@@ -633,22 +606,25 @@ static int be64_gt(__be64 a, __be64 b)
return (__force u64) a > (__force u64) b;
}
-static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+/*
+ * Inserts a new cm_id_priv into the listen_service_table. Returns cm_id_priv
+ * if the new ID was inserted, NULL if it could not be inserted due to a
+ * collision, or the existing cm_id_priv ready for shared usage.
+ */
+static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
+ ib_cm_handler shared_handler)
{
struct rb_node **link = &cm.listen_service_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv->id.service_id;
- __be64 service_mask = cm_id_priv->id.service_mask;
+ unsigned long flags;
+ spin_lock_irqsave(&cm.lock, flags);
while (*link) {
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
- if ((cur_cm_id_priv->id.service_mask & service_id) ==
- (service_mask & cur_cm_id_priv->id.service_id) &&
- (cm_id_priv->id.device == cur_cm_id_priv->id.device))
- return cur_cm_id_priv;
if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
link = &(*link)->rb_left;
@@ -658,26 +634,38 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
link = &(*link)->rb_left;
else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
link = &(*link)->rb_right;
- else
- link = &(*link)->rb_right;
+ else {
+ /*
+ * Sharing an ib_cm_id with different handlers is not
+ * supported
+ */
+ if (cur_cm_id_priv->id.cm_handler != shared_handler ||
+ cur_cm_id_priv->id.context ||
+ WARN_ON(!cur_cm_id_priv->id.cm_handler)) {
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return NULL;
+ }
+ refcount_inc(&cur_cm_id_priv->refcount);
+ cur_cm_id_priv->listen_sharecount++;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cur_cm_id_priv;
+ }
}
+ cm_id_priv->listen_sharecount++;
rb_link_node(&cm_id_priv->service_node, parent, link);
rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
- return NULL;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cm_id_priv;
}
-static struct cm_id_private * cm_find_listen(struct ib_device *device,
- __be64 service_id)
+static struct cm_id_private *cm_find_listen(struct ib_device *device,
+ __be64 service_id)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
- if ((cm_id_priv->id.service_mask & service_id) ==
- cm_id_priv->id.service_id &&
- (cm_id_priv->id.device == device))
- return cm_id_priv;
if (device < cm_id_priv->id.device)
node = node->rb_left;
@@ -687,14 +675,16 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
node = node->rb_left;
else if (be64_gt(service_id, cm_id_priv->id.service_id))
node = node->rb_right;
- else
- node = node->rb_right;
+ else {
+ refcount_inc(&cm_id_priv->refcount);
+ return cm_id_priv;
+ }
}
return NULL;
}
-static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_id(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_id_table.rb_node;
struct rb_node *parent = NULL;
@@ -723,12 +713,14 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
return NULL;
}
-static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
- __be32 remote_id)
+static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
+ __be32 remote_id)
{
struct rb_node *node = cm.remote_id_table.rb_node;
struct cm_timewait_info *timewait_info;
+ struct cm_id_private *res = NULL;
+ spin_lock_irq(&cm.lock);
while (node) {
timewait_info = rb_entry(node, struct cm_timewait_info,
remote_id_node);
@@ -740,14 +732,18 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
node = node->rb_left;
else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
node = node->rb_right;
- else
- return timewait_info;
+ else {
+ res = cm_acquire_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ break;
+ }
}
- return NULL;
+ spin_unlock_irq(&cm.lock);
+ return res;
}
-static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_qpn(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_qp_table.rb_node;
struct rb_node *parent = NULL;
@@ -776,13 +772,12 @@ static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
return NULL;
}
-static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
- *cm_id_priv)
+static struct cm_id_private *
+cm_insert_remote_sidr(struct cm_id_private *cm_id_priv)
{
struct rb_node **link = &cm.remote_sidr_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
- union ib_gid *port_gid = &cm_id_priv->av.dgid;
__be32 remote_id = cm_id_priv->id.remote_id;
while (*link) {
@@ -794,12 +789,9 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
link = &(*link)->rb_right;
else {
- int cmp;
- cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
- sizeof *port_gid);
- if (cmp < 0)
+ if (cur_cm_id_priv->sidr_slid < cm_id_priv->sidr_slid)
link = &(*link)->rb_left;
- else if (cmp > 0)
+ else if (cur_cm_id_priv->sidr_slid > cm_id_priv->sidr_slid)
link = &(*link)->rb_right;
else
return cur_cm_id_priv;
@@ -810,21 +802,12 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
return NULL;
}
-static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
- enum ib_cm_sidr_status status)
-{
- struct ib_cm_sidr_rep_param param;
-
- memset(&param, 0, sizeof param);
- param.status = status;
- ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
-}
-
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
- ib_cm_handler cm_handler,
- void *context)
+static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
{
struct cm_id_private *cm_id_priv;
+ u32 id;
int ret;
cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
@@ -836,26 +819,54 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
cm_id_priv->id.cm_handler = cm_handler;
cm_id_priv->id.context = context;
cm_id_priv->id.remote_cm_qpn = 1;
- ret = cm_alloc_id(cm_id_priv);
- if (ret)
- goto error;
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
- INIT_LIST_HEAD(&cm_id_priv->prim_list);
- INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
refcount_set(&cm_id_priv->refcount, 1);
- return &cm_id_priv->id;
+
+ ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
+ &cm.local_id_next, GFP_KERNEL);
+ if (ret < 0)
+ goto error;
+ cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+
+ return cm_id_priv;
error:
kfree(cm_id_priv);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Make the ID visible to the MAD handlers and other threads that use the
+ * xarray.
+ */
+static void cm_finalize_id(struct cm_id_private *cm_id_priv)
+{
+ xa_store(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
+ cm_id_priv, GFP_ATOMIC);
+}
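The allocation/publication split above keeps a new ID invisible to the MAD handlers: cm_alloc_id_priv() reserves the local ID with a NULL xarray entry, and only cm_finalize_id() stores the pointer. A hedged sketch of the reader side this enables (hypothetical helper name; the upstream lookup is cm_acquire_id(), whose exact body may differ) — note the struct is freed with kfree_rcu(), which is what makes the RCU-protected load safe:

	/*
	 * Sketch: a reserved-but-not-finalized ID loads as NULL from the
	 * xarray and is treated the same as a missing ID.
	 */
	static struct cm_id_private *lookup_cm_id(__be32 local_id,
						  __be32 remote_id)
	{
		struct cm_id_private *cm_id_priv;

		rcu_read_lock();
		cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
		if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
		    !refcount_inc_not_zero(&cm_id_priv->refcount))
			cm_id_priv = NULL;
		rcu_read_unlock();

		return cm_id_priv;
	}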
+
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, context);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
+
+ cm_finalize_id(cm_id_priv);
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_create_cm_id);
-static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+static struct cm_work *cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
struct cm_work *work;
@@ -874,6 +885,36 @@ static void cm_free_work(struct cm_work *work)
kfree(work);
}
+static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
+ struct cm_work *work)
+ __releases(&cm_id_priv->lock)
+{
+ bool immediate;
+
+ /*
+	 * To deliver the event to the user callback we have to drop the
+	 * spinlock; however, we need to ensure that the user callback is single
+	 * threaded and receives events in temporal order. If there are
+ * already events being processed then thread new events onto a list,
+ * the thread currently processing will pick them up.
+ */
+ immediate = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!immediate) {
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ /*
+	 * This routine always consumes the incoming reference. Once the work
+	 * is queued to the work_list, a reference is held by the thread
+	 * currently running cm_process_work() and this reference is not
+	 * needed.
+ */
+ cm_deref_id(cm_id_priv);
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (immediate)
+ cm_process_work(cm_id_priv, work);
+}
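For context on the hand-off above: work_count starts at -1 (see cm_alloc_id_priv()), so atomic_inc_and_test() returning true means no other thread is currently delivering events and the producer delivers immediately; otherwise the work is queued for the active thread to pick up. A sketch of the consuming side under that protocol (hypothetical name, not the exact upstream cm_process_work() body):

	/*
	 * Drain queued events until atomic_add_negative() drops work_count
	 * back below zero, then release the reference the producer took.
	 */
	static void deliver_cm_events(struct cm_id_private *cm_id_priv,
				      struct cm_work *work)
	{
		int ret;

		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
		cm_free_work(work);

		while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
			spin_lock_irq(&cm_id_priv->lock);
			work = cm_dequeue_work(cm_id_priv);
			spin_unlock_irq(&cm_id_priv->lock);
			if (WARN_ON(!work))
				break;
			ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
							&work->cm_event);
			cm_free_work(work);
		}
		cm_deref_id(cm_id_priv);
	}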
+
static inline int cm_convert_to_ms(int iba_time)
{
/* approximate conversion to ms from 4.096us x 2^iba_time */
@@ -899,8 +940,10 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
return min(31, ack_timeout);
}
-static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+static void cm_remove_remote(struct cm_id_private *cm_id_priv)
{
+ struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
+
if (timewait_info->inserted_remote_id) {
rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
timewait_info->inserted_remote_id = 0;
@@ -912,7 +955,7 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
}
}
-static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+static struct cm_timewait_info *cm_create_timewait_info(__be32 local_id)
{
struct cm_timewait_info *timewait_info;
@@ -932,12 +975,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
unsigned long flags;
struct cm_device *cm_dev;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
if (!cm_dev)
return;
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -956,6 +1001,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
msecs_to_jiffies(wait_time));
spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * The timewait_info is converted into a work and gets freed during
+ * cm_free_work() in cm_timewait_handler().
+ */
+ BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
cm_id_priv->timewait_info = NULL;
}
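The BUILD_BUG_ON above captures the layout assumption that lets the timewait_info be released through its embedded work: with 'work' at offset zero the two pointers refer to the same allocation, so cm_free_work() on the embedded work frees the whole timewait_info. Illustratively (hypothetical helper, not in the patch):

	static struct cm_timewait_info *timewait_from_work(struct cm_work *work)
	{
		/* Valid only because 'work' is the first member. */
		BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
		return container_of(work, struct cm_timewait_info, work);
	}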
@@ -963,10 +1013,12 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_id_priv->id.state = IB_CM_IDLE;
if (cm_id_priv->timewait_info) {
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
@@ -979,104 +1031,116 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
struct cm_work *work;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
spin_lock_irq(&cm_id_priv->lock);
+retest:
switch (cm_id->state) {
case IB_CM_LISTEN:
- spin_unlock_irq(&cm_id_priv->lock);
-
- spin_lock_irq(&cm.lock);
+ spin_lock(&cm.lock);
if (--cm_id_priv->listen_sharecount > 0) {
/* The id is still shared. */
+ WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
cm_deref_id(cm_id_priv);
- spin_unlock_irq(&cm.lock);
return;
}
+ cm_id->state = IB_CM_IDLE;
rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
- spin_unlock_irq(&cm.lock);
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ spin_unlock(&cm.lock);
break;
case IB_CM_SIDR_REQ_SENT:
cm_id->state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_SIDR_REQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
- spin_lock_irq(&cm.lock);
- if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
- rb_erase(&cm_id_priv->sidr_id_node,
- &cm.remote_sidr_table);
- spin_unlock_irq(&cm.lock);
+ cm_send_sidr_rep_locked(cm_id_priv,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_REJECT });
+ /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+ cm_id->state = IB_CM_IDLE;
break;
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
- &cm_id_priv->id.device->node_guid,
- sizeof cm_id_priv->id.device->node_guid,
- NULL, 0);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
+ &cm_id_priv->id.device->node_guid,
+ sizeof(cm_id_priv->id.device->node_guid),
+ NULL, 0);
break;
case IB_CM_REQ_RCVD:
if (err == -ENOMEM) {
/* Do not reject to allow future retries. */
cm_reset_to_idle(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
} else {
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv,
+ IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+ NULL, 0);
}
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* Fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
+ goto retest;
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
break;
case IB_CM_ESTABLISHED:
- spin_unlock_irq(&cm_id_priv->lock);
- if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+ if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
+ cm_id->state = IB_CM_IDLE;
break;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ }
+ cm_send_dreq_locked(cm_id_priv, NULL, 0);
goto retest;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
- break;
+ goto retest;
case IB_CM_DREQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_drep(cm_id, NULL, 0);
+ cm_send_drep_locked(cm_id_priv, NULL, 0);
+ WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+ goto retest;
+ case IB_CM_TIMEWAIT:
+ /*
+ * The cm_acquire_id in cm_timewait_handler will stop working
+ * once we do xa_erase below, so just move to idle here for
+ * consistency.
+ */
+ cm_id->state = IB_CM_IDLE;
break;
- default:
- spin_unlock_irq(&cm_id_priv->lock);
+ case IB_CM_IDLE:
break;
}
+ WARN_ON(cm_id->state != IB_CM_IDLE);
- spin_lock_irq(&cm.lock);
- if (!list_empty(&cm_id_priv->altr_list) &&
- (!cm_id_priv->altr_send_port_not_ready))
- list_del(&cm_id_priv->altr_list);
- if (!list_empty(&cm_id_priv->prim_list) &&
- (!cm_id_priv->prim_send_port_not_ready))
- list_del(&cm_id_priv->prim_list);
- spin_unlock_irq(&cm.lock);
+ spin_lock(&cm.lock);
+	/* Required for cleanup paths related to cm_req_handler() */
+ if (cm_id_priv->timewait_info) {
+ cm_remove_remote(cm_id_priv);
+ kfree(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
+ }
- cm_free_id(cm_id->local_id);
+ WARN_ON(cm_id_priv->listen_sharecount);
+ WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
+ if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
+ rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
- rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
- rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
+ cm_destroy_av(&cm_id_priv->av);
+ cm_destroy_av(&cm_id_priv->alt_av);
kfree(cm_id_priv->private_data);
kfree_rcu(cm_id_priv, rcu);
}
@@ -1087,70 +1151,63 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
}
EXPORT_SYMBOL(ib_destroy_cm_id);
+static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id)
+{
+ if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+ (service_id != IB_CM_ASSIGN_SERVICE_ID))
+ return -EINVAL;
+
+ if (service_id == IB_CM_ASSIGN_SERVICE_ID)
+ cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
+ else
+ cm_id_priv->id.service_id = service_id;
+
+ return 0;
+}
+
/**
- * __ib_cm_listen - Initiates listening on the specified service ID for
+ * ib_cm_listen - Initiates listening on the specified service ID for
* connection and service ID resolution requests.
* @cm_id: Connection identifier associated with the listen request.
* @service_id: Service identifier matched against incoming connection
 * and service ID resolution requests. The service ID should be specified
 * in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
- * @service_mask: Mask applied to service ID used to listen across a
- * range of service IDs. If set to 0, the service ID is matched
- * exactly. This parameter is ignored if %service_id is set to
- * IB_CM_ASSIGN_SERVICE_ID.
*/
-static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
- __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id)
{
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
- int ret = 0;
-
- service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
- service_id &= service_mask;
- if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
- (service_id != IB_CM_ASSIGN_SERVICE_ID))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- if (cm_id->state != IB_CM_IDLE)
- return -EINVAL;
-
- cm_id->state = IB_CM_LISTEN;
- ++cm_id_priv->listen_sharecount;
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
- if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
- cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
- cm_id->service_mask = ~cpu_to_be64(0);
- } else {
- cm_id->service_id = service_id;
- cm_id->service_mask = service_mask;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->id.state != IB_CM_IDLE) {
+ ret = -EINVAL;
+ goto out;
}
- cur_cm_id_priv = cm_insert_listen(cm_id_priv);
- if (cur_cm_id_priv) {
- cm_id->state = IB_CM_IDLE;
- --cm_id_priv->listen_sharecount;
+ ret = cm_init_listen(cm_id_priv, service_id);
+ if (ret)
+ goto out;
+
+ if (!cm_insert_listen(cm_id_priv, NULL)) {
ret = -EBUSY;
+ goto out;
}
- return ret;
-}
-
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
-{
- unsigned long flags;
- int ret;
- spin_lock_irqsave(&cm.lock, flags);
- ret = __ib_cm_listen(cm_id, service_id, service_mask);
- spin_unlock_irqrestore(&cm.lock, flags);
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ ret = 0;
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
/**
- * Create a new listening ib_cm_id and listen on the given service ID.
+ * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
+ * the given service ID.
*
* If there's an existing ID listening on that same device and service ID,
* return it.
@@ -1169,60 +1226,57 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
ib_cm_handler cm_handler,
__be64 service_id)
{
+ struct cm_id_private *listen_id_priv;
struct cm_id_private *cm_id_priv;
- struct ib_cm_id *cm_id;
- unsigned long flags;
int err = 0;
/* Create an ID in advance, since the creation may sleep */
- cm_id = ib_create_cm_id(device, cm_handler, NULL);
- if (IS_ERR(cm_id))
- return cm_id;
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, NULL);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
- spin_lock_irqsave(&cm.lock, flags);
+ err = cm_init_listen(cm_id_priv, service_id);
+ if (err) {
+ ib_destroy_cm_id(&cm_id_priv->id);
+ return ERR_PTR(err);
+ }
- if (service_id == IB_CM_ASSIGN_SERVICE_ID)
- goto new_id;
-
- /* Find an existing ID */
- cm_id_priv = cm_find_listen(device, service_id);
- if (cm_id_priv) {
- if (cm_id->cm_handler != cm_handler || cm_id->context) {
- /* Sharing an ib_cm_id with different handlers is not
- * supported */
- spin_unlock_irqrestore(&cm.lock, flags);
- ib_destroy_cm_id(cm_id);
+ spin_lock_irq(&cm_id_priv->lock);
+ listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
+ if (listen_id_priv != cm_id_priv) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_destroy_cm_id(&cm_id_priv->id);
+ if (!listen_id_priv)
return ERR_PTR(-EINVAL);
- }
- refcount_inc(&cm_id_priv->refcount);
- ++cm_id_priv->listen_sharecount;
- spin_unlock_irqrestore(&cm.lock, flags);
-
- ib_destroy_cm_id(cm_id);
- cm_id = &cm_id_priv->id;
- return cm_id;
+ return &listen_id_priv->id;
}
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ spin_unlock_irq(&cm_id_priv->lock);
-new_id:
- /* Use newly created ID */
- err = __ib_cm_listen(cm_id, service_id, 0);
-
- spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * A listen ID does not need to be in the xarray since it does not
+ * receive mads, is not placed in the remote_id or remote_qpn rbtree,
+ * and does not enter timewait.
+ */
- if (err) {
- ib_destroy_cm_id(cm_id);
- return ERR_PTR(err);
- }
- return cm_id;
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
- u64 hi_tid, low_tid;
+ u64 hi_tid = 0, low_tid;
- hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- low_tid = (u64)cm_id_priv->id.local_id;
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ low_tid = (u64)cm_id_priv->id.local_id;
+ if (!cm_id_priv->av.port)
+ return cpu_to_be64(low_tid);
+
+ spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ if (cm_id_priv->av.port->mad_agent)
+ hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+ spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
return cpu_to_be64(hi_tid | low_tid);
}
@@ -1237,6 +1291,13 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
hdr->tid = tid;
}
+static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id,
+ __be64 tid, u32 attr_mod)
+{
+ cm_format_mad_hdr(hdr, attr_id, tid);
+ hdr->attr_mod = cpu_to_be32(attr_mod);
+}
+
static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
@@ -1244,13 +1305,14 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct sa_path_rec *pri_path = param->primary_path;
struct sa_path_rec *alt_path = param->alternate_path;
bool pri_ext = false;
+ __be16 lid;
if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
pri_path->opa.slid);
- cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv));
+ cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+ cm_form_tid(cm_id_priv), param->ece.attr_mod);
IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
be32_to_cpu(cm_id_priv->id.local_id));
@@ -1303,9 +1365,16 @@ static void cm_format_req(struct cm_req_msg *req_msg,
htons(ntohl(sa_path_get_dlid(
pri_path)))));
} else {
+
+ if (param->primary_path_inbound) {
+ lid = param->primary_path_inbound->ib.dlid;
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(lid));
+ } else
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+
/* Work-around until there's a way to obtain remote LID info */
- IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
- be16_to_cpu(IB_LID_PERMISSIVE));
IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
be16_to_cpu(IB_LID_PERMISSIVE));
}
@@ -1373,6 +1442,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
alt_path->packet_life_time));
}
+ IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);
if (param->private_data && param->private_data_len)
IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
@@ -1381,10 +1451,6 @@ static void cm_format_req(struct cm_req_msg *req_msg,
static int cm_validate_req_param(struct ib_cm_req_param *param)
{
- /* peer-to-peer not supported */
- if (param->peer_to_peer)
- return -EINVAL;
-
if (!param->primary_path)
return -EINVAL;
@@ -1407,7 +1473,9 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
int ib_send_cm_req(struct ib_cm_id *cm_id,
struct ib_cm_req_param *param)
{
+ struct cm_av av = {}, alt_av = {};
struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
struct cm_req_msg *req_msg;
unsigned long flags;
int ret;
@@ -1419,10 +1487,9 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
/* Verify that we're not in timewait. */
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_IDLE) {
+ if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1430,22 +1497,23 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
- goto out;
+ cm_id_priv->timewait_info = NULL;
+ return ret;
}
ret = cm_init_av_by_path(param->primary_path,
- param->ppath_sgid_attr, &cm_id_priv->av,
- cm_id_priv);
+ param->ppath_sgid_attr, &av);
if (ret)
- goto error1;
+ return ret;
if (param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path, NULL,
- &cm_id_priv->alt_av, cm_id_priv);
- if (ret)
- goto error1;
+ &alt_av);
+ if (ret) {
+ cm_destroy_av(&av);
+ return ret;
+ }
}
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = cm_convert_to_ms(
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
@@ -1458,33 +1526,44 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->pkey = param->primary_path->pkey;
cm_id_priv->qp_type = param->qp_type;
- ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
- if (ret)
- goto error1;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+ cm_move_av_from_path(&cm_id_priv->av, &av);
+ if (param->primary_path_outbound)
+ cm_id_priv->av.dlid_datapath =
+ be16_to_cpu(param->primary_path_outbound->ib.dlid);
+
+ if (param->alternate_path)
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
- req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
+ }
+
+ req_msg = (struct cm_req_msg *)msg->mad;
cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
- cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+ msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT;
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- goto error2;
- }
+ trace_icm_send_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
BUG_ON(cm_id->state != IB_CM_IDLE);
cm_id->state = IB_CM_REQ_SENT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
-
-error2: cm_free_msg(cm_id_priv->msg);
-error1: kfree(cm_id_priv->timewait_info);
-out: return ret;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
@@ -1519,9 +1598,12 @@ static int cm_issue_rej(struct cm_port *port,
IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
+ trace_icm_issue_rej(
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
return ret;
}
@@ -1534,7 +1616,7 @@ static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
req_msg))));
}
-static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
+static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
struct sa_path_rec *path, union ib_gid *gid)
{
if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
@@ -1545,14 +1627,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
u32 lid;
if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(primary_path,
- IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
- req_msg));
+ sa_path_set_dlid(primary_path, wc->slid);
sa_path_set_slid(primary_path,
IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
req_msg));
@@ -1589,7 +1670,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
primary_path->dgid =
*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
@@ -1647,20 +1729,20 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
if (sa_path_is_roce(alt_path))
alt_path->roce.route_resolved = false;
}
- cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
+ cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc);
}
static u16 cm_get_bth_pkey(struct cm_work *work)
{
struct ib_device *ib_dev = work->port->cm_dev->ib_device;
- u8 port_num = work->port->port_num;
+ u32 port_num = work->port->port_num;
u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
u16 pkey;
int ret;
ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
if (ret) {
- dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
+ dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %u, pkey index %u). %d\n",
port_num, pkey_index, ret);
return 0;
}
@@ -1669,7 +1751,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
}
/**
- * Convert OPA SGID to IB SGID
+ * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
* ULPs (such as IPoIB) do not understand OPA GIDs and will
* reject them as the local_gid will not match the sgid. Therefore,
* change the pathrec's SGID to an IB SGID.
@@ -1681,7 +1763,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
struct sa_path_rec *path)
{
struct ib_device *dev = work->port->cm_dev->ib_device;
- u8 port_num = work->port->port_num;
+ u32 port_num = work->port->port_num;
if (rdma_cap_opa_ah(dev, port_num) &&
(ib_is_opa_gid(&path->sgid))) {
@@ -1734,6 +1816,9 @@ static void cm_format_req_event(struct cm_work *work,
param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
+ param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg);
+ param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod);
+
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
}
@@ -1783,17 +1868,17 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg,
static void cm_format_rej(struct cm_rej_msg *rej_msg,
struct cm_id_private *cm_id_priv,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len, enum ib_cm_state state)
{
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
be32_to_cpu(cm_id_priv->id.remote_id));
- switch(cm_id_priv->id.state) {
+ switch (state) {
case IB_CM_REQ_RCVD:
IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0));
IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
@@ -1834,12 +1919,16 @@ static void cm_dup_req_handler(struct cm_work *work,
struct ib_mad_send_buf *msg = NULL;
int ret;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REQ_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REQ_COUNTER]);
/* Quick state check to discard duplicate REQs. */
- if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) {
+ spin_unlock_irq(&cm_id_priv->lock);
return;
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
if (ret)
@@ -1854,30 +1943,31 @@ static void cm_dup_req_handler(struct cm_work *work,
cm_id_priv->private_data_len);
break;
case IB_CM_TIMEWAIT:
- cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
- IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv,
+ IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0,
+ IB_CM_TIMEWAIT);
break;
default:
goto unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
return;
unlock: spin_unlock_irq(&cm_id_priv->lock);
-free: cm_free_msg(msg);
+free: cm_free_response_msg(msg);
}
-static struct cm_id_private * cm_match_req(struct cm_work *work,
- struct cm_id_private *cm_id_priv)
+static struct cm_id_private *cm_match_req(struct cm_work *work,
+ struct cm_id_private *cm_id_priv)
{
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
- struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1898,7 +1988,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Check for stale connections. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -1907,8 +1997,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
return NULL;
@@ -1919,19 +2008,14 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
cm_id_priv->id.device,
cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
if (!listen_cm_id_priv) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
NULL, 0);
- goto out;
+ return NULL;
}
- refcount_inc(&listen_cm_id_priv->refcount);
- refcount_inc(&cm_id_priv->refcount);
- cm_id_priv->id.state = IB_CM_REQ_RCVD;
- atomic_inc(&cm_id_priv->work_count);
spin_unlock_irq(&cm.lock);
-out:
return listen_cm_id_priv;
}
@@ -1973,7 +2057,6 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
static int cm_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
const struct ib_global_route *grh;
@@ -1982,13 +2065,32 @@ static int cm_req_handler(struct cm_work *work)
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
cm_id_priv->id.remote_id =
cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
+ cm_id_priv->tid = req_msg->hdr.tid;
+ cm_id_priv->timeout_ms = cm_convert_to_ms(
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
+ cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
+ cm_id_priv->remote_qpn =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
+ cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+ cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
@@ -1998,43 +2100,39 @@ static int cm_req_handler(struct cm_work *work)
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
goto destroy;
}
- cm_id_priv->timewait_info->work.remote_id =
- cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+ cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
cm_id_priv->timewait_info->remote_ca_guid =
cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
- cm_id_priv->timewait_info->remote_qpn =
- cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn;
+
+ /*
+ * Note that the ID pointer is not in the xarray at this point,
+ * so this set is only visible to the local thread.
+ */
+ cm_id_priv->id.state = IB_CM_REQ_RCVD;
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
- pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
- be32_to_cpu(cm_id->local_id));
+ trace_icm_no_listener_err(&cm_id_priv->id);
+ cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
- goto free_timeinfo;
+ goto destroy;
}
- cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_id_priv->id.service_id =
- cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
-
- cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
-
memset(&work->path[0], 0, sizeof(work->path[0]));
if (cm_req_has_alt_path(req_msg))
memset(&work->path[1], 0, sizeof(work->path[1]));
grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
gid_attr = grh->sgid_attr;
- if (gid_attr &&
- rdma_protocol_roce(work->port->cm_dev->ib_device,
- work->port->port_num)) {
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) {
work->path[0].rec_type =
sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
} else {
+ cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_path_set_rec_type(
work->port->cm_dev->ib_device, work->port->port_num,
&work->path[0],
@@ -2044,13 +2142,15 @@ static int cm_req_handler(struct cm_work *work)
if (cm_req_has_alt_path(req_msg))
work->path[1].rec_type = work->path[0].rec_type;
cm_format_paths_from_req(req_msg, &work->path[0],
- &work->path[1]);
+ &work->path[1], work->mad_recv_wc->wc);
if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
sa_path_set_dmac(&work->path[0],
cm_id_priv->av.ah_attr.roce.dmac);
work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
- cm_id_priv);
+
+ /* This destroy call is needed to pair with cm_init_av_for_response */
+ cm_destroy_av(&cm_id_priv->av);
+ ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
if (ret) {
int err;
@@ -2058,54 +2158,55 @@ static int cm_req_handler(struct cm_work *work)
work->port->port_num, 0,
&work->path[0].sgid);
if (err)
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
NULL, 0, NULL, 0);
else
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid,
sizeof(work->path[0].sgid),
NULL, 0);
goto rejected;
}
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB)
+ cm_id_priv->av.dlid_datapath =
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg);
+
if (cm_req_has_alt_path(req_msg)) {
ret = cm_init_av_by_path(&work->path[1], NULL,
- &cm_id_priv->alt_av, cm_id_priv);
+ &cm_id_priv->alt_av);
if (ret) {
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+ ib_send_cm_rej(&cm_id_priv->id,
+ IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
sizeof(work->path[0].sgid), NULL, 0);
goto rejected;
}
}
- cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->timeout_ms = cm_convert_to_ms(
- IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
- cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
- cm_id_priv->remote_qpn =
- cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
- cm_id_priv->initiator_depth =
- IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
- cm_id_priv->responder_resources =
- IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
- cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
- cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
- cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
- cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
- cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
- cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
+
+ /* Now MAD handlers can see the new ID */
+ spin_lock_irq(&cm_id_priv->lock);
+ cm_finalize_id(cm_id_priv);
+
+ /* Refcount belongs to the event, pairs with cm_process_work() */
+ refcount_inc(&cm_id_priv->refcount);
+ cm_queue_work_unlock(cm_id_priv, work);
+ /*
+	 * Since this ID was just created and was not made visible to other
+	 * MAD handlers until the cm_finalize_id() above, we know that
+	 * cm_process_work() will deliver the event and that the listen_cm_id
+	 * embedded in the event can be dereferenced here.
+ */
cm_deref_id(listen_cm_id_priv);
return 0;
rejected:
- refcount_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
-free_timeinfo:
- kfree(cm_id_priv->timewait_info);
destroy:
- ib_destroy_cm_id(cm_id);
+ ib_destroy_cm_id(&cm_id_priv->id);
return ret;
}
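cm_queue_work_unlock() used above is defined outside this hunk; what it must do can be read off the open-coded sequence it replaces in the handlers further down (the removed atomic_inc_and_test()/work_list/cm_process_work() lines). A minimal sketch, assuming the helper simply consolidates that removed pattern:

	/* Sketch only -- reconstructed from the removed pattern, not the
	 * actual helper added by this patch.  Queues the event, or delivers
	 * it directly when no other work is pending, and drops the lock.
	 */
	static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
					 struct cm_work *work)
		__releases(&cm_id_priv->lock)
	{
		bool immediate;

		/* True when the count reaches zero, i.e. nothing else queued */
		immediate = atomic_inc_and_test(&cm_id_priv->work_count);
		if (!immediate)
			list_add_tail(&work->list, &cm_id_priv->work_list);
		spin_unlock_irq(&cm_id_priv->lock);

		if (immediate)
			cm_process_work(cm_id_priv, work); /* consumes the ref */
		else
			cm_deref_id(cm_id_priv);
	}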
@@ -2113,7 +2214,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_rep_param *param)
{
- cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
+ cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid,
+ param->ece.attr_mod);
IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
be32_to_cpu(cm_id_priv->id.local_id));
IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
@@ -2140,6 +2242,10 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
}
+ IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id);
+ IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8);
+ IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16);
+
if (param->private_data && param->private_data_len)
IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
param->private_data_len);
@@ -2162,36 +2268,42 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
- pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+ trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto out;
+ }
rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+ trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_REP_SENT;
- cm_id_priv->msg = msg;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+ WARN_ONCE(param->qp_num & 0xFF000000,
+ "IBTA declares QPN to be 24 bits, but it is 0x%X\n",
+ param->qp_num);
cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out_free:
+ cm_free_priv_msg(msg);
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rep);
@@ -2233,19 +2345,21 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
- pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto error;
+ }
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2284,6 +2398,11 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
+ param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg);
+ param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod);
+
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
}
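The 24-bit ECE vendor ID is split across three byte-wide fields when the REP is built (cm_format_rep() above) and reassembled here on receive. A quick worked example of the packing, with an arbitrary value chosen purely for illustration:

	/* Round-trip of a 24-bit vendor ID, e.g. 0x123456 (illustration only) */
	u32 vendor_id = 0x123456;
	u8 vid_l = vendor_id & 0xFF;		/* 0x56 -> CM_REP_VENDOR_ID_L */
	u8 vid_m = (vendor_id >> 8) & 0xFF;	/* 0x34 -> CM_REP_VENDOR_ID_M */
	u8 vid_h = (vendor_id >> 16) & 0xFF;	/* 0x12 -> CM_REP_VENDOR_ID_H */
	u32 out = (vid_h << 16) | (vid_m << 8) | vid_l;	/* == 0x123456 again */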
@@ -2302,8 +2421,8 @@ static void cm_dup_rep_handler(struct cm_work *work)
if (!cm_id_priv)
return;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REP_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
if (ret)
goto deref;
@@ -2322,13 +2441,14 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
goto deref;
unlock: spin_unlock_irq(&cm_id_priv->lock);
-free: cm_free_msg(msg);
+free: cm_free_response_msg(msg);
deref: cm_deref_id(cm_id_priv);
}
@@ -2338,7 +2458,6 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_rep_msg *rep_msg;
int ret;
struct cm_id_private *cur_cm_id_priv;
- struct ib_cm_id *cm_id;
struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2346,7 +2465,7 @@ static int cm_rep_handler(struct cm_work *work)
cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
- pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ trace_icm_remote_no_priv_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@@ -2359,13 +2478,12 @@ static int cm_rep_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
break;
default:
- spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug(
- "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
- __func__, cm_id_priv->id.state,
+ trace_icm_rep_unknown_err(
IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
- IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+ cm_id_priv->id.state);
+ spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
@@ -2381,16 +2499,14 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ trace_icm_insert_failed_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
/* Check for a stale connection. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- rb_erase(&cm_id_priv->timewait_info->remote_id_node,
- &cm.remote_id_table);
- cm_id_priv->timewait_info->inserted_remote_id = 0;
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -2400,14 +2516,12 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug(
- "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ trace_icm_staleconn_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
@@ -2434,18 +2548,8 @@ static int cm_rep_handler(struct cm_work *work)
cm_ack_timeout(cm_id_priv->target_ack_delay,
cm_id_priv->alt_av.timeout - 1);
- /* todo: handle peer_to_peer */
-
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
error:
@@ -2456,7 +2560,6 @@ error:
static int cm_establish_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
- int ret;
/* See comment in cm_establish about lookup. */
cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
@@ -2469,16 +2572,8 @@ static int cm_establish_handler(struct cm_work *work)
goto out;
}
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2489,7 +2584,6 @@ static int cm_rtu_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rtu_msg *rtu_msg;
- int ret;
rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2505,22 +2599,14 @@ static int cm_rtu_handler(struct cm_work *work)
if (cm_id_priv->id.state != IB_CM_REP_SENT &&
cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
spin_unlock_irq(&cm_id_priv->lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_RTU_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_RTU_COUNTER]);
goto out;
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2546,35 +2632,30 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
private_data_len);
}
-int ib_send_cm_dreq(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
+ const void *private_data, u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED) {
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
- ret = -EINVAL;
- goto out;
+ if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
+ trace_icm_dreq_skipped(&cm_id_priv->id);
+ return -EINVAL;
}
- if (cm_id->lap_state == IB_CM_LAP_SENT ||
- cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->msg);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret) {
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
cm_enter_timewait(cm_id_priv);
- goto out;
+ return PTR_ERR(msg);
}
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
@@ -2582,17 +2663,29 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+ trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
+ cm_free_priv_msg(msg);
return ret;
}
- cm_id->state = IB_CM_DREQ_SENT;
- cm_id_priv->msg = msg;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_id_priv->id.state = IB_CM_DREQ_SENT;
+ return 0;
+}
+
+int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_dreq);
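The same locked/unlocked split is applied to DREP, REJ and SIDR REP below: the state-machine work moves into a cm_send_*_locked() helper and the exported entry point becomes a thin lock/unlock wrapper. One motivation is that internal paths which already hold cm_id_priv->lock can now issue the message directly; a hypothetical caller (not shown in this hunk) would look like:

	/* Hypothetical caller, for illustration only */
	spin_lock_irq(&cm_id_priv->lock);
	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
		cm_send_dreq_locked(cm_id_priv, NULL, 0);
	spin_unlock_irq(&cm_id_priv->lock);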
@@ -2613,51 +2706,58 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg,
private_data_len);
}
-int ib_send_cm_drep(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- unsigned long flags;
- void *data;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
return -EINVAL;
- data = cm_copy_private_data(private_data, private_data_len);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_DREQ_RCVD) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
- __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
+ if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
+ trace_icm_send_drep_err(&cm_id_priv->id);
+ kfree(private_data);
return -EINVAL;
}
- cm_set_private_data(cm_id_priv, data, private_data_len);
+ cm_set_private_data(cm_id_priv, private_data, private_data_len);
cm_enter_timewait(cm_id_priv);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
+ return 0;
+}
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ void *data;
+ int ret;
+
+ data = cm_copy_private_data(private_data, private_data_len);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_drep_locked(cm_id_priv, data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_drep);
@@ -2683,9 +2783,12 @@ static int cm_issue_drep(struct cm_port *port,
IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
+ trace_icm_issue_drep(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
return ret;
}
@@ -2695,19 +2798,17 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- int ret;
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
if (!cm_id_priv) {
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug(
- "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ trace_icm_no_priv_err(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
@@ -2723,18 +2824,17 @@ static int cm_dreq_handler(struct cm_work *work)
switch (cm_id_priv->id.state) {
case IB_CM_REP_SENT:
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- break;
- case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_TIMEWAIT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
if (IS_ERR(msg))
goto unlock;
@@ -2746,29 +2846,19 @@ static int cm_dreq_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL))
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
goto deref;
case IB_CM_DREQ_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
goto unlock;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
cm_id_priv->tid = dreq_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -2780,7 +2870,6 @@ static int cm_drep_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_drep_msg *drep_msg;
- int ret;
drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2800,81 +2889,82 @@ static int cm_drep_handler(struct cm_work *work)
}
cm_enter_timewait(cm_id_priv);
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
return -EINVAL;
}
-int ib_send_cm_rej(struct ib_cm_id *cm_id,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
+ enum ib_cm_state state = cm_id_priv->id.state;
struct ib_mad_send_buf *msg;
- unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
(ari && ari_length > IB_CM_REJ_ARI_LENGTH))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state) {
+ switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_reset_to_idle(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_enter_timewait(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
- ret = -EINVAL;
- goto out;
+ trace_icm_send_unknown_rej_err(&cm_id_priv->id);
+ return -EINVAL;
}
- if (ret)
- goto out;
-
+ trace_icm_send_rej(&cm_id_priv->id, reason);
ret = ib_post_send_mad(msg, NULL);
- if (ret)
+ if (ret) {
cm_free_msg(msg);
+ return ret;
+ }
+
+ return 0;
+}
+
+int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason,
+ void *ari, u8 ari_length, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_rej_locked(cm_id_priv, reason, ari, ari_length,
+ private_data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rej);
@@ -2893,26 +2983,17 @@ static void cm_format_rej_event(struct cm_work *work)
IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
}
-static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
+static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
{
- struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
__be32 remote_id;
remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
- spin_lock_irq(&cm.lock);
- timewait_info = cm_find_remote_id(
+ cm_id_priv = cm_find_remote_id(
*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
remote_id);
- if (!timewait_info) {
- spin_unlock_irq(&cm.lock);
- return NULL;
- }
- cm_id_priv =
- cm_acquire_id(timewait_info->work.local_id, remote_id);
- spin_unlock_irq(&cm.lock);
} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
CM_MSG_RESPONSE_REQ)
cm_id_priv = cm_acquire_id(
@@ -2930,7 +3011,6 @@ static int cm_rej_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rej_msg *rej_msg;
- int ret;
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_rejected_id(rej_msg);
@@ -2945,8 +3025,8 @@ static int cm_rej_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
@@ -2955,8 +3035,8 @@ static int cm_rej_handler(struct cm_work *work)
cm_reset_to_idle(cm_id_priv);
break;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
cm_enter_timewait(cm_id_priv);
@@ -2965,30 +3045,18 @@ static int cm_rej_handler(struct cm_work *work)
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
break;
}
- /* fall through */
+ fallthrough;
default:
+ trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
- ret = -EINVAL;
goto out;
}
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3019,7 +3087,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch(cm_id_priv->id.state) {
+ switch (cm_id_priv->id.state) {
case IB_CM_REQ_RCVD:
cm_state = IB_CM_MRA_REQ_SENT;
lap_state = cm_id->lap_state;
@@ -3037,26 +3105,27 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
msg_response = CM_MSG_RESPONSE_OTHER;
break;
}
- /* fall through */
+ fallthrough;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
- goto error1;
+ goto error_unlock;
}
if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error1;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto error_unlock;
+ }
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
msg_response, service_timeout,
private_data, private_data_len);
+ trace_icm_send_mra(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
- goto error2;
+ goto error_free_msg;
}
cm_id->state = cm_state;
@@ -3066,18 +3135,16 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
-error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- return ret;
-
-error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
+error_free_msg:
cm_free_msg(msg);
+error_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ kfree(data);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_mra);
-static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
+static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
{
switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
case CM_MSG_RESPONSE_REQ:
@@ -3098,7 +3165,7 @@ static int cm_mra_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_mra_msg *mra_msg;
- int timeout, ret;
+ int timeout;
mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_mraed_id(mra_msg);
@@ -3117,16 +3184,14 @@ static int cm_mra_handler(struct cm_work *work)
case IB_CM_REQ_SENT:
if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
CM_MSG_RESPONSE_REQ ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
break;
case IB_CM_REP_SENT:
if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
CM_MSG_RESPONSE_REP ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
break;
@@ -3134,39 +3199,28 @@ static int cm_mra_handler(struct cm_work *work)
if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout)) {
+ ib_modify_mad(cm_id_priv->msg, timeout)) {
if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- atomic_long_inc(&work->port->
- counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
goto out;
}
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
case IB_CM_MRA_REQ_RCVD:
case IB_CM_MRA_REP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
- /* fall through */
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
+ fallthrough;
default:
- pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
cm_id_priv->msg->context[1] = (void *) (unsigned long)
cm_id_priv->id.state;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
spin_unlock_irq(&cm_id_priv->lock);
@@ -3226,6 +3280,8 @@ static int cm_lap_handler(struct cm_work *work)
struct cm_lap_msg *lap_msg;
struct ib_cm_lap_event_param *param;
struct ib_mad_send_buf *msg = NULL;
+ struct rdma_ah_attr ah_attr;
+ struct cm_av alt_av = {};
int ret;
/* Currently Alternate path messages are not supported for
@@ -3254,7 +3310,25 @@ static int cm_lap_handler(struct cm_work *work)
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
+ ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto deref;
+
+ ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
+ if (ret) {
+ rdma_destroy_ah_attr(&ah_attr);
+ goto deref;
+ }
+
spin_lock_irq(&cm_id_priv->lock);
+ cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
+ &ah_attr, &cm_id_priv->av);
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
+
if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
goto unlock;
@@ -3263,8 +3337,8 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_LAP_IDLE:
break;
case IB_CM_MRA_LAP_SENT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
if (IS_ERR(msg))
goto unlock;
@@ -3278,38 +3352,19 @@ static int cm_lap_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL))
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
goto deref;
case IB_CM_LAP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
goto unlock;
default:
goto unlock;
}
- ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
- if (ret)
- goto unlock;
-
- ret = cm_init_av_by_path(param->alternate_path, NULL,
- &cm_id_priv->alt_av, cm_id_priv);
- if (ret)
- goto unlock;
-
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -3321,7 +3376,6 @@ static int cm_apr_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_apr_msg *apr_msg;
- int ret;
/* Currently Alternate path messages are not supported for
* RoCE link layer.
@@ -3354,18 +3408,8 @@ static int cm_apr_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- cm_id_priv->msg = NULL;
-
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3376,7 +3420,6 @@ static int cm_timewait_handler(struct cm_work *work)
{
struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
- int ret;
timewait_info = container_of(work, struct cm_timewait_info, work);
spin_lock_irq(&cm.lock);
@@ -3395,15 +3438,7 @@ static int cm_timewait_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3433,6 +3468,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
+ struct cm_av av = {};
unsigned long flags;
int ret;
@@ -3441,40 +3477,42 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, param->sgid_attr,
- &cm_id_priv->av,
- cm_id_priv);
+ ret = cm_init_av_by_path(param->path, param->sgid_attr, &av);
if (ret)
- goto out;
+ return ret;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ cm_move_av_from_path(&cm_id_priv->av, &av);
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = param->timeout_ms;
cm_id_priv->max_cm_retries = param->max_cm_retries;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
+ if (cm_id->state != IB_CM_IDLE) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
+ }
- cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
+ cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
param);
msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
+ msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT;
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_IDLE)
- ret = ib_post_send_mad(msg, NULL);
- else
- ret = -EINVAL;
-
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- goto out;
- }
+ trace_icm_send_sidr_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_SIDR_REQ_SENT;
- cm_id_priv->msg = msg;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-out:
+ return 0;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);
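ib_send_cm_sidr_req() now resolves the path into a stack-local struct cm_av before taking the lock and only then transfers it into the cm_id with cm_move_av_from_path(); cm_lap_handler() above uses the same pattern for the alternate AV. The move helper itself is outside this hunk; a sketch of what it is assumed to do:

	/* Assumed shape of the move helper -- not the patch's actual code */
	static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src)
	{
		cm_destroy_av(dest);		/* release whatever dest held */
		*dest = *src;			/* take over port, AH attr, etc. */
		memset(src, 0, sizeof(*src));	/* src no longer owns anything */
	}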
@@ -3502,64 +3540,73 @@ static void cm_format_sidr_req_event(struct cm_work *work,
static int cm_sidr_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+ struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_sidr_req_msg *sidr_req_msg;
struct ib_wc *wc;
int ret;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
/* Record SGID/SLID and request ID for lookup. */
sidr_req_msg = (struct cm_sidr_req_msg *)
work->mad_recv_wc->recv_buf.mad;
+
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
+ cm_id_priv->tid = sidr_req_msg->hdr.tid;
+
wc = work->mad_recv_wc->wc;
- cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
- cm_id_priv->av.dgid.global.interface_id = 0;
+ cm_id_priv->sidr_slid = wc->slid;
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
if (ret)
goto out;
- cm_id_priv->id.remote_id =
- cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
- cm_id_priv->tid = sidr_req_msg->hdr.tid;
- atomic_inc(&cm_id_priv->work_count);
-
spin_lock_irq(&cm.lock);
- cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
- if (cur_cm_id_priv) {
+ listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+ if (listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_SIDR_REQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_SIDR_REQ_COUNTER]);
goto out; /* Duplicate message. */
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
- cur_cm_id_priv = cm_find_listen(
- cm_id->device,
- cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
- if (!cur_cm_id_priv) {
+ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+ cm_id_priv->id.service_id);
+ if (!listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
+ ib_send_cm_sidr_rep(&cm_id_priv->id,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_UNSUPPORTED });
goto out; /* No match. */
}
- refcount_inc(&cur_cm_id_priv->refcount);
- refcount_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
- cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = cur_cm_id_priv->id.context;
- cm_id_priv->id.service_id =
- cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
- cm_deref_id(cur_cm_id_priv);
+ /*
+ * A SIDR ID does not need to be in the xarray since it does not receive
+ * mads, is not placed in the remote_id or remote_qpn rbtree, and does
+ * not enter timewait.
+ */
+
+ cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+ cm_free_work(work);
+ /*
+ * A pointer to the listen_cm_id is held in the event, so this deref
+ * must be after the event is delivered above.
+ */
+ cm_deref_id(listen_cm_id_priv);
+ if (ret)
+ cm_destroy_id(&cm_id_priv->id, ret);
return 0;
out:
ib_destroy_cm_id(&cm_id_priv->id);
@@ -3570,8 +3617,8 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
- cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
- cm_id_priv->tid);
+ cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
+ cm_id_priv->tid, param->ece.attr_mod);
IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
be32_to_cpu(cm_id_priv->id.remote_id));
IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
@@ -3579,6 +3626,10 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
be64_to_cpu(cm_id_priv->id.service_id));
IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg,
+ param->ece.vendor_id & 0xFF);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg,
+ (param->ece.vendor_id >> 8) & 0xFF);
if (param->info && param->info_length)
IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
@@ -3589,41 +3640,36 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
param->private_data, param->private_data_len);
}
-int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
- struct ib_cm_sidr_rep_param *param)
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
(param->private_data &&
param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
- ret = -EINVAL;
- goto error;
- }
+ if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD)
+ return -EINVAL;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
+ trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
- cm_id->state = IB_CM_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
+ cm_id_priv->id.state = IB_CM_IDLE;
spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
@@ -3631,8 +3677,19 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
}
spin_unlock_irqrestore(&cm.lock, flags);
return 0;
+}
-error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_rep_param *param)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_sidr_rep_locked(cm_id_priv, param);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_rep);
@@ -3676,7 +3733,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
spin_unlock_irq(&cm_id_priv->lock);
cm_format_sidr_rep_event(work, cm_id_priv);
@@ -3687,25 +3744,28 @@ out:
return -EINVAL;
}
-static void cm_process_send_error(struct ib_mad_send_buf *msg,
+static void cm_process_send_error(struct cm_id_private *cm_id_priv,
+ struct ib_mad_send_buf *msg,
+ enum ib_cm_state state,
enum ib_wc_status wc_status)
{
- struct cm_id_private *cm_id_priv;
- struct ib_cm_event cm_event;
- enum ib_cm_state state;
+ struct ib_cm_event cm_event = {};
int ret;
- memset(&cm_event, 0, sizeof cm_event);
- cm_id_priv = msg->context[0];
-
/* Discard old sends or ones without a response. */
spin_lock_irq(&cm_id_priv->lock);
- state = (enum ib_cm_state) (unsigned long) msg->context[1];
- if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
- goto discard;
+ if (msg != cm_id_priv->msg) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_free_msg(msg);
+ return;
+ }
+ cm_free_priv_msg(msg);
+
+ if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS ||
+ wc_status == IB_WC_WR_FLUSH_ERR)
+ goto out_unlock;
- pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
- state, ib_wc_status_msg(wc_status));
+ trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3726,26 +3786,27 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
cm_event.event = IB_CM_SIDR_REQ_ERROR;
break;
default:
- goto discard;
+ goto out_unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
cm_event.param.send_status = wc_status;
/* No other events can occur on the cm_id at this point. */
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
- cm_free_msg(msg);
if (ret)
ib_destroy_cm_id(&cm_id_priv->id);
return;
-discard:
+out_unlock:
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_msg(msg);
}
static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct cm_id_private *cm_id_priv = msg->context[0];
+ enum ib_cm_state state =
+ (enum ib_cm_state)(unsigned long)msg->context[1];
struct cm_port *port;
u16 attr_index;
@@ -3758,28 +3819,19 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
* set to a cm_id), and is not a REJ, then it is a send that was
* manually retried.
*/
- if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+ if (!cm_id_priv && (attr_index != CM_REJ_COUNTER))
msg->retries = 1;
- atomic_long_add(1 + msg->retries,
- &port->counter_group[CM_XMIT].counter[attr_index]);
+ atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]);
if (msg->retries)
atomic_long_add(msg->retries,
- &port->counter_group[CM_XMIT_RETRIES].
- counter[attr_index]);
+ &port->counters[CM_XMIT_RETRIES][attr_index]);
- switch (mad_send_wc->status) {
- case IB_WC_SUCCESS:
- case IB_WC_WR_FLUSH_ERR:
- cm_free_msg(msg);
- break;
- default:
- if (msg->context[0] && msg->context[1])
- cm_process_send_error(msg, mad_send_wc->status);
- else
- cm_free_msg(msg);
- break;
- }
+ if (cm_id_priv)
+ cm_process_send_error(cm_id_priv, msg, state,
+ mad_send_wc->status);
+ else
+ cm_free_response_msg(msg);
}
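cm_send_handler() now recovers everything from the send buffer's context slots: context[0] is the owning cm_id_priv (NULL for ad-hoc response MADs, which go through cm_free_response_msg()), and context[1] is the state the send was issued in. The cm_alloc_priv_msg()/cm_free_priv_msg() pair that keeps cm_id_priv->msg up to date is defined outside this hunk; judging by the removed "cm_id_priv->msg = msg" assignments in the callers, the allocator is assumed to be roughly:

	/* Sketch under the stated assumption -- not the patch's actual helper */
	static struct ib_mad_send_buf *
	cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
	{
		struct ib_mad_send_buf *msg;

		msg = cm_alloc_msg(cm_id_priv);
		if (IS_ERR(msg))
			return msg;
		cm_id_priv->msg = msg;	/* matched later in cm_process_send_error() */
		return msg;
	}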
static void cm_work_handler(struct work_struct *_work)
@@ -3828,7 +3880,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
- pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+ trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3854,8 +3906,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state)
- {
+ switch (cm_id->state) {
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
cm_id->state = IB_CM_ESTABLISHED;
@@ -3864,8 +3915,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
@@ -3905,9 +3955,7 @@ out:
static int cm_migrate(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- struct cm_av tmp_av;
unsigned long flags;
- int tmp_send_port_not_ready;
int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3916,14 +3964,7 @@ static int cm_migrate(struct ib_cm_id *cm_id)
(cm_id->lap_state == IB_CM_LAP_UNINIT ||
cm_id->lap_state == IB_CM_LAP_IDLE)) {
cm_id->lap_state = IB_CM_LAP_IDLE;
- /* Swap address vector */
- tmp_av = cm_id_priv->av;
cm_id_priv->av = cm_id_priv->alt_av;
- cm_id_priv->alt_av = tmp_av;
- /* Swap port send ready state */
- tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
- cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
- cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
} else
ret = -EINVAL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -4005,8 +4046,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
}
attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
- atomic_long_inc(&port->counter_group[CM_RECV].
- counter[attr_id - CM_ATTR_ID_OFFSET]);
+ atomic_long_inc(&port->counters[CM_RECV][attr_id - CM_ATTR_ID_OFFSET]);
work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
if (!work) {
@@ -4058,13 +4098,12 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_ATOMIC;
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
- qp_attr->port_num = cm_id_priv->av.port->port_num;
+ if (cm_id_priv->av.port)
+ qp_attr->port_num = cm_id_priv->av.port->port_num;
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4091,6 +4130,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) &&
+ cm_id_priv->av.dlid_datapath &&
+ (cm_id_priv->av.dlid_datapath != 0xffff))
+ qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath;
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
@@ -4102,7 +4145,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) &&
+ cm_id_priv->alt_av.port) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -4112,9 +4156,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4151,7 +4193,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
qp_attr->retry_cnt = cm_id_priv->retry_count;
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
- /* fall through */
+ fallthrough;
case IB_QPT_XRC_TGT:
*qp_attr_mask |= IB_QP_TIMEOUT;
qp_attr->timeout = cm_id_priv->av.timeout;
@@ -4165,7 +4207,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
}
} else {
*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
- qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+ if (cm_id_priv->alt_av.port)
+ qp_attr->alt_port_num =
+ cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -4174,9 +4218,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4210,75 +4252,76 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
-static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
- char *buf)
+static ssize_t cm_show_counter(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
- struct cm_counter_group *group;
- struct cm_counter_attribute *cm_attr;
-
- group = container_of(obj, struct cm_counter_group, obj);
- cm_attr = container_of(attr, struct cm_counter_attribute, attr);
-
- return sprintf(buf, "%ld\n",
- atomic_long_read(&group->counter[cm_attr->index]));
-}
-
-static const struct sysfs_ops cm_counter_ops = {
- .show = cm_show_counter
-};
+ struct cm_counter_attribute *cm_attr =
+ container_of(attr, struct cm_counter_attribute, attr);
+ struct cm_device *cm_dev = ib_get_client_data(ibdev, &cm_client);
-static struct kobj_type cm_counter_obj_type = {
- .sysfs_ops = &cm_counter_ops,
- .default_attrs = cm_counter_default_attrs
-};
+ if (WARN_ON(!cm_dev))
+ return -EINVAL;
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ return sysfs_emit(
+ buf, "%ld\n",
+ atomic_long_read(
+ &cm_dev->port[port_num - 1]
+ ->counters[cm_attr->group][cm_attr->index]));
}
-struct class cm_class = {
- .owner = THIS_MODULE,
- .name = "infiniband_cm",
- .devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
-static int cm_create_port_fs(struct cm_port *port)
-{
- int i, ret;
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++) {
- ret = ib_port_register_module_stat(port->cm_dev->ib_device,
- port->port_num,
- &port->counter_group[i].obj,
- &cm_counter_obj_type,
- counter_group_names[i]);
- if (ret)
- goto error;
+#define CM_COUNTER_ATTR(_name, _group, _index) \
+ { \
+ .attr = __ATTR(_name, 0444, cm_show_counter, NULL), \
+ .group = _group, .index = _index \
}
- return 0;
-
-error:
- while (i--)
- ib_port_unregister_module_stat(&port->counter_group[i].obj);
- return ret;
-
-}
-
-static void cm_remove_port_fs(struct cm_port *port)
-{
- int i;
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++)
- ib_port_unregister_module_stat(&port->counter_group[i].obj);
+#define CM_COUNTER_GROUP(_group, _name) \
+ static struct cm_counter_attribute cm_counter_attr_##_group[] = { \
+ CM_COUNTER_ATTR(req, _group, CM_REQ_COUNTER), \
+ CM_COUNTER_ATTR(mra, _group, CM_MRA_COUNTER), \
+ CM_COUNTER_ATTR(rej, _group, CM_REJ_COUNTER), \
+ CM_COUNTER_ATTR(rep, _group, CM_REP_COUNTER), \
+ CM_COUNTER_ATTR(rtu, _group, CM_RTU_COUNTER), \
+ CM_COUNTER_ATTR(dreq, _group, CM_DREQ_COUNTER), \
+ CM_COUNTER_ATTR(drep, _group, CM_DREP_COUNTER), \
+ CM_COUNTER_ATTR(sidr_req, _group, CM_SIDR_REQ_COUNTER), \
+ CM_COUNTER_ATTR(sidr_rep, _group, CM_SIDR_REP_COUNTER), \
+ CM_COUNTER_ATTR(lap, _group, CM_LAP_COUNTER), \
+ CM_COUNTER_ATTR(apr, _group, CM_APR_COUNTER), \
+ }; \
+ static struct attribute *cm_counter_attrs_##_group[] = { \
+ &cm_counter_attr_##_group[0].attr.attr, \
+ &cm_counter_attr_##_group[1].attr.attr, \
+ &cm_counter_attr_##_group[2].attr.attr, \
+ &cm_counter_attr_##_group[3].attr.attr, \
+ &cm_counter_attr_##_group[4].attr.attr, \
+ &cm_counter_attr_##_group[5].attr.attr, \
+ &cm_counter_attr_##_group[6].attr.attr, \
+ &cm_counter_attr_##_group[7].attr.attr, \
+ &cm_counter_attr_##_group[8].attr.attr, \
+ &cm_counter_attr_##_group[9].attr.attr, \
+ &cm_counter_attr_##_group[10].attr.attr, \
+ NULL, \
+ }; \
+ static const struct attribute_group cm_counter_group_##_group = { \
+ .name = _name, \
+ .attrs = cm_counter_attrs_##_group, \
+ };
-}
+CM_COUNTER_GROUP(CM_XMIT, "cm_tx_msgs")
+CM_COUNTER_GROUP(CM_XMIT_RETRIES, "cm_tx_retries")
+CM_COUNTER_GROUP(CM_RECV, "cm_rx_msgs")
+CM_COUNTER_GROUP(CM_RECV_DUPLICATES, "cm_rx_duplicates")
+
+static const struct attribute_group *cm_counter_groups[] = {
+ &cm_counter_group_CM_XMIT,
+ &cm_counter_group_CM_XMIT_RETRIES,
+ &cm_counter_group_CM_RECV,
+ &cm_counter_group_CM_RECV_DUPLICATES,
+ NULL,
+};
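The kobject-based counter groups are replaced by a plain two-dimensional atomic_long_t array indexed as counters[group][attribute] and exposed through standard ib_port attribute groups. The struct cm_port change itself is outside this hunk; assuming a layout like the one below (field names taken from the accessors above), updating and reading a counter reduces to simple array indexing:

	/* Assumed storage behind port->counters[][] -- not shown in this hunk */
	struct cm_port {
		struct cm_device	*cm_dev;
		struct ib_mad_agent	*mad_agent;
		u32			port_num;
		atomic_long_t		counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
	};

	/* e.g. counting a received REQ and reading it back for sysfs */
	atomic_long_inc(&port->counters[CM_RECV][CM_REQ_COUNTER]);
	long rx_reqs = atomic_long_read(&port->counters[CM_RECV][CM_REQ_COUNTER]);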
-static void cm_add_one(struct ib_device *ib_device)
+static int cm_add_one(struct ib_device *ib_device)
{
struct cm_device *cm_dev;
struct cm_port *port;
@@ -4292,34 +4335,38 @@ static void cm_add_one(struct ib_device *ib_device)
unsigned long flags;
int ret;
int count = 0;
- u8 i;
+ u32 i;
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
if (!cm_dev)
- return;
+ return -ENOMEM;
+ kref_init(&cm_dev->kref);
+ spin_lock_init(&cm_dev->mad_agent_lock);
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
+ ib_set_client_data(ib_device, &cm_client, cm_dev);
+
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = kzalloc(sizeof *port, GFP_KERNEL);
- if (!port)
+ if (!port) {
+ ret = -ENOMEM;
goto error1;
+ }
cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
port->port_num = i;
- INIT_LIST_HEAD(&port->cm_priv_prim_list);
- INIT_LIST_HEAD(&port->cm_priv_altr_list);
-
- ret = cm_create_port_fs(port);
+ ret = ib_port_register_client_groups(ib_device, i,
+ cm_counter_groups);
if (ret)
goto error1;
@@ -4331,8 +4378,10 @@ static void cm_add_one(struct ib_device *ib_device)
cm_recv_handler,
port,
0);
- if (IS_ERR(port->mad_agent))
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
goto error2;
+ }
ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
@@ -4341,24 +4390,23 @@ static void cm_add_one(struct ib_device *ib_device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
-
- ib_set_client_data(ib_device, &cm_client, cm_dev);
+ }
write_lock_irqsave(&cm.device_lock, flags);
list_add_tail(&cm_dev->list, &cm.device_list);
write_unlock_irqrestore(&cm.device_lock, flags);
- return;
+ return 0;
error3:
ib_unregister_mad_agent(port->mad_agent);
error2:
- cm_remove_port_fs(port);
+ ib_port_unregister_client_groups(ib_device, i, cm_counter_groups);
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
- kfree(port);
while (--i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@@ -4366,27 +4414,23 @@ error1:
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
- cm_remove_port_fs(port);
- kfree(port);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
free:
- kfree(cm_dev);
+ cm_device_put(cm_dev);
+ return ret;
}
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
struct cm_device *cm_dev = client_data;
struct cm_port *port;
- struct cm_id_private *cm_id_priv;
- struct ib_mad_agent *cur_mad_agent;
struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP
};
unsigned long flags;
- int i;
-
- if (!cm_dev)
- return;
+ u32 i;
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
@@ -4396,35 +4440,34 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
cm_dev->going_down = 1;
spin_unlock_irq(&cm.lock);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
+ struct ib_mad_agent *mad_agent;
+
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = cm_dev->port[i-1];
+ mad_agent = port->mad_agent;
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
- /* Mark all the cm_id's as not valid */
- spin_lock_irq(&cm.lock);
- list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
- cm_id_priv->altr_send_port_not_ready = 1;
- list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
- cm_id_priv->prim_send_port_not_ready = 1;
- spin_unlock_irq(&cm.lock);
/*
* We flush the queue here after the going_down set, this
* verify that no new works will be queued in the recv handler,
* after that we can call the unregister_mad_agent
*/
flush_workqueue(cm.wq);
- spin_lock_irq(&cm.state_lock);
- cur_mad_agent = port->mad_agent;
+ /*
+ * The above ensures no call paths from the work are running,
+ * the remaining paths all take the mad_agent_lock.
+ */
+ spin_lock(&cm_dev->mad_agent_lock);
port->mad_agent = NULL;
- spin_unlock_irq(&cm.state_lock);
- ib_unregister_mad_agent(cur_mad_agent);
- cm_remove_port_fs(port);
- kfree(port);
+ spin_unlock(&cm_dev->mad_agent_lock);
+ ib_unregister_mad_agent(mad_agent);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
- kfree(cm_dev);
+ cm_device_put(cm_dev);
}
static int __init ib_cm_init(void)
@@ -4434,22 +4477,15 @@ static int __init ib_cm_init(void)
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
- spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT;
cm.remote_qp_table = RB_ROOT;
cm.remote_sidr_table = RB_ROOT;
- xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
- ret = class_register(&cm_class);
- if (ret) {
- ret = -ENOMEM;
- goto error1;
- }
-
cm.wq = alloc_workqueue("ib_cm", 0, 1);
if (!cm.wq) {
ret = -ENOMEM;
@@ -4464,8 +4500,6 @@ static int __init ib_cm_init(void)
error3:
destroy_workqueue(cm.wq);
error2:
- class_unregister(&cm_class);
-error1:
return ret;
}
@@ -4486,7 +4520,6 @@ static void __exit ib_cm_cleanup(void)
kfree(timewait_info);
}
- class_unregister(&cm_class);
WARN_ON(!xa_empty(&cm.local_id_table));
}
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 0cc40656b5c5..8462de7ca26e 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -22,7 +22,7 @@
static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
{
u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg);
- switch(transport_type) {
+ switch (transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
case 3:
@@ -37,7 +37,7 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
enum ib_qp_type qp_type)
{
- switch(qp_type) {
+ switch (qp_type) {
case IB_QPT_UC:
IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1);
break;
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644
index 000000000000..8f3482f66338
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644
index 000000000000..e9d282679ef1
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST \
+ ib_cm_state(IDLE) \
+ ib_cm_state(LISTEN) \
+ ib_cm_state(REQ_SENT) \
+ ib_cm_state(REQ_RCVD) \
+ ib_cm_state(MRA_REQ_SENT) \
+ ib_cm_state(MRA_REQ_RCVD) \
+ ib_cm_state(REP_SENT) \
+ ib_cm_state(REP_RCVD) \
+ ib_cm_state(MRA_REP_SENT) \
+ ib_cm_state(MRA_REP_RCVD) \
+ ib_cm_state(ESTABLISHED) \
+ ib_cm_state(DREQ_SENT) \
+ ib_cm_state(DREQ_RCVD) \
+ ib_cm_state(TIMEWAIT) \
+ ib_cm_state(SIDR_REQ_SENT) \
+ ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) { IB_CM_##x, #x },
+#define ib_cm_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+ __print_symbolic(x, IB_CM_STATE_LIST)
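+
+/*
+ * Illustrative sketch (not part of the patch): the state list above is
+ * expanded twice, first through TRACE_DEFINE_ENUM() so trace tooling can
+ * resolve the enum values, then as a { value, name } table. After the
+ * second expansion, show_ib_cm_state(x) is effectively equivalent to the
+ * hypothetical macro below (middle entries elided).
+ *
+ *	#define show_ib_cm_state_expanded(x)			\
+ *		__print_symbolic(x,				\
+ *			{ IB_CM_IDLE, "IDLE" },			\
+ *			{ IB_CM_LISTEN, "LISTEN" },		\
+ *			{ IB_CM_SIDR_REQ_RCVD, "SIDR_REQ_RCVD" })
+ */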
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST \
+ ib_cm_lap_state(LAP_UNINIT) \
+ ib_cm_lap_state(LAP_IDLE) \
+ ib_cm_lap_state(LAP_SENT) \
+ ib_cm_lap_state(LAP_RCVD) \
+ ib_cm_lap_state(MRA_LAP_SENT) \
+ ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+ __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST \
+ ib_cm_rej_reason(REJ_NO_QP) \
+ ib_cm_rej_reason(REJ_NO_EEC) \
+ ib_cm_rej_reason(REJ_NO_RESOURCES) \
+ ib_cm_rej_reason(REJ_TIMEOUT) \
+ ib_cm_rej_reason(REJ_UNSUPPORTED) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
+ ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
+ ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
+ ib_cm_rej_reason(REJ_STALE_CONN) \
+ ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
+ ib_cm_rej_reason(REJ_INVALID_GID) \
+ ib_cm_rej_reason(REJ_INVALID_LID) \
+ ib_cm_rej_reason(REJ_INVALID_SL) \
+ ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
+ ib_cm_rej_reason(REJ_PORT_REDIRECT) \
+ ib_cm_rej_reason(REJ_INVALID_MTU) \
+ ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
+ ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
+ ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
+ ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
+ ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
+ ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+ __print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+
+ TP_ARGS(cm_id),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id) /* for eBPF scripts */
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, lap_state)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->lap_state = cm_id->lap_state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_lap_state(__entry->lap_state)
+ )
+);
+
+#define DEFINE_CM_SEND_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_send_##name, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason
+ ),
+
+ TP_ARGS(cm_id, reason),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id)
+ __field(u32, local_id)
+ __field(u32, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_rej_reason(__entry->reason)
+ )
+);
+
+#define DEFINE_CM_ERR_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_##name##_err, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class, \
+ icm_dreq_skipped, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id) \
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id
+ ),
+
+ TP_ARGS(local_id, remote_id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u",
+ __entry->local_id, __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name) \
+ DEFINE_EVENT(icm_local_class, \
+ icm_##name, \
+ TP_PROTO( \
+ unsigned int local_id, \
+ unsigned int remote_id \
+ ), \
+ TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+ TP_PROTO(
+ u32 remote_id
+ ),
+
+ TP_ARGS(remote_id),
+
+ TP_STRUCT__entry(
+ __field(u32, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("remote_id=%u",
+ __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name) \
+ DEFINE_EVENT(icm_remote_class, \
+ icm_##name, \
+ TP_PROTO( \
+ u32 remote_id \
+ ), \
+ TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+ TP_PROTO(
+ __be32 local_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = be32_to_cpu(local_id);
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u state=%s",
+ __entry->local_id, show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, remote_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_handler_err,
+ TP_PROTO(
+ enum ib_cm_event_type event
+ ),
+
+ TP_ARGS(event),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, event)
+ ),
+
+ TP_fast_assign(
+ __entry->event = event;
+ ),
+
+ TP_printk("unhandled event=%s",
+ rdma_show_ib_cm_event(__entry->event)
+ )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+ TP_PROTO(
+ enum ib_cm_state state,
+ enum ib_wc_status wc_status
+ ),
+
+ TP_ARGS(state, wc_status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(unsigned long, wc_status)
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->wc_status = wc_status;
+ ),
+
+ TP_printk("state=%s completion status=%s",
+ show_ib_cm_state(__entry->state),
+ rdma_show_wc_status(__entry->wc_status)
+ )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
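+
+/*
+ * Illustrative sketch (not part of the patch): each DEFINE_CM_SEND_EVENT()
+ * above generates a trace_icm_send_<name>() wrapper taking the ib_cm_id.
+ * The call site below is hypothetical; the actual callers live in cm.c and
+ * are not part of this hunk.
+ *
+ *	static void example_post_req(struct cm_id_private *cm_id_priv)
+ *	{
+ *		trace_icm_send_req(&cm_id_priv->id);
+ *		// ... post the REQ MAD ...
+ *	}
+ */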
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2dec3a02ab9f..26d1772179b8 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -20,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -43,7 +45,6 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
-#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
@@ -68,6 +69,9 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -91,7 +95,13 @@ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_reject_msg);
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
if (rdma_ib_or_roce(id->device, id->port_num))
return reason == IB_CM_REJ_CONSUMER_DEFINED;
@@ -102,7 +112,6 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
WARN_ON_ONCE(1);
return false;
}
-EXPORT_SYMBOL(rdma_is_consumer_reject);
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
struct rdma_cm_event *ev, u8 *data_len)
@@ -148,7 +157,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_res_to_id);
-static void cma_add_one(struct ib_device *device);
+static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cma_client = {
@@ -161,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
@@ -195,11 +207,16 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
u8 *default_roce_tos;
@@ -211,14 +228,6 @@ struct rdma_bind_list {
unsigned short port;
};
-struct class_port_info_context {
- struct ib_class_port_info *class_port_info;
- struct ib_device *device;
- struct completion done;
- struct ib_sa_query *sa_query;
- u8 port_num;
-};
-
static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
@@ -247,9 +256,15 @@ enum {
CMA_OPTION_AFONLY,
};
-void cma_ref_dev(struct cma_device *cma_dev)
+void cma_dev_get(struct cma_device *cma_dev)
+{
+ refcount_inc(&cma_dev->refcount);
+}
+
+void cma_dev_put(struct cma_device *cma_dev)
{
- atomic_inc(&cma_dev->refcount);
+ if (refcount_dec_and_test(&cma_dev->refcount))
+ complete(&cma_dev->comp);
}
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
@@ -267,13 +282,13 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
}
if (found_cma_dev)
- cma_ref_dev(found_cma_dev);
+ cma_dev_get(found_cma_dev);
mutex_unlock(&lock);
return found_cma_dev;
}
int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port)
+ u32 port)
{
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
@@ -282,7 +297,7 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
}
int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
+ u32 port,
enum ib_gid_type default_gid_type)
{
unsigned long supported_gids;
@@ -290,6 +305,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
+ if (default_gid_type == IB_GID_TYPE_IB &&
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
+ default_gid_type = IB_GID_TYPE_ROCE;
+
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
if (!(supported_gids & 1 << default_gid_type))
@@ -301,7 +320,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
-int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+int cma_get_default_roce_tos(struct cma_device *cma_dev, u32 port)
{
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
@@ -309,7 +328,7 @@ int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}
-int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+int cma_set_default_roce_tos(struct cma_device *cma_dev, u32 port,
u8 default_roce_tos)
{
if (!rdma_is_port_valid(cma_dev->device, port))
@@ -335,12 +354,15 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
struct cma_multicast {
struct rdma_id_private *id_priv;
union {
- struct ib_sa_multicast *ib;
- } multicast;
+ struct ib_sa_multicast *sa_mc;
+ struct {
+ struct work_struct work;
+ struct rdma_cm_event event;
+ } iboe_join;
+ };
struct list_head list;
void *context;
struct sockaddr_storage addr;
- struct kref mcref;
u8 join_state;
};
@@ -352,18 +374,6 @@ struct cma_work {
struct rdma_cm_event event;
};
-struct cma_ndev_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct rdma_cm_event event;
-};
-
-struct iboe_mcast_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct cma_multicast *mc;
-};
-
union cma_ip_addr {
struct in6_addr ip6;
struct {
@@ -393,23 +403,21 @@ struct cma_req_info {
u16 pkey;
};
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- ret = (id_priv->state == comp);
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return ret;
-}
-
static int cma_comp_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
+ /*
+ * The FSM uses a funny double locking where state is protected by both
+ * the handler_mutex and the spinlock. State is not allowed to change
+ * to/from a handler_mutex protected value without also holding
+ * handler_mutex.
+ */
+ if (comp == RDMA_CM_CONNECT || exch == RDMA_CM_CONNECT)
+ lockdep_assert_held(&id_priv->handler_mutex);
+
spin_lock_irqsave(&id_priv->lock, flags);
if ((ret = (id_priv->state == comp)))
id_priv->state = exch;
@@ -417,27 +425,24 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
return ret;
}
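/*
 * Illustrative sketch (not part of the patch): per the locking comment in
 * cma_comp_exch(), transitions involving a handler_mutex-protected state
 * such as RDMA_CM_CONNECT are made while holding handler_mutex, mirroring
 * cma_ib_handler() later in this patch. The helper name is hypothetical.
 *
 *	static void example_disconnect(struct rdma_id_private *id_priv)
 *	{
 *		mutex_lock(&id_priv->handler_mutex);
 *		if (cma_comp_exch(id_priv, RDMA_CM_CONNECT,
 *				  RDMA_CM_DISCONNECT)) {
 *			// deliver RDMA_CM_EVENT_DISCONNECTED to the ULP
 *		}
 *		mutex_unlock(&id_priv->handler_mutex);
 *	}
 */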
-static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
- enum rdma_cm_state exch)
+static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
- unsigned long flags;
- enum rdma_cm_state old;
+ return hdr->ip_version >> 4;
+}
- spin_lock_irqsave(&id_priv->lock, flags);
- old = id_priv->state;
- id_priv->state = exch;
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return old;
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+{
+ hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
- return hdr->ip_version >> 4;
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
- hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
}
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
@@ -460,19 +465,128 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET)
+ return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
+ (char *)&((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
- cma_ref_dev(cma_dev);
+ cma_dev_get(cma_dev);
id_priv->cma_dev = cma_dev;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
- list_add_tail(&id_priv->list, &cma_dev->id_list);
- if (id_priv->res.kern_name)
- rdma_restrack_kadd(&id_priv->res);
- else
- rdma_restrack_uadd(&id_priv->res);
+ list_add_tail(&id_priv->device_item, &cma_dev->id_list);
+
+ trace_cm_id_attach(id_priv, cma_dev->device);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -484,39 +598,20 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
rdma_start_port(cma_dev->device)];
}
-void cma_deref_dev(struct cma_device *cma_dev)
-{
- if (atomic_dec_and_test(&cma_dev->refcount))
- complete(&cma_dev->comp);
-}
-
-static inline void release_mc(struct kref *kref)
-{
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
- kfree(mc->multicast.ib);
- kfree(mc);
-}
-
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
- list_del(&id_priv->list);
- cma_deref_dev(id_priv->cma_dev);
+ list_del_init(&id_priv->device_item);
+ cma_dev_put(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ id_priv->id.device = NULL;
+ if (id_priv->id.route.addr.dev_addr.sgid_attr) {
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
+ }
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
@@ -579,7 +674,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static const struct ib_gid_attr *
-cma_validate_port(struct ib_device *device, u8 port,
+cma_validate_port(struct ib_device *device, u32 port,
enum ib_gid_type gid_type,
union ib_gid *gid,
struct rdma_id_private *id_priv)
@@ -637,7 +732,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
struct cma_device *cma_dev;
enum ib_gid_type gid_type;
int ret = -ENODEV;
- unsigned int port;
+ u32 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
@@ -717,6 +812,7 @@ static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
mutex_lock(&lock);
cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
mutex_unlock(&lock);
+ rdma_restrack_add(&id_priv->res);
return 0;
}
@@ -729,7 +825,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
enum ib_gid_type gid_type;
int ret = -ENODEV;
union ib_gid gid;
- u8 port;
+ u32 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
@@ -753,7 +849,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
}
list_for_each_entry(cma_dev, &dev_list, list) {
- for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ rdma_for_each_port (cma_dev->device, port) {
if (listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
@@ -771,8 +867,10 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
}
out:
- if (!ret)
+ if (!ret) {
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
+ }
mutex_unlock(&lock);
return ret;
@@ -786,9 +884,10 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
struct cma_device *cma_dev, *cur_dev;
struct sockaddr_ib *addr;
union ib_gid gid, sgid, *dgid;
+ unsigned int p;
u16 pkey, index;
- u8 p;
enum ib_port_state port_state;
+ int ret;
int i;
cma_dev = NULL;
@@ -798,7 +897,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
mutex_lock(&lock);
list_for_each_entry(cur_dev, &dev_list, list) {
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ rdma_for_each_port (cur_dev->device, p) {
if (!rdma_cap_af_ib(cur_dev->device, p))
continue;
@@ -807,9 +906,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
continue;
- for (i = 0; !rdma_query_gid(cur_dev->device,
- p, i, &gid);
- i++) {
+
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
+ ++i) {
+ ret = rdma_query_gid(cur_dev->device, p, i,
+ &gid);
+ if (ret)
+ continue;
+
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -833,6 +937,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
found:
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
mutex_unlock(&lock);
addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
@@ -840,16 +945,21 @@ found:
return 0;
}
-static void cma_deref_id(struct rdma_id_private *id_priv)
+static void cma_id_get(struct rdma_id_private *id_priv)
{
- if (atomic_dec_and_test(&id_priv->refcount))
+ refcount_inc(&id_priv->refcount);
+}
+
+static void cma_id_put(struct rdma_id_private *id_priv)
+{
+ if (refcount_dec_and_test(&id_priv->refcount))
complete(&id_priv->comp);
}
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type, const char *caller)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
{
struct rdma_id_private *id_priv;
@@ -857,8 +967,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- rdma_restrack_set_task(&id_priv->res, caller);
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -866,22 +974,60 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.qp_type = qp_type;
id_priv->tos_set = false;
id_priv->timeout_set = false;
+ id_priv->min_rnr_timer_set = false;
id_priv->gid_type = IB_GID_TYPE_IB;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
- atomic_set(&id_priv->refcount, 1);
+ refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
+ INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
- trace_cm_id_create(id_priv);
- return &id_priv->id;
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+ if (parent)
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+ return id_priv;
+}
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, caller);
+ return &ret->id;
}
-EXPORT_SYMBOL(__rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+ ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, NULL);
+ return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -1114,12 +1260,16 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
qp_attr_mask);
qp_attr->port_num = id_priv->id.port_num;
*qp_attr_mask |= IB_QP_PORT;
- } else
+ } else {
ret = -ENOSYS;
+ }
if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
qp_attr->timeout = id_priv->timeout;
+ if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
+ qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
+
return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
@@ -1406,7 +1556,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
return false;
memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_iif = net_dev->ifindex;
+ fl4.flowi4_oif = net_dev->ifindex;
fl4.daddr = daddr;
fl4.saddr = saddr;
@@ -1560,7 +1710,7 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
struct ib_device *device = id->device;
- const int port_num = id->port_num ?: rdma_start_port(device);
+ const u32 port_num = id->port_num ?: rdma_start_port(device);
return rdma_protocol_roce(device, port_num);
}
@@ -1614,6 +1764,8 @@ static struct rdma_id_private *cma_find_listener(
{
struct rdma_id_private *id_priv, *id_priv_dev;
+ lockdep_assert_held(&lock);
+
if (!bind_list)
return ERR_PTR(-EINVAL);
@@ -1624,7 +1776,7 @@ static struct rdma_id_private *cma_find_listener(
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
- listen_list) {
+ listen_item) {
if (id_priv_dev->id.device == cm_id->device &&
cma_match_net_dev(&id_priv_dev->id,
net_dev, req))
@@ -1660,6 +1812,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
}
+ mutex_lock(&lock);
/*
* Net namespace might be getting deleted while route lookup,
* cm_id lookup is in progress. Therefore, perform netdevice
@@ -1688,8 +1841,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
- (struct sockaddr *)&req->listen_addr_storage,
- (struct sockaddr *)&req->src_addr_storage)) {
+ (struct sockaddr *)&req->src_addr_storage,
+ (struct sockaddr *)&req->listen_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
@@ -1701,6 +1854,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
err:
rcu_read_unlock();
+ mutex_unlock(&lock);
if (IS_ERR(id_priv) && *net_dev) {
dev_put(*net_dev);
*net_dev = NULL;
@@ -1721,28 +1875,36 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
}
}
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
+ lockdep_assert_held(&lock);
+
/*
* Remove from listen_any_list to prevent added devices from spawning
* additional listen requests.
*/
- mutex_lock(&lock);
- list_del(&id_priv->list);
+ list_del_init(&id_priv->listen_any_item);
while (!list_empty(&id_priv->listen_list)) {
- dev_id_priv = list_entry(id_priv->listen_list.next,
- struct rdma_id_private, listen_list);
+ dev_id_priv =
+ list_first_entry(&id_priv->listen_list,
+ struct rdma_id_private, listen_item);
/* sync with device removal to avoid duplicate destruction */
- list_del_init(&dev_id_priv->list);
- list_del(&dev_id_priv->listen_list);
+ list_del_init(&dev_id_priv->device_item);
+ list_del_init(&dev_id_priv->listen_item);
mutex_unlock(&lock);
rdma_destroy_id(&dev_id_priv->id);
mutex_lock(&lock);
}
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+ mutex_lock(&lock);
+ _cma_cancel_listens(id_priv);
mutex_unlock(&lock);
}
@@ -1751,6 +1913,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state;
+ * only RDMA_CM_ADDR_QUERY has outstanding work that could still
+ * execute. Notice that the addr_handler work could still be
+ * exiting outside this state; however, due to the interaction
+ * with the handler_mutex, the work is guaranteed not to touch
+ * id_priv during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -1782,19 +1952,39 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
- struct cma_multicast *mc)
+static void destroy_mc(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
{
- struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
+ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+ ib_sa_free_multicast(mc->sa_mc);
+
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net,
+ dev_addr->bound_dev_if);
+ if (ndev && !send_only) {
+ enum ib_gid_type gid_type;
+ union ib_gid mgid;
+
+ gid_type = id_priv->cma_dev->default_gid_type
+ [id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+ gid_type);
+ cma_igmp_send(ndev, &mgid, false);
+ }
dev_put(ndev);
+
+ cancel_work_sync(&mc->iboe_join.work);
}
- kref_put(&mc->mcref, release_mc);
+ kfree(mc);
}
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
@@ -1802,37 +1992,20 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
struct cma_multicast *mc;
while (!list_empty(&id_priv->mc_list)) {
- mc = container_of(id_priv->mc_list.next,
- struct cma_multicast, list);
+ mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+ list);
list_del(&mc->list);
- if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
- id_priv->id.port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else {
- cma_leave_roce_mc_group(id_priv, mc);
- }
+ destroy_mc(id_priv, mc);
}
}
-void rdma_destroy_id(struct rdma_cm_id *id)
+static void _destroy_id(struct rdma_id_private *id_priv,
+ enum rdma_cm_state state)
{
- struct rdma_id_private *id_priv;
- enum rdma_cm_state state;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- trace_cm_id_destroy(id_priv);
- state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
- /*
- * Wait for any active callback to finish. New callbacks will find
- * the id_priv state set to destroying and abort.
- */
- mutex_lock(&id_priv->handler_mutex);
- mutex_unlock(&id_priv->handler_mutex);
-
rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -1846,20 +2019,55 @@ void rdma_destroy_id(struct rdma_cm_id *id)
}
cma_release_port(id_priv);
- cma_deref_id(id_priv);
+ cma_id_put(id_priv);
wait_for_completion(&id_priv->comp);
if (id_priv->internal_id)
- cma_deref_id(id_priv->id.context);
+ cma_id_put(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
-
- if (id_priv->id.route.addr.dev_addr.sgid_attr)
- rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ kfree(id_priv->id.route.path_rec_inbound);
+ kfree(id_priv->id.route.path_rec_outbound);
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
+
+/*
+ * destroy an ID from within the handler_mutex. This ensures that no other
+ * handlers can start running concurrently.
+ */
+static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
+ __releases(&id_priv->handler_mutex)
+{
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ trace_cm_id_destroy(id_priv);
+
+ /*
+ * Setting the state to destroyed under the handler mutex provides a
+ * fence against calling handler callbacks. If this is invoked due to
+ * the failure of a handler callback then it guarantees that no future
+ * handlers will be called.
+ */
+ lockdep_assert_held(&id_priv->handler_mutex);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ id_priv->state = RDMA_CM_DESTROYING;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ _destroy_id(id_priv, state);
+}
+
+void rdma_destroy_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+ destroy_id_handler_unlock(id_priv);
+}
EXPORT_SYMBOL(rdma_destroy_id);
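
/*
 * Illustrative sketch (not part of the patch): the comments above describe
 * two destruction paths. A ULP event handler can request destruction by
 * returning non-zero, in which case the core destroys the ID via
 * destroy_id_handler_unlock() while still holding handler_mutex; otherwise
 * the ULP calls rdma_destroy_id() explicitly. The handler name and policy
 * below are hypothetical.
 *
 *	static int example_ulp_handler(struct rdma_cm_id *id,
 *				       struct rdma_cm_event *event)
 *	{
 *		if (event->event == RDMA_CM_EVENT_REJECTED)
 *			return -ECONNRESET;	// core destroys the cm_id
 *		return 0;	// keep it; ULP may later call rdma_destroy_id()
 *	}
 */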
static int cma_rep_recv(struct rdma_id_private *id_priv)
@@ -1901,6 +2109,9 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
event->param.conn.srq = rep_data->srq;
event->param.conn.qp_num = rep_data->remote_qpn;
+
+ event->ece.vendor_id = rep_data->ece.vendor_id;
+ event->ece.attr_mod = rep_data->ece.attr_mod;
}
static int cma_cm_event_handler(struct rdma_id_private *id_priv,
@@ -1908,6 +2119,8 @@ static int cma_cm_event_handler(struct rdma_id_private *id_priv,
{
int ret;
+ lockdep_assert_held(&id_priv->handler_mutex);
+
trace_cm_event_handler(id_priv, event);
ret = id_priv->id.event_handler(&id_priv->id, event);
trace_cm_event_done(id_priv, event, ret);
@@ -1919,13 +2132,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event = {};
- int ret = 0;
+ enum rdma_cm_state state;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
+ state = READ_ONCE(id_priv->state);
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_CONNECT) ||
+ state != RDMA_CM_CONNECT) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_DISCONNECT))
+ state != RDMA_CM_DISCONNECT))
goto out;
switch (ib_event->event) {
@@ -1935,7 +2150,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
- if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ if (state == RDMA_CM_CONNECT &&
(id_priv->id.qp_type != IB_QPT_UD)) {
trace_cm_send_mra(id_priv);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
@@ -1955,7 +2170,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
event.event = RDMA_CM_EVENT_ESTABLISHED;
break;
case IB_CM_DREQ_ERROR:
- event.status = -ETIMEDOUT; /* fall through */
+ event.status = -ETIMEDOUT;
+ fallthrough;
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
@@ -1988,14 +2204,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static struct rdma_id_private *
@@ -2014,28 +2228,29 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
- listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
+ listen_id->ps,
+ ib_event->param.req_rcvd.qp_type,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family, service_id))
goto err;
rt = &id->route;
- rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
- rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
- GFP_KERNEL);
+ rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+ rt->path_rec = kmalloc_array(rt->num_pri_alt_paths,
+ sizeof(*rt->path_rec), GFP_KERNEL);
if (!rt->path_rec)
goto err;
rt->path_rec[0] = *path;
- if (rt->num_paths == 2)
+ if (rt->num_pri_alt_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
@@ -2075,13 +2290,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
+ listen_id->context, listen_id->ps, IB_QPT_UD,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family,
@@ -2119,6 +2334,9 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
event->param.conn.srq = req_data->srq;
event->param.conn.qp_num = req_data->remote_qpn;
+
+ event->ece.vendor_id = req_data->ece.vendor_id;
+ event->ece.attr_mod = req_data->ece.attr_mod;
}
static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
@@ -2152,9 +2370,9 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN) {
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
ret = -ECONNABORTED;
- goto err1;
+ goto err_unlock;
}
offset = cma_user_data_offset(listen_id);
@@ -2171,55 +2389,38 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
if (!conn_id) {
ret = -ENOMEM;
- goto err1;
+ goto err_unlock;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
- if (ret)
- goto err2;
+ if (ret) {
+ destroy_id_handler_unlock(conn_id);
+ goto err_unlock;
+ }
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
ret = cma_cm_event_handler(conn_id, &event);
- if (ret)
- goto err3;
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD)) {
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ conn_id->cm_id.ib = NULL;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ goto net_dev_put;
+ }
+
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+ conn_id->id.qp_type != IB_QPT_UD) {
trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
}
- mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
- mutex_unlock(&listen_id->handler_mutex);
- cma_deref_id(conn_id);
- if (net_dev)
- dev_put(net_dev);
- return 0;
-err3:
- cma_deref_id(conn_id);
- /* Destroy the CM ID by returning a non-zero value. */
- conn_id->cm_id.ib = NULL;
-err2:
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
-err1:
+err_unlock:
mutex_unlock(&listen_id->handler_mutex);
- if (conn_id)
- rdma_destroy_id(&conn_id->id);
net_dev_put:
if (net_dev)
@@ -2273,7 +2474,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (iw_event->event) {
@@ -2319,9 +2520,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
@@ -2333,7 +2532,6 @@ out:
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct iw_cm_event *iw_event)
{
- struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event = {};
int ret = -ECONNABORTED;
@@ -2349,35 +2547,33 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
listen_id = cm_id->context;
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN)
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC,
- listen_id->res.kern_name);
- if (IS_ERR(new_cm_id)) {
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context, RDMA_PS_TCP,
+ IB_QPT_RC, listen_id);
+ if (IS_ERR(conn_id)) {
ret = -ENOMEM;
goto out;
}
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
ret = cma_iw_acquire_dev(conn_id, listen_id);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
conn_id->cm_id.iw = cm_id;
@@ -2387,25 +2583,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
- cma_deref_id(conn_id);
- rdma_destroy_id(&conn_id->id);
+ destroy_id_handler_unlock(conn_id);
return ret;
}
mutex_unlock(&conn_id->handler_mutex);
- cma_deref_id(conn_id);
out:
mutex_unlock(&listen_id->handler_mutex);
@@ -2440,8 +2627,11 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
if (IS_ERR(id))
return PTR_ERR(id);
+ mutex_lock(&id_priv->qp_mutex);
id->tos = id_priv->tos;
id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+ id->afonly = id_priv->afonly;
id_priv->cm_id.iw = id;
memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
@@ -2462,57 +2652,88 @@ static int cma_listen_handler(struct rdma_cm_id *id,
{
struct rdma_id_private *id_priv = id->context;
+ /* Listening IDs are always destroyed on removal */
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+ return -1;
+
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
trace_cm_event_handler(id_priv, event);
return id_priv->id.event_handler(id, event);
}
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_listen_on_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev,
+ struct rdma_id_private **to_destroy)
{
struct rdma_id_private *dev_id_priv;
- struct rdma_cm_id *id;
struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
- if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
- return;
+ lockdep_assert_held(&lock);
- id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type, id_priv->res.kern_name);
- if (IS_ERR(id))
- return;
+ *to_destroy = NULL;
+ if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
+ return 0;
- dev_id_priv = container_of(id, struct rdma_id_private, id);
+ dev_id_priv =
+ __rdma_create_id(net, cma_listen_handler, id_priv,
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
+ if (IS_ERR(dev_id_priv))
+ return PTR_ERR(dev_id_priv);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
_cma_attach_to_dev(dev_id_priv, cma_dev);
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
- atomic_inc(&id_priv->refcount);
+ rdma_restrack_add(&dev_id_priv->res);
+ cma_id_get(id_priv);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
+ mutex_lock(&id_priv->qp_mutex);
dev_id_priv->tos_set = id_priv->tos_set;
dev_id_priv->tos = id_priv->tos;
+ mutex_unlock(&id_priv->qp_mutex);
- ret = rdma_listen(id, id_priv->backlog);
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
- dev_warn(&cma_dev->device->dev,
- "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
+ goto err_listen;
+ list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list);
+ return 0;
+err_listen:
+ /* Caller must destroy this after releasing lock */
+ *to_destroy = dev_id_priv;
+ dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
+ return ret;
}
-static void cma_listen_on_all(struct rdma_id_private *id_priv)
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
+ int ret;
mutex_lock(&lock);
- list_add_tail(&id_priv->list, &listen_any_list);
- list_for_each_entry(cma_dev, &dev_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_add_tail(&id_priv->listen_any_item, &listen_any_list);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret) {
+ /* Prevent racing with cma_process_remove() */
+ if (to_destroy)
+ list_del_init(&to_destroy->device_item);
+ goto err_listen;
+ }
+ }
mutex_unlock(&lock);
+ return 0;
+
+err_listen:
+ _cma_cancel_listens(id_priv);
+ mutex_unlock(&lock);
+ if (to_destroy)
+ rdma_destroy_id(&to_destroy->id);
+ return ret;
}
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
@@ -2520,8 +2741,10 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
id_priv->tos = (u8) tos;
id_priv->tos_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_set_service_type);
@@ -2544,37 +2767,124 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
{
struct rdma_id_private *id_priv;
- if (id->qp_type != IB_QPT_RC)
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
id_priv->timeout = timeout;
id_priv->timeout_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
return 0;
}
EXPORT_SYMBOL(rdma_set_ack_timeout);
+/**
+ * rdma_set_min_rnr_timer() - Set the minimum RNR Retry timer of the
+ * QP associated with a connection identifier.
+ * @id: Communication identifier associated with the QP.
+ * @min_rnr_timer: 5-bit value encoded as Table 45: "Encoding for RNR NAK
+ * Timer Field" in the IBTA specification.
+ *
+ * This function should be called before rdma_connect() on active
+ * side, and on passive side before rdma_accept(). The timer value
+ * will be associated with the local QP. When the QP receives a send it
+ * is not ready to handle, typically because the receive queue is empty,
+ * an RNR Retry NAK is returned to the requester with the min_rnr_timer
+ * encoded. The requester will then wait at least the time specified
+ * in the NAK before retrying. The default is zero, which translates
+ * to a minimum RNR Timer value of 655.36 ms.
+ *
+ * Return: 0 for success
+ */
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
+{
+ struct rdma_id_private *id_priv;
+
+ /* It is a five-bit value */
+ if (min_rnr_timer & 0xe0)
+ return -EINVAL;
+
+ if (WARN_ON(id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT))
+ return -EINVAL;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ id_priv->min_rnr_timer = min_rnr_timer;
+ id_priv->min_rnr_timer_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_set_min_rnr_timer);
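/*
 * Illustrative sketch (not from the patch): roughly how a kernel ULP might
 * use rdma_set_min_rnr_timer(). my_ulp_route_resolved() is an invented name
 * and IB_RNR_TIMER_000_32 (~0.32 ms) is an arbitrary example value; the only
 * point is that the call must precede rdma_connect()/rdma_accept() so the
 * value is applied when the (RC) QP is moved to RTS. The function is assumed
 * to run from the RDMA_CM_EVENT_ROUTE_RESOLVED handler, hence the _locked
 * connect call.
 */
#include <rdma/rdma_cm.h>

static int my_ulp_route_resolved(struct rdma_cm_id *cm_id,
				 struct rdma_conn_param *param)
{
	int ret;

	/* 5-bit IBTA Table 45 encoding */
	ret = rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
	if (ret)
		return ret;

	return rdma_connect_locked(cm_id, param);
}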
+
+static void route_set_path_rec_inbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_inbound) {
+ route->path_rec_inbound =
+ kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL);
+ if (!route->path_rec_inbound)
+ return;
+ }
+
+ *route->path_rec_inbound = *path_rec;
+}
+
+static void route_set_path_rec_outbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_outbound) {
+ route->path_rec_outbound =
+ kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL);
+ if (!route->path_rec_outbound)
+ return;
+ }
+
+ *route->path_rec_outbound = *path_rec;
+}
+
static void cma_query_handler(int status, struct sa_path_rec *path_rec,
- void *context)
+ int num_prs, void *context)
{
struct cma_work *work = context;
struct rdma_route *route;
+ int i;
route = &work->id->id.route;
- if (!status) {
- route->num_paths = 1;
- *route->path_rec = *path_rec;
- } else {
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
- work->event.status = status;
- pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
- status);
+ if (status)
+ goto fail;
+
+ for (i = 0; i < num_prs; i++) {
+ if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP))
+ *route->path_rec = path_rec[i];
+ else if (path_rec[i].flags & IB_PATH_INBOUND)
+ route_set_path_rec_inbound(work, &path_rec[i]);
+ else if (path_rec[i].flags & IB_PATH_OUTBOUND)
+ route_set_path_rec_outbound(work, &path_rec[i]);
+ }
+ if (!route->path_rec) {
+ status = -EINVAL;
+ goto fail;
}
+ route->num_pri_alt_paths = 1;
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
queue_work(cma_wq, &work->work);
}
@@ -2631,49 +2941,54 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv,
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
-static void cma_work_handler(struct work_struct *_work)
+static void cma_iboe_join_work_handler(struct work_struct *work)
{
- struct cma_work *work = container_of(_work, struct cma_work, work);
- struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
+ struct cma_multicast *mc =
+ container_of(work, struct cma_multicast, iboe_join.work);
+ struct rdma_cm_event *event = &mc->iboe_join.event;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
- if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
- }
-out:
+ ret = cma_cm_event_handler(id_priv, event);
+ WARN_ON(ret);
+
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
- kfree(work);
+ if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&event->param.ud.ah_attr);
}
-static void cma_ndev_work_handler(struct work_struct *_work)
+static void cma_work_handler(struct work_struct *_work)
{
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+ struct cma_work *work = container_of(_work, struct cma_work, work);
struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == RDMA_CM_DESTROYING ||
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+ if (work->old_state != 0 || work->new_state != 0) {
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ goto out_unlock;
+ }
if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ goto out_free;
}
-out:
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
+ cma_id_put(id_priv);
+out_free:
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
kfree(work);
}
@@ -2687,14 +3002,19 @@ static void cma_init_resolve_route_work(struct cma_work *work,
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
}
-static void cma_init_resolve_addr_work(struct cma_work *work,
- struct rdma_id_private *id_priv)
+static void enqueue_resolve_addr_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
{
+ /* Balances with cma_id_put() in cma_work_handler */
+ cma_id_get(id_priv);
+
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ADDR_QUERY;
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+
+ queue_work(cma_wq, &work->work);
}
static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
@@ -2710,7 +3030,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
cma_init_resolve_route_work(work, id_priv);
- route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
+ if (!route->path_rec)
+ route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
goto err1;
@@ -2808,7 +3129,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id,
dev_put(ndev);
}
- id->route.num_paths = 1;
+ id->route.num_pri_alt_paths = 1;
return 0;
err_free:
@@ -2851,9 +3172,10 @@ struct iboe_prio_tc_map {
bool found;
};
-static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+static int get_lower_vlan_dev_tc(struct net_device *dev,
+ struct netdev_nested_priv *priv)
{
- struct iboe_prio_tc_map *map = data;
+ struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data;
if (is_vlan_dev(dev))
map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
@@ -2872,16 +3194,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
{
struct iboe_prio_tc_map prio_tc_map = {};
int prio = rt_tos2priority(tos);
+ struct netdev_nested_priv priv;
/* If VLAN device, get it directly from the VLAN netdev */
if (is_vlan_dev(ndev))
return get_vlan_ndev_tc(ndev, prio);
prio_tc_map.input_prio = prio;
+ priv.data = (void *)&prio_tc_map;
rcu_read_lock();
netdev_walk_all_lower_dev_rcu(ndev,
get_lower_vlan_dev_tc,
- &prio_tc_map);
+ &priv);
rcu_read_unlock();
/* If map is found from lower device, use it; Otherwise
* continue with the current netdevice to get priority to tc map.
@@ -2894,6 +3218,24 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in6 *addr6;
+ u16 dport, sport;
+ u32 hash, fl;
+
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
+ hash = (u32)sport * 31 + dport;
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
+ }
+
+ return cpu_to_be32(fl);
+}
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2904,8 +3246,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+ u8 tos;
+ mutex_lock(&id_priv->qp_mutex);
+ tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+ mutex_unlock(&id_priv->qp_mutex);
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
@@ -2917,7 +3262,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err1;
}
- route->num_paths = 1;
+ route->num_pri_alt_paths = 1;
ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
@@ -2952,14 +3297,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
* PacketLifeTime = local ACK timeout/2
* as a reasonable approximation for RoCE networks.
*/
- route->path_rec->packet_life_time = id_priv->timeout_set ?
- id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
+ mutex_lock(&id_priv->qp_mutex);
+ if (id_priv->timeout_set && id_priv->timeout)
+ route->path_rec->packet_life_time = id_priv->timeout - 1;
+ else
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ mutex_unlock(&id_priv->qp_mutex);
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
}
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
+ id_priv->id.port_num))
+ route->path_rec->flow_label =
+ cma_get_roce_udp_flow_label(id_priv);
+
cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
@@ -2968,6 +3322,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
err2:
kfree(route->path_rec);
route->path_rec = NULL;
+ route->num_pri_alt_paths = 0;
err1:
kfree(work);
return ret;
@@ -2978,15 +3333,21 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
struct rdma_id_private *id_priv;
int ret;
+ if (!timeout_ms)
+ return -EINVAL;
+
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
return -EINVAL;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
@@ -2998,7 +3359,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
- cma_deref_id(id_priv);
+ cma_id_put(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_route);
@@ -3025,9 +3386,9 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
struct cma_device *cma_dev, *cur_dev;
union ib_gid gid;
enum ib_port_state port_state;
+ unsigned int p;
u16 pkey;
int ret;
- u8 p;
cma_dev = NULL;
mutex_lock(&lock);
@@ -3039,7 +3400,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
if (!cma_dev)
cma_dev = cur_dev;
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ rdma_for_each_port (cur_dev->device, p) {
if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
@@ -3072,6 +3433,7 @@ port_found:
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
cma_set_loopback(cma_src_addr(id_priv));
out:
mutex_unlock(&lock);
@@ -3104,6 +3466,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (status)
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
status);
+ rdma_restrack_add(&id_priv->res);
} else if (status) {
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
}
@@ -3120,9 +3483,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
if (cma_cm_event_handler(id_priv, &event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return;
}
out:
@@ -3148,9 +3509,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- atomic_inc(&id_priv->refcount);
- cma_init_resolve_addr_work(work, id_priv);
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
@@ -3175,9 +3534,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
- atomic_inc(&id_priv->refcount);
- cma_init_resolve_addr_work(work, id_priv);
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
@@ -3187,50 +3544,80 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
const struct sockaddr *dst_addr)
{
- if (!src_addr || !src_addr->sa_family) {
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- src_addr->sa_family = dst_addr->sa_family;
- if (IS_ENABLED(CONFIG_IPV6) &&
- dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
- }
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr(id, src_addr);
+
+ /*
+ * When the src_addr is not specified, automatically supply an any addr
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
}
- return rdma_bind_addr(id, src_addr);
+ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
}
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, unsigned long timeout_ms)
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by the ULP: a previously bound ID will not be re-bound, and
+ * src_addr is ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
{
- struct rdma_id_private *id_priv;
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (id_priv->state == RDMA_CM_IDLE) {
- ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret) {
- memset(cma_dst_addr(id_priv), 0,
- rdma_addr_size(dst_addr));
- return ret;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well-behaved ULP the state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ goto err_dst;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY))) {
+ ret = -EINVAL;
+ goto err_dst;
}
}
if (cma_family(id_priv) != dst_addr->sa_family) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_state;
}
+ return 0;
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
- }
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+err_dst:
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
@@ -3238,6 +3625,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+ * The FSM can return to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, e.g. through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, just track whether a request
+ * has ever been issued. The flag is effectively
+ * permanent, since the only cancel happens here,
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
@@ -3262,7 +3664,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+ id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
@@ -3301,6 +3704,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
u64 sid, mask;
__be16 port;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
port = htons(bind_list->port);
@@ -3329,6 +3734,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list;
int ret;
+ lockdep_assert_held(&lock);
+
bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
@@ -3355,6 +3762,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
struct sockaddr *saddr = cma_src_addr(id_priv);
__be16 dport = cma_port(daddr);
+ lockdep_assert_held(&lock);
+
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
struct sockaddr *cur_saddr = cma_src_addr(cur_id);
@@ -3394,9 +3803,11 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
unsigned int rover;
struct net *net = id_priv->id.route.addr.dev_addr.net;
+ lockdep_assert_held(&lock);
+
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rover = prandom_u32() % remaining + low;
+ rover = prandom_u32_max(remaining) + low;
retry:
if (last_used_port != rover) {
struct rdma_bind_list *bind_list;
@@ -3441,13 +3852,14 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *cur_id;
struct sockaddr *addr, *cur_addr;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
if (id_priv == cur_id)
continue;
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
- cur_id->reuseaddr)
+ if (reuseaddr && cur_id->reuseaddr)
continue;
cur_addr = cma_src_addr(cur_id);
@@ -3471,6 +3883,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
unsigned short snum;
int ret;
+ lockdep_assert_held(&lock);
+
snum = ntohs(cma_port(cma_src_addr(id_priv)));
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -3486,18 +3900,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
return ret;
}
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
- struct rdma_bind_list *bind_list = id_priv->bind_list;
- int ret = 0;
-
- mutex_lock(&lock);
- if (bind_list->owners.first->next)
- ret = cma_check_port(bind_list, id_priv, 0);
- mutex_unlock(&lock);
- return ret;
-}
-
static enum rdma_ucm_port_space
cma_select_inet_ps(struct rdma_id_private *id_priv)
{
@@ -3591,28 +3993,41 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
- id->route.addr.src_addr.ss_family = AF_INET;
- ret = rdma_bind_addr(id, cma_src_addr(id_priv));
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+ struct sockaddr_in any_in = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ };
+
+ /* For a well-behaved ULP the state will be RDMA_CM_IDLE */
+ ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
if (ret)
return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN)))
+ return -EINVAL;
}
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
- return -EINVAL;
-
+ /*
+ * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
+ * any more, and has to be unique in the bind list.
+ */
if (id_priv->reuseaddr) {
- ret = cma_bind_listen(id_priv);
+ mutex_lock(&lock);
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+ if (!ret)
+ id_priv->reuseaddr = 0;
+ mutex_unlock(&lock);
if (ret)
goto err;
}
id_priv->backlog = backlog;
- if (id->device) {
+ if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id->device, 1)) {
ret = cma_ib_listen(id_priv);
if (ret)
@@ -3625,12 +4040,19 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
ret = -ENOSYS;
goto err;
}
- } else
- cma_listen_on_all(id_priv);
+ } else {
+ ret = cma_listen_on_all(id_priv);
+ if (ret)
+ goto err;
+ }
return 0;
err:
id_priv->backlog = 0;
+ /*
+ * None of the failure paths that lead here allow the req_handlers to
+ * have run.
+ */
cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
return ret;
}
@@ -3683,9 +4105,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err2;
+ if (!cma_any_addr(addr))
+ rdma_restrack_add(&id_priv->res);
return 0;
err2:
- rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev)
cma_release_dev(id_priv);
err1:
@@ -3731,10 +4154,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct rdma_cm_event event = {};
const struct ib_cm_sidr_rep_event_param *rep =
&ib_event->param.sidr_rep_rcvd;
- int ret = 0;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (ib_event->event) {
@@ -3781,14 +4204,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
@@ -3802,8 +4223,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3862,8 +4282,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3894,7 +4313,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
}
req.primary_path = &route->path_rec[0];
- if (route->num_paths == 2)
+ req.primary_path_inbound = route->path_rec_inbound;
+ req.primary_path_outbound = route->path_rec_outbound;
+ if (route->num_pri_alt_paths == 2)
req.alternate_path = &route->path_rec[1];
req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
@@ -3912,6 +4333,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ req.ece.vendor_id = id_priv->ece.vendor_id;
+ req.ece.attr_mod = id_priv->ece.attr_mod;
trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
@@ -3936,8 +4359,11 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
+ mutex_lock(&id_priv->qp_mutex);
cm_id->tos = id_priv->tos;
cm_id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+
id_priv->cm_id.iw = cm_id;
memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
@@ -3968,12 +4394,21 @@ out:
return ret;
}
-int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+/**
+ * rdma_connect_locked - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Same as rdma_connect() but can only be called from the
+ * RDMA_CM_EVENT_ROUTE_RESOLVED handler callback.
+ */
+int rdma_connect_locked(struct rdma_cm_id *id,
+ struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3987,20 +4422,66 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_connect_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
+ }
if (ret)
- goto err;
-
+ goto err_state;
return 0;
-err:
+err_state:
cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
return ret;
}
+EXPORT_SYMBOL(rdma_connect_locked);
+
+/**
+ * rdma_connect - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Users must have resolved a route for the rdma_cm_id to connect with by
+ * calling rdma_resolve_route() before calling this routine.
+ *
+ * This call will either connect to a remote QP or obtain remote QP information
+ * for unconnected rdma_cm_ids. The actual operation is based on the
+ * rdma_cm_id's port space.
+ */
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ mutex_lock(&id_priv->handler_mutex);
+ ret = rdma_connect_locked(id, conn_param);
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
EXPORT_SYMBOL(rdma_connect);
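/*
 * Illustrative sketch (not from the patch): the intended split between
 * rdma_connect() and rdma_connect_locked(). my_ulp_cm_handler() is an
 * invented name and the conn_param is assumed to be stashed in id->context
 * by the caller. Inside the CM event handler the core already holds
 * handler_mutex, so only the _locked variant may be used there; from
 * ordinary process context rdma_connect() takes the lock itself.
 */
#include <rdma/rdma_cm.h>

static int my_ulp_cm_handler(struct rdma_cm_id *id,
			     struct rdma_cm_event *event)
{
	struct rdma_conn_param *param = id->context;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		/* Move on to route resolution; 2000 ms is an arbitrary pick */
		if (rdma_resolve_route(id, 2000))
			pr_err("rdma_resolve_route failed\n"); /* teardown elided */
		return 0;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/* handler_mutex is held here, so use the _locked call */
		if (rdma_connect_locked(id, param))
			pr_err("rdma_connect_locked failed\n"); /* teardown elided */
		return 0;
	default:
		return 0;
	}
}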
+/**
+ * rdma_connect_ece - Initiate an active connection request with ECE data.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ * @ece: ECE parameters
+ *
+ * See rdma_connect() explanation.
+ */
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_connect(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_connect_ece);
+
static int cma_accept_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
@@ -4026,6 +4507,8 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.flow_control = conn_param->flow_control;
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
@@ -4050,9 +4533,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
iw_param.ird = conn_param->responder_resources;
iw_param.private_data = conn_param->private_data;
iw_param.private_data_len = conn_param->private_data_len;
- if (id_priv->id.qp) {
+ if (id_priv->id.qp)
iw_param.qpn = id_priv->qp_num;
- } else
+ else
iw_param.qpn = conn_param->qp_num;
return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
@@ -4073,7 +4556,11 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ret;
rep.qp_num = id_priv->qp_num;
rep.qkey = id_priv->qkey;
+
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
}
+
rep.private_data = private_data;
rep.private_data_len = private_data_len;
@@ -4081,17 +4568,33 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller)
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection. This must be
+ * provided if accepting a connection request. If accepting a connection
+ * response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request. It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the QP associated with the id is modified to error, such that any
+ * previously posted receive buffers are flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
-
- rdma_restrack_set_task(&id_priv->res, caller);
+ lockdep_assert_held(&id_priv->handler_mutex);
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
return -EINVAL;
if (!id->qp && conn_param) {
@@ -4115,21 +4618,52 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
else
ret = cma_rep_recv(id_priv);
}
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_accept_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
-
+ }
if (ret)
goto reject;
return 0;
reject:
cma_modify_qp_err(id_priv);
- rdma_reject(id, NULL, 0);
+ rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
return ret;
}
-EXPORT_SYMBOL(__rdma_accept);
+EXPORT_SYMBOL(rdma_accept);
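/*
 * Illustrative sketch (not from the patch): a listener accepting straight
 * from its CM event handler, which is the context rdma_accept() now expects
 * (handler_mutex held by the core). my_listen_handler() and the conn_param
 * values are invented; a real ULP would create a QP and fill in
 * private_data before accepting.
 */
#include <rdma/rdma_cm.h>

static int my_listen_handler(struct rdma_cm_id *new_id,
			     struct rdma_cm_event *event)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth = 1,
	};

	if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 0;

	/* handler_mutex is already held while this callback runs */
	if (rdma_accept(new_id, &param))
		return -ECONNABORTED; /* non-zero return destroys new_id */
	return 0;
}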
+
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_accept(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
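/*
 * Illustrative sketch (not from the patch): deferring the accept/reject
 * decision outside the CM callback, which is what rdma_lock_handler() and
 * rdma_unlock_handler() are for. struct my_pending_conn and
 * my_deferred_accept() are invented; the CONNECT_REQUEST handler is assumed
 * to have returned 0 and stashed the new cm_id for later.
 */
#include <linux/workqueue.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_cm.h>

struct my_pending_conn {
	struct work_struct work;
	struct rdma_cm_id *cm_id;
	bool allowed;
};

static void my_deferred_accept(struct work_struct *work)
{
	struct my_pending_conn *pend =
		container_of(work, struct my_pending_conn, work);
	struct rdma_conn_param param = { .responder_resources = 1 };

	/* Take the handler lock that rdma_accept() asserts is held */
	rdma_lock_handler(pend->cm_id);
	if (pend->allowed)
		rdma_accept(pend->cm_id, &param);
	else
		rdma_reject(pend->cm_id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
	rdma_unlock_handler(pend->cm_id);
}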
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
@@ -4153,7 +4687,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
EXPORT_SYMBOL(rdma_notify);
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
- u8 private_data_len)
+ u8 private_data_len, u8 reason)
{
struct rdma_id_private *id_priv;
int ret;
@@ -4168,15 +4702,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
private_data, private_data_len);
} else {
trace_cm_send_rej(id_priv);
- ret = ib_send_cm_rej(id_priv->cm_id.ib,
- IB_CM_REJ_CONSUMER_DEFINED, NULL,
- 0, private_data, private_data_len);
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0,
+ private_data, private_data_len);
}
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
- } else
+ } else {
ret = -ENOSYS;
+ }
return ret;
}
@@ -4213,70 +4747,68 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
-static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ struct ib_sa_multicast *multicast,
+ struct rdma_cm_event *event,
+ struct cma_multicast *mc)
{
- struct rdma_id_private *id_priv;
- struct cma_multicast *mc = multicast->context;
- struct rdma_cm_event event = {};
- int ret = 0;
-
- id_priv = mc->id_priv;
- mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
- goto out;
+ struct rdma_dev_addr *dev_addr;
+ enum ib_gid_type gid_type;
+ struct net_device *ndev;
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
else
pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
status);
- mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp) {
- status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- be16_to_cpu(multicast->rec.mlid));
- if (status)
- pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
- status);
+
+ event->status = status;
+ event->param.ud.private_data = mc->context;
+ if (status) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ return;
}
- mutex_unlock(&id_priv->qp_mutex);
- event.status = status;
- event.param.ud.private_data = mc->context;
- if (!status) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev =
- dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- enum ib_gid_type gid_type =
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
-
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ret = ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num,
- &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
- if (ret)
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ gid_type =
+ id_priv->cma_dev
+ ->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+ &multicast->rec, ndev, gid_type,
+ &event->param.ud.ah_attr)) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ goto out;
+ }
- event.param.ud.qp_num = 0xFFFFFF;
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- if (ndev)
- dev_put(ndev);
- } else
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ event->param.ud.qp_num = 0xFFFFFF;
+ event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- ret = cma_cm_event_handler(id_priv, &event);
+out:
+ if (ndev)
+ dev_put(ndev);
+}
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ struct rdma_cm_event event = {};
+ int ret = 0;
+
+ mutex_lock(&id_priv->handler_mutex);
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
+ goto out;
+
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
- if (ret) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
- return 0;
- }
+ WARN_ON(ret);
out:
mutex_unlock(&id_priv->handler_mutex);
@@ -4337,17 +4869,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
- (!ib_sa_sendonly_fullmem_support(&sa_client,
- id_priv->id.device,
- id_priv->id.port_num))) {
- dev_warn(
- &id_priv->id.device->dev,
- "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
- id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
-
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
@@ -4361,23 +4882,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
- id_priv->id.port_num, &rec,
- comp_mask, GFP_KERNEL,
- cma_ib_mc_handler, mc);
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
-}
-
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
- struct cma_multicast *mc = mw->mc;
- struct ib_sa_multicast *m = mc->multicast.ib;
-
- mc->multicast.ib->context = mc;
- cma_ib_mc_handler(0, m);
- kref_put(&mc->mcref, release_mc);
- kfree(mw);
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec, comp_mask,
+ GFP_KERNEL, cma_ib_mc_handler, mc);
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
}
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
@@ -4412,52 +4920,41 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
- struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ struct ib_sa_multicast ib;
enum ib_gid_type gid_type;
bool send_only;
send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ if (cma_zero_addr(addr))
return -EINVAL;
- work = kzalloc(sizeof *work, GFP_KERNEL);
- if (!work)
- return -ENOMEM;
-
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
- if (!mc->multicast.ib) {
- err = -ENOMEM;
- goto out1;
- }
-
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+ ib.rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (!ndev) {
- err = -ENODEV;
- goto out2;
- }
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
- mc->multicast.ib->rec.hop_limit = 1;
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+ if (!ndev)
+ return -ENODEV;
+
+ ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.hop_limit = 1;
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
if (!send_only) {
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
true);
}
}
@@ -4466,44 +4963,35 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENOTSUPP;
}
dev_put(ndev);
- if (err || !mc->multicast.ib->rec.mtu) {
- if (!err)
- err = -EINVAL;
- goto out2;
- }
- rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
- &mc->multicast.ib->rec.port_gid);
- work->id = id_priv;
- work->mc = mc;
- INIT_WORK(&work->work, iboe_mcast_work_handler);
- kref_get(&mc->mcref);
- queue_work(cma_wq, &work->work);
+ if (err || !ib.rec.mtu)
+ return err ?: -EINVAL;
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &ib.rec.port_gid);
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
+ cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
+ queue_work(cma_wq, &mc->iboe_join.work);
return 0;
-
-out2:
- kfree(mc->multicast.ib);
-out1:
- kfree(work);
- return err;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
u8 join_state, void *context)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
struct cma_multicast *mc;
int ret;
- if (!id->device)
+ /* Not supported for kernel QPs */
+ if (WARN_ON(id->qp))
return -EINVAL;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+ /* ULP is calling this wrong. */
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
@@ -4513,7 +5001,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
mc->join_state = join_state;
if (rdma_protocol_roce(id->device, id->port_num)) {
- kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
if (ret)
goto out_err;
@@ -4545,25 +5032,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
-
- if (id->qp)
- ib_detach_mcast(id->qp,
- &mc->multicast.ib->rec.mgid,
- be16_to_cpu(mc->multicast.ib->rec.mlid));
-
- BUG_ON(id_priv->cma_dev->device != id->device);
-
- if (rdma_cap_ib_mcast(id->device, id->port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num)) {
- cma_leave_roce_mc_group(id_priv, mc);
- }
- return;
- }
+ if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+ continue;
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ WARN_ON(id_priv->cma_dev->device != id->device);
+ destroy_mc(id_priv, mc);
+ return;
}
spin_unlock_irq(&id_priv->lock);
}
@@ -4572,7 +5048,7 @@ EXPORT_SYMBOL(rdma_leave_multicast);
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
- struct cma_ndev_work *work;
+ struct cma_work *work;
dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -4585,10 +5061,10 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
if (!work)
return -ENOMEM;
- INIT_WORK(&work->work, cma_ndev_work_handler);
+ INIT_WORK(&work->work, cma_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
queue_work(cma_wq, &work->work);
}
@@ -4611,7 +5087,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+ list_for_each_entry(id_priv, &cma_dev->id_list, device_item) {
ret = cma_netdev_change(ndev, id_priv);
if (ret)
goto out;
@@ -4622,33 +5098,192 @@ out:
return ret;
}
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
+
+ mutex_lock(&id_priv->handler_mutex);
+
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ return;
+ }
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
+{
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
+
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
+
+ if (!ips_node)
+ goto out;
+
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
+ cma_id_get(current_id);
+ queue_work(cma_wq, &current_id->id.net_work);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
-static void cma_add_one(struct ib_device *device)
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
+static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
+{
+ struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ mutex_lock(&id_priv->handler_mutex);
+ /* Record that we want to remove the device */
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) {
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+ return;
+ }
+ id_priv->state = RDMA_CM_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ /*
+ * At this point the ULP promises it won't call
+ * rdma_destroy_id() concurrently
+ */
+ cma_id_put(id_priv);
+ mutex_unlock(&id_priv->handler_mutex);
+ trace_cm_id_destroy(id_priv);
+ _destroy_id(id_priv, state);
+ return;
+ }
+ mutex_unlock(&id_priv->handler_mutex);
+
+ /*
+ * If this races with destroy then the thread that first assigns state
+ * to a destroying does the cancel.
+ */
+ cma_cancel_operation(id_priv, state);
+ cma_id_put(id_priv);
+}
+
+static void cma_process_remove(struct cma_device *cma_dev)
+{
+ mutex_lock(&lock);
+ while (!list_empty(&cma_dev->id_list)) {
+ struct rdma_id_private *id_priv = list_first_entry(
+ &cma_dev->id_list, struct rdma_id_private, device_item);
+
+ list_del_init(&id_priv->listen_item);
+ list_del_init(&id_priv->device_item);
+ cma_id_get(id_priv);
+ mutex_unlock(&lock);
+
+ cma_send_device_removal_put(id_priv);
+
+ mutex_lock(&lock);
+ }
+ mutex_unlock(&lock);
+
+ cma_dev_put(cma_dev);
+ wait_for_completion(&cma_dev->comp);
+}
+
+static bool cma_supported(struct ib_device *device)
+{
+ u32 i;
+
+ rdma_for_each_port(device, i) {
+ if (rdma_cap_ib_cm(device, i) || rdma_cap_iw_cm(device, i))
+ return true;
+ }
+ return false;
+}
+
+static int cma_add_one(struct ib_device *device)
{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
- unsigned int i;
unsigned long supported_gids = 0;
+ int ret;
+ u32 i;
+
+ if (!cma_supported(device))
+ return -EOPNOTSUPP;
- cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
+ cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
if (!cma_dev)
- return;
+ return -ENOMEM;
cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type)
+ if (!cma_dev->default_gid_type) {
+ ret = -ENOMEM;
goto free_cma_dev;
+ }
cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_roce_tos),
GFP_KERNEL);
- if (!cma_dev->default_roce_tos)
+ if (!cma_dev->default_roce_tos) {
+ ret = -ENOMEM;
goto free_gid_type;
+ }
rdma_for_each_port (device, i) {
supported_gids = roce_gid_type_mask_support(device, i);
@@ -4663,90 +5298,43 @@ static void cma_add_one(struct ib_device *device)
}
init_completion(&cma_dev->comp);
- atomic_set(&cma_dev->refcount, 1);
+ refcount_set(&cma_dev->refcount, 1);
INIT_LIST_HEAD(&cma_dev->id_list);
ib_set_client_data(device, &cma_client, cma_dev);
mutex_lock(&lock);
list_add_tail(&cma_dev->list, &dev_list);
- list_for_each_entry(id_priv, &listen_any_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_for_each_entry(id_priv, &listen_any_list, listen_any_item) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret)
+ goto free_listen;
+ }
mutex_unlock(&lock);
trace_cm_add_one(device);
- return;
+ return 0;
+
+free_listen:
+ list_del(&cma_dev->list);
+ mutex_unlock(&lock);
+ /* cma_process_remove() will delete to_destroy */
+ cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
free_gid_type:
kfree(cma_dev->default_gid_type);
free_cma_dev:
kfree(cma_dev);
-
- return;
-}
-
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
-{
- struct rdma_cm_event event = {};
- enum rdma_cm_state state;
- int ret = 0;
-
- /* Record that we want to remove the device */
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
- if (state == RDMA_CM_DESTROYING)
- return 0;
-
- cma_cancel_operation(id_priv, state);
- mutex_lock(&id_priv->handler_mutex);
-
- /* Check for destruction from another callback. */
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
- goto out;
-
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = cma_cm_event_handler(id_priv, &event);
-out:
- mutex_unlock(&id_priv->handler_mutex);
return ret;
}
-static void cma_process_remove(struct cma_device *cma_dev)
-{
- struct rdma_id_private *id_priv;
- int ret;
-
- mutex_lock(&lock);
- while (!list_empty(&cma_dev->id_list)) {
- id_priv = list_entry(cma_dev->id_list.next,
- struct rdma_id_private, list);
-
- list_del(&id_priv->listen_list);
- list_del_init(&id_priv->list);
- atomic_inc(&id_priv->refcount);
- mutex_unlock(&lock);
-
- ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
- cma_deref_id(id_priv);
- if (ret)
- rdma_destroy_id(&id_priv->id);
-
- mutex_lock(&lock);
- }
- mutex_unlock(&lock);
-
- cma_deref_dev(cma_dev);
- wait_for_completion(&cma_dev->comp);
-}
-
static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev = client_data;
trace_cm_remove_one(device);
- if (!cma_dev)
- return;
-
mutex_lock(&lock);
list_del(&cma_dev->list);
mutex_unlock(&lock);
@@ -4790,6 +5378,19 @@ static int __init cma_init(void)
{
int ret;
+ /*
+ * There is a rare lock ordering dependency in cma_netdev_callback()
+ * that only happens when bonding is enabled. Teach lockdep that rtnl
+ * must never be nested under lock so it can find these without having
+ * to test with bonding.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ rtnl_lock();
+ mutex_lock(&lock);
+ mutex_unlock(&lock);
+ rtnl_unlock();
+ }
+
cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
if (!cma_wq)
return -ENOMEM;
@@ -4800,6 +5401,7 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
@@ -4814,6 +5416,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -4826,6 +5429,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 8b0b5ae22e4c..7b68b3ea979f 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -43,7 +42,7 @@ struct cma_device;
struct cma_dev_group;
struct cma_dev_port_group {
- unsigned int port_num;
+ u32 port_num;
struct cma_dev_group *cma_dev_group;
struct config_group group;
};
@@ -94,7 +93,7 @@ static int cma_configfs_params_get(struct config_item *item,
static void cma_configfs_params_put(struct cma_device *cma_dev)
{
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
}
static ssize_t default_roce_mode_show(struct config_item *item,
@@ -115,7 +114,7 @@ static ssize_t default_roce_mode_show(struct config_item *item,
if (gid_type < 0)
return gid_type;
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+ return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type));
}
static ssize_t default_roce_mode_store(struct config_item *item,
@@ -123,16 +122,19 @@ static ssize_t default_roce_mode_store(struct config_item *item,
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
- int gid_type = ib_cache_gid_parse_type_str(buf);
+ int gid_type;
ssize_t ret;
- if (gid_type < 0)
- return -EINVAL;
-
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
+ gid_type = ib_cache_gid_parse_type_str(buf);
+ if (gid_type < 0) {
+ cma_configfs_params_put(cma_dev);
+ return -EINVAL;
+ }
+
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
cma_configfs_params_put(cma_dev);
@@ -156,7 +158,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
tos = cma_get_default_roce_tos(cma_dev, group->port_num);
cma_configfs_params_put(cma_dev);
- return sprintf(buf, "%u\n", tos);
+ return sysfs_emit(buf, "%u\n", tos);
}
static ssize_t default_roce_tos_store(struct config_item *item,
@@ -197,11 +199,10 @@ static const struct config_item_type cma_port_group_type = {
static int make_cma_ports(struct cma_dev_group *cma_dev_group,
struct cma_device *cma_dev)
{
- struct ib_device *ibdev;
- unsigned int i;
- unsigned int ports_num;
struct cma_dev_port_group *ports;
- int err;
+ struct ib_device *ibdev;
+ u32 ports_num;
+ u32 i;
ibdev = cma_get_ib_dev(cma_dev);
@@ -212,10 +213,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
- if (!ports) {
- err = -ENOMEM;
- goto free;
- }
+ if (!ports)
+ return -ENOMEM;
for (i = 0; i < ports_num; i++) {
char port_str[10];
@@ -231,12 +230,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
}
cma_dev_group->ports = ports;
-
return 0;
-free:
- kfree(ports);
- cma_dev_group->ports = NULL;
- return err;
}
static void release_cma_dev(struct config_item *item)
@@ -298,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
@@ -312,18 +306,31 @@ static struct config_group *make_cma_dev(struct config_group *group,
configfs_add_default_group(&cma_dev_group->ports_group,
&cma_dev_group->device_group);
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
return &cma_dev_group->device_group;
fail:
if (cma_dev)
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
kfree(cma_dev_group);
return ERR_PTR(err);
}
+static void drop_cma_dev(struct config_group *cgroup, struct config_item *item)
+{
+ struct config_group *group =
+ container_of(item, struct config_group, cg_item);
+ struct cma_dev_group *cma_dev_group =
+ container_of(group, struct cma_dev_group, device_group);
+
+ configfs_remove_default_groups(&cma_dev_group->ports_group);
+ configfs_remove_default_groups(&cma_dev_group->device_group);
+ config_item_put(item);
+}
+
static struct configfs_group_operations cma_subsys_group_ops = {
.make_group = make_cma_dev,
+ .drop_item = drop_cma_dev,
};
static const struct config_item_type cma_subsys_type = {
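
The cma_configfs.c hunk above adds a .drop_item handler that tears down the default sub-groups before dropping the item. A hedged sketch of that configfs pattern with a hypothetical group layout (struct example_group and the example_* names are illustrative only):

#include <linux/kernel.h>
#include <linux/configfs.h>

/* Hypothetical layout mirroring cma_dev_group: a directory plus one default sub-group. */
struct example_group {
	struct config_group group;	/* the directory created by .make_group */
	struct config_group defaults;	/* default sub-group added at creation time */
};

static void example_drop_item(struct config_group *parent,
			      struct config_item *item)
{
	struct example_group *eg =
		container_of(to_config_group(item), struct example_group, group);

	/*
	 * Groups added with configfs_add_default_group() are not removed
	 * automatically on rmdir; drop them explicitly before releasing
	 * the item, as drop_cma_dev() does above.
	 */
	configfs_remove_default_groups(&eg->defaults);
	configfs_remove_default_groups(&eg->group);
	config_item_put(item);
}

static struct configfs_group_operations example_group_ops = {
	/* .make_group would allocate struct example_group and register the defaults */
	.drop_item	= example_drop_item,
};
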
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index ca7307277518..b7354c94cf1b 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -55,8 +55,16 @@ struct rdma_id_private {
struct rdma_bind_list *bind_list;
struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
+ union {
+ struct list_head device_item; /* On cma_device->id_list */
+ struct list_head listen_any_item; /* On listen_any_list */
+ };
+ union {
+ /* On rdma_id_private->listen_list */
+ struct list_head listen_item;
+ struct list_head listen_list;
+ };
+ struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;
@@ -66,7 +74,7 @@ struct rdma_id_private {
struct mutex qp_mutex;
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
struct mutex handler_mutex;
int backlog;
@@ -86,15 +94,19 @@ struct rdma_id_private {
u8 tos;
u8 tos_set:1;
u8 timeout_set:1;
+ u8 min_rnr_timer_set:1;
u8 reuseaddr;
u8 afonly;
u8 timeout;
+ u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
* Internal to RDMA/core, don't use in the drivers
*/
struct rdma_restrack_entry res;
+ struct rdma_ucm_ece ece;
};
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
@@ -111,16 +123,16 @@ static inline void cma_configfs_exit(void)
}
#endif
-void cma_ref_dev(struct cma_device *dev);
-void cma_deref_dev(struct cma_device *dev);
+void cma_dev_get(struct cma_device *dev);
+void cma_dev_put(struct cma_device *dev);
typedef bool (*cma_device_filter)(struct ib_device *, void *);
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
void *cookie);
-int cma_get_default_gid_type(struct cma_device *dev, unsigned int port);
-int cma_set_default_gid_type(struct cma_device *dev, unsigned int port,
+int cma_get_default_gid_type(struct cma_device *dev, u32 port);
+int cma_set_default_gid_type(struct cma_device *dev, u32 port,
enum ib_gid_type default_gid_type);
-int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port);
-int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port,
+int cma_get_default_roce_tos(struct cma_device *dev, u32 port);
+int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *dev);
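
The cma_priv.h hunk converts rdma_id_private's reference count from atomic_t to refcount_t, which traps overflow and increments from zero. A short sketch of the idiom on a hypothetical object (example_id and its helpers are illustrative, not the rdma_cm code):

#include <linux/refcount.h>
#include <linux/slab.h>

struct example_id {
	refcount_t refcount;
	/* ... payload ... */
};

static struct example_id *example_id_alloc(void)
{
	struct example_id *id = kzalloc(sizeof(*id), GFP_KERNEL);

	if (!id)
		return NULL;
	refcount_set(&id->refcount, 1);		/* replaces atomic_set() */
	return id;
}

static void example_id_get(struct example_id *id)
{
	refcount_inc(&id->refcount);		/* warns on increment from zero or overflow */
}

static void example_id_put(struct example_id *id)
{
	if (refcount_dec_and_test(&id->refcount))	/* replaces atomic_dec_and_test() */
		kfree(id);
}
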
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
index 81e36bf13159..e45264267bcc 100644
--- a/drivers/infiniband/core/cma_trace.h
+++ b/drivers/infiniband/core/cma_trace.h
@@ -17,46 +17,6 @@
#include <linux/tracepoint.h>
#include <trace/events/rdma.h>
-/*
- * enum ib_cm_event_type, from include/rdma/ib_cm.h
- */
-#define IB_CM_EVENT_LIST \
- ib_cm_event(REQ_ERROR) \
- ib_cm_event(REQ_RECEIVED) \
- ib_cm_event(REP_ERROR) \
- ib_cm_event(REP_RECEIVED) \
- ib_cm_event(RTU_RECEIVED) \
- ib_cm_event(USER_ESTABLISHED) \
- ib_cm_event(DREQ_ERROR) \
- ib_cm_event(DREQ_RECEIVED) \
- ib_cm_event(DREP_RECEIVED) \
- ib_cm_event(TIMEWAIT_EXIT) \
- ib_cm_event(MRA_RECEIVED) \
- ib_cm_event(REJ_RECEIVED) \
- ib_cm_event(LAP_ERROR) \
- ib_cm_event(LAP_RECEIVED) \
- ib_cm_event(APR_RECEIVED) \
- ib_cm_event(SIDR_REQ_ERROR) \
- ib_cm_event(SIDR_REQ_RECEIVED) \
- ib_cm_event_end(SIDR_REP_RECEIVED)
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-
-IB_CM_EVENT_LIST
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) { IB_CM_##x, #x },
-#define ib_cm_event_end(x) { IB_CM_##x, #x }
-
-#define rdma_show_ib_cm_event(x) \
- __print_symbolic(x, IB_CM_EVENT_LIST)
-
DECLARE_EVENT_CLASS(cma_fsm_class,
TP_PROTO(
@@ -103,23 +63,33 @@ DEFINE_CMA_FSM_EVENT(sent_drep);
DEFINE_CMA_FSM_EVENT(sent_dreq);
DEFINE_CMA_FSM_EVENT(id_destroy);
-TRACE_EVENT(cm_id_create,
+TRACE_EVENT(cm_id_attach,
TP_PROTO(
- const struct rdma_id_private *id_priv
+ const struct rdma_id_private *id_priv,
+ const struct ib_device *device
),
- TP_ARGS(id_priv),
+ TP_ARGS(id_priv, device),
TP_STRUCT__entry(
__field(u32, cm_id)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ __string(devname, device->name)
),
TP_fast_assign(
__entry->cm_id = id_priv->res.id;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ __assign_str(devname, device->name);
),
- TP_printk("cm.id=%u",
- __entry->cm_id
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __get_str(devname)
)
);
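
The cm_id_attach tracepoint above copies each address into an array sized for sockaddr_in6 and lets the %pISpc printk extension render it as a family-aware "address:port" string. The same copy-and-print step outside the TRACE_EVENT() machinery, assuming the caller hands in sockaddr_storage-backed memory (example_log_addr is illustrative only):

#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/string.h>
#include <linux/in6.h>

static void example_log_addr(const struct sockaddr_storage *ss)
{
	unsigned char buf[sizeof(struct sockaddr_in6)];

	/* sockaddr_storage is at least as large as sockaddr_in6, so this copy is safe. */
	memcpy(buf, ss, sizeof(buf));
	pr_info("peer=%pISpc\n", buf);	/* prints an IPv4 or IPv6 address plus port */
}
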
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index cf42acca4a3a..f66f48d860ec 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -44,6 +44,7 @@
#include <rdma/ib_mad.h>
#include <rdma/restrack.h>
#include "mad_priv.h"
+#include "restrack.h"
/* Total number of ports combined across all struct ib_devices's */
#define RDMA_MAX_PORTS 8192
@@ -77,19 +78,17 @@ static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
return net_generic(net, rdma_dev_net_id);
}
-int ib_device_register_sysfs(struct ib_device *device);
-void ib_device_unregister_sysfs(struct ib_device *device);
int ib_device_rename(struct ib_device *ibdev, const char *name);
int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim);
-typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
+typedef void (*roce_netdev_callback)(struct ib_device *device, u32 port,
struct net_device *idev, void *cookie);
-typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
+typedef bool (*roce_netdev_filter)(struct ib_device *device, u32 port,
struct net_device *idev, void *cookie);
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
- unsigned int port);
+ u32 port);
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
@@ -112,7 +111,7 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
struct ib_client_nl_info {
struct sk_buff *nl_msg;
struct device *cdev;
- unsigned int port;
+ u32 port;
u64 abi;
};
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
@@ -127,24 +126,24 @@ int ib_cache_gid_parse_type_str(const char *buf);
const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev);
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
-unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
@@ -213,15 +212,15 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
-int ib_get_cached_subnet_prefix(struct ib_device *device,
- u8 port_num,
- u64 *sn_pfx);
+void ib_get_cached_subnet_prefix(struct ib_device *device,
+ u32 port_num,
+ u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
void ib_security_release_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix);
int ib_security_modify_qp(struct ib_qp *qp,
@@ -246,7 +245,7 @@ static inline void ib_security_release_port_pkey_list(struct ib_device *device)
}
static inline void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix)
{
}
@@ -317,58 +316,13 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index);
void nldev_init(void);
void nldev_exit(void);
-static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
- struct ib_pd *pd,
- struct ib_qp_init_attr *attr,
- struct ib_udata *udata,
- struct ib_uqp_object *uobj)
-{
- enum ib_qp_type qp_type = attr->qp_type;
- struct ib_qp *qp;
- bool is_xrc;
-
- if (!dev->ops.create_qp)
- return ERR_PTR(-EOPNOTSUPP);
-
- qp = dev->ops.create_qp(pd, attr, udata);
- if (IS_ERR(qp))
- return qp;
-
- qp->device = dev;
- qp->pd = pd;
- qp->uobject = uobj;
- qp->real_qp = qp;
-
- qp->qp_type = attr->qp_type;
- qp->rwq_ind_tbl = attr->rwq_ind_tbl;
- qp->send_cq = attr->send_cq;
- qp->recv_cq = attr->recv_cq;
- qp->srq = attr->srq;
- qp->rwq_ind_tbl = attr->rwq_ind_tbl;
- qp->event_handler = attr->event_handler;
-
- atomic_set(&qp->usecnt, 0);
- spin_lock_init(&qp->mr_lock);
- INIT_LIST_HEAD(&qp->rdma_mrs);
- INIT_LIST_HEAD(&qp->sig_mrs);
-
- /*
- * We don't track XRC QPs for now, because they don't have PD
- * and more importantly they are created internaly by driver,
- * see mlx5 create_dev_resources() as an example.
- */
- is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
- if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
- qp->res.type = RDMA_RESTRACK_QP;
- if (uobj)
- rdma_restrack_uadd(&qp->res);
- else
- rdma_restrack_kadd(&qp->res);
- } else
- qp->res.valid = false;
-
- return qp;
-}
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller);
+
+void ib_qp_usecnt_inc(struct ib_qp *qp);
+void ib_qp_usecnt_dec(struct ib_qp *qp);
struct rdma_dev_addr;
int rdma_resolve_ip_route(struct sockaddr *src_addr,
@@ -390,13 +344,16 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
void ib_free_port_attrs(struct ib_core_device *coredev);
int ib_setup_port_attrs(struct ib_core_device *coredev);
+struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev, u32 port_num);
+void ib_device_release_hw_stats(struct hw_stats_device_data *data);
+int ib_setup_device_attrs(struct ib_device *ibdev);
int rdma_compatdev_set(u8 enable);
-int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
- struct kobject *kobj, struct kobj_type *ktype,
- const char *name);
-void ib_port_unregister_module_stat(struct kobject *kobj);
+int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
+void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
int ib_device_set_netns_put(struct sk_buff *skb,
struct ib_device *dev, u32 ns_fd);
@@ -414,4 +371,6 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv,
struct vm_area_struct *vma,
struct rdma_user_mmap_entry *entry);
+void ib_cq_pool_cleanup(struct ib_device *dev);
+
#endif /* _CORE_PRIV_H */
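
The core_priv.h hunk replaces the inline _ib_create_qp() with an out-of-line ib_create_qp_user() plus an ib_qp_usecnt_inc()/ib_qp_usecnt_dec() pair. A hedged sketch of what such a pair conventionally does (a QP pins its PD, CQs and SRQ); these bodies are illustrative, not the upstream implementations:

#include <rdma/ib_verbs.h>

static void example_qp_usecnt_inc(struct ib_qp *qp)
{
	if (qp->pd)
		atomic_inc(&qp->pd->usecnt);
	if (qp->send_cq)
		atomic_inc(&qp->send_cq->usecnt);
	if (qp->recv_cq)
		atomic_inc(&qp->recv_cq->usecnt);
	if (qp->srq)
		atomic_inc(&qp->srq->usecnt);
}

static void example_qp_usecnt_dec(struct ib_qp *qp)
{
	/* Drop the references in the reverse order they were taken. */
	if (qp->srq)
		atomic_dec(&qp->srq->usecnt);
	if (qp->recv_cq)
		atomic_dec(&qp->recv_cq->usecnt);
	if (qp->send_cq)
		atomic_dec(&qp->send_cq->usecnt);
	if (qp->pd)
		atomic_dec(&qp->pd->usecnt);
}
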
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 2257d7f7810f..af59486fe418 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -8,33 +8,43 @@
#include "core_priv.h"
#include "restrack.h"
-#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE)
+#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
-static int __counter_set_mode(struct rdma_counter_mode *curr,
+static int __counter_set_mode(struct rdma_port_counter *port_counter,
enum rdma_nl_counter_mode new_mode,
enum rdma_nl_counter_mask new_mask)
{
- if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
- ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
- (curr->mode != RDMA_COUNTER_MODE_NONE)))
- return -EINVAL;
+ if (new_mode == RDMA_COUNTER_MODE_AUTO) {
+ if (new_mask & (~ALL_AUTO_MODE_MASKS))
+ return -EINVAL;
+ if (port_counter->num_counters)
+ return -EBUSY;
+ }
- curr->mode = new_mode;
- curr->mask = new_mask;
+ port_counter->mode.mode = new_mode;
+ port_counter->mode.mask = new_mask;
return 0;
}
-/**
+/*
* rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
*
- * When @on is true, the @mask must be set; When @on is false, it goes
- * into manual mode if there's any counter, so that the user is able to
- * manually access them.
+ * @dev: Device to operate on
+ * @port: Port to use
+ * @mask: Mask to configure
+ * @extack: Message to the user
+ *
+ * Return 0 on success. If the counter mode wasn't changed then it is still
+ * considered a success.
+ * Return -EBUSY when changing to auto mode while there are bound counters.
+ *
*/
-int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
- bool on, enum rdma_nl_counter_mask mask)
+int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
+ enum rdma_nl_counter_mask mask,
+ struct netlink_ext_ack *extack)
{
struct rdma_port_counter *port_counter;
+ enum rdma_nl_counter_mode mode;
int ret;
port_counter = &dev->port_data[port].port_counter;
@@ -42,30 +52,95 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
return -EOPNOTSUPP;
mutex_lock(&port_counter->lock);
- if (on) {
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_AUTO, mask);
- } else {
- if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
- ret = -EINVAL;
- goto out;
- }
-
- if (port_counter->num_counters)
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
- else
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_NONE, 0);
+ if (mask)
+ mode = RDMA_COUNTER_MODE_AUTO;
+ else
+ mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
+ RDMA_COUNTER_MODE_NONE;
+
+ if (port_counter->mode.mode == mode &&
+ port_counter->mode.mask == mask) {
+ ret = 0;
+ goto out;
}
+ ret = __counter_set_mode(port_counter, mode, mask);
+
out:
mutex_unlock(&port_counter->lock);
+ if (ret == -EBUSY)
+ NL_SET_ERR_MSG(
+ extack,
+ "Modifying auto mode is not allowed when there is a bound QP");
+ return ret;
+}
+
+static void auto_mode_init_counter(struct rdma_counter *counter,
+ const struct ib_qp *qp,
+ enum rdma_nl_counter_mask new_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+
+ counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
+ counter->mode.mask = new_mask;
+
+ if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ param->qp_type = qp->qp_type;
+}
+
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ int ret;
+
+ if (qp->counter)
+ return -EINVAL;
+
+ if (!qp->device->ops.counter_bind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_bind_qp(counter, qp);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+ unsigned int index, bool enable)
+{
+ struct rdma_hw_stats *stats;
+ int ret = 0;
+
+ if (!dev->ops.modify_hw_stat)
+ return -EOPNOTSUPP;
+
+ stats = ib_get_hw_stats_port(dev, port);
+ if (!stats || index >= stats->num_counters ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+ return -EINVAL;
+
+ mutex_lock(&stats->lock);
+
+ if (enable != test_bit(index, stats->is_disabled))
+ goto out;
+
+ ret = dev->ops.modify_hw_stat(dev, port, index, enable);
+ if (ret)
+ goto out;
+
+ if (enable)
+ clear_bit(index, stats->is_disabled);
+ else
+ set_bit(index, stats->is_disabled);
+out:
+ mutex_unlock(&stats->lock);
return ret;
}
-static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
- enum rdma_nl_counter_mode mode)
+static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
+ struct ib_qp *qp,
+ enum rdma_nl_counter_mode mode)
{
struct rdma_port_counter *port_counter;
struct rdma_counter *counter;
@@ -80,18 +155,30 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
counter->device = dev;
counter->port = port;
- counter->res.type = RDMA_RESTRACK_COUNTER;
- counter->stats = dev->ops.counter_alloc_stats(counter);
+
+ rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+ counter->stats = dev->ops.counter_alloc_stats(counter);
if (!counter->stats)
goto err_stats;
port_counter = &dev->port_data[port].port_counter;
mutex_lock(&port_counter->lock);
- if (mode == RDMA_COUNTER_MODE_MANUAL) {
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
- if (ret)
+ switch (mode) {
+ case RDMA_COUNTER_MODE_MANUAL:
+ ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
+ 0);
+ if (ret) {
+ mutex_unlock(&port_counter->lock);
goto err_mode;
+ }
+ break;
+ case RDMA_COUNTER_MODE_AUTO:
+ auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ mutex_unlock(&port_counter->lock);
+ goto err_mode;
}
port_counter->num_counters++;
@@ -101,12 +188,18 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
kref_init(&counter->kref);
mutex_init(&counter->lock);
+ ret = __rdma_counter_bind_qp(counter, qp);
+ if (ret)
+ goto err_mode;
+
+ rdma_restrack_parent_name(&counter->res, &qp->res);
+ rdma_restrack_add(&counter->res);
return counter;
err_mode:
- mutex_unlock(&port_counter->lock);
- kfree(counter->stats);
+ rdma_free_hw_stats_struct(counter->stats);
err_stats:
+ rdma_restrack_put(&counter->res);
kfree(counter);
return NULL;
}
@@ -120,71 +213,29 @@ static void rdma_counter_free(struct rdma_counter *counter)
port_counter->num_counters--;
if (!port_counter->num_counters &&
(port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
- __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
- 0);
+ __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
mutex_unlock(&port_counter->lock);
rdma_restrack_del(&counter->res);
- kfree(counter->stats);
+ rdma_free_hw_stats_struct(counter->stats);
kfree(counter);
}
-static void auto_mode_init_counter(struct rdma_counter *counter,
- const struct ib_qp *qp,
- enum rdma_nl_counter_mask new_mask)
-{
- struct auto_mode_param *param = &counter->mode.param;
-
- counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
- counter->mode.mask = new_mask;
-
- if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
- param->qp_type = qp->qp_type;
-}
-
static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
enum rdma_nl_counter_mask auto_mask)
{
struct auto_mode_param *param = &counter->mode.param;
bool match = true;
- /*
- * Ensure that counter belongs to the right PID. This operation can
- * race with user space which kills the process and leaves QP and
- * counters orphans.
- *
- * It is not a big deal because exitted task will leave both QP and
- * counter in the same bucket of zombie process. Just ensure that
- * process is still alive before procedding.
- *
- */
- if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) ||
- !task_pid_nr(qp->res.task))
- return false;
-
if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
match &= (param->qp_type == qp->qp_type);
- return match;
-}
-
-static int __rdma_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- int ret;
-
- if (qp->counter)
- return -EINVAL;
+ if (auto_mask & RDMA_COUNTER_MASK_PID)
+ match &= (task_pid_nr(counter->res.task) ==
+ task_pid_nr(qp->res.task));
- if (!qp->device->ops.counter_bind_qp)
- return -EOPNOTSUPP;
-
- mutex_lock(&counter->lock);
- ret = qp->device->ops.counter_bind_qp(counter, qp);
- mutex_unlock(&counter->lock);
-
- return ret;
+ return match;
}
static int __rdma_counter_unbind_qp(struct ib_qp *qp)
@@ -202,7 +253,7 @@ static int __rdma_counter_unbind_qp(struct ib_qp *qp)
return ret;
}
-static void counter_history_stat_update(const struct rdma_counter *counter)
+static void counter_history_stat_update(struct rdma_counter *counter)
{
struct ib_device *dev = counter->device;
struct rdma_port_counter *port_counter;
@@ -212,18 +263,20 @@ static void counter_history_stat_update(const struct rdma_counter *counter)
if (!port_counter->hstats)
return;
+ rdma_counter_query_stats(counter);
+
for (i = 0; i < counter->stats->num_counters; i++)
port_counter->hstats->value[i] += counter->stats->value[i];
}
-/**
+/*
* rdma_get_counter_auto_mode - Find the counter that @qp should be bound
* with in auto mode
*
* Return: The counter (with ref-count increased) if found
*/
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
- u8 port)
+ u32 port)
{
struct rdma_port_counter *port_counter;
struct rdma_counter *counter = NULL;
@@ -253,18 +306,6 @@ next:
return counter;
}
-static void rdma_counter_res_add(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- if (rdma_is_kernel_res(&qp->res)) {
- rdma_restrack_set_task(&counter->res, qp->res.kern_name);
- rdma_restrack_kadd(&counter->res);
- } else {
- rdma_restrack_attach_task(&counter->res, qp->res.task);
- rdma_restrack_uadd(&counter->res);
- }
-}
-
static void counter_release(struct kref *kref)
{
struct rdma_counter *counter;
@@ -275,18 +316,18 @@ static void counter_release(struct kref *kref)
rdma_counter_free(counter);
}
-/**
+/*
* rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
* the auto-mode rule
*/
-int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
{
struct rdma_port_counter *port_counter;
struct ib_device *dev = qp->device;
struct rdma_counter *counter;
int ret;
- if (!qp->res.valid)
+ if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
return 0;
if (!rdma_is_port_valid(dev, port))
@@ -304,25 +345,15 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
return ret;
}
} else {
- counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
if (!counter)
return -ENOMEM;
-
- auto_mode_init_counter(counter, qp, port_counter->mode.mask);
-
- ret = __rdma_counter_bind_qp(counter, qp);
- if (ret) {
- rdma_counter_free(counter);
- return ret;
- }
-
- rdma_counter_res_add(counter, qp);
}
return 0;
}
-/**
+/*
* rdma_counter_unbind_qp - Unbind a qp from a counter
* @force:
* true - Decrease the counter ref-count anyway (e.g., qp destroy)
@@ -359,7 +390,7 @@ int rdma_counter_query_stats(struct rdma_counter *counter)
}
static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
- u8 port, u32 index)
+ u32 port, u32 index)
{
struct rdma_restrack_entry *res;
struct rdma_restrack_root *rt;
@@ -391,11 +422,11 @@ next:
return sum;
}
-/**
+/*
* rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
* specific port, including the running ones and history data
*/
-u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
{
struct rdma_port_counter *port_counter;
u64 sum;
@@ -430,15 +461,6 @@ err:
return NULL;
}
-static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- if ((counter->device != qp->device) || (counter->port != qp->port))
- return -EINVAL;
-
- return __rdma_counter_bind_qp(counter, qp);
-}
-
static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
u32 counter_id)
{
@@ -456,10 +478,10 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
return counter;
}
-/**
+/*
* rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
*/
-int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
u32 qp_num, u32 counter_id)
{
struct rdma_port_counter *port_counter;
@@ -481,12 +503,17 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
goto err;
}
- if (counter->res.task != qp->res.task) {
+ if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ if ((counter->device != qp->device) || (counter->port != qp->port)) {
ret = -EINVAL;
goto err_task;
}
- ret = rdma_counter_bind_qp_manual(counter, qp);
+ ret = __rdma_counter_bind_qp(counter, qp);
if (ret)
goto err_task;
@@ -500,11 +527,11 @@ err:
return ret;
}
-/**
+/*
* rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
* The id of new counter is returned in @counter_id
*/
-int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
u32 qp_num, u32 *counter_id)
{
struct rdma_port_counter *port_counter;
@@ -531,35 +558,27 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
goto err;
}
- counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
if (!counter) {
ret = -ENOMEM;
goto err;
}
- ret = rdma_counter_bind_qp_manual(counter, qp);
- if (ret)
- goto err_bind;
-
if (counter_id)
*counter_id = counter->id;
- rdma_counter_res_add(counter, qp);
-
rdma_restrack_put(&qp->res);
- return ret;
+ return 0;
-err_bind:
- rdma_counter_free(counter);
err:
rdma_restrack_put(&qp->res);
return ret;
}
-/**
+/*
* rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
*/
-int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
u32 qp_num, u32 counter_id)
{
struct rdma_port_counter *port_counter;
@@ -592,7 +611,7 @@ out:
return ret;
}
-int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode,
enum rdma_nl_counter_mask *mask)
{
@@ -618,10 +637,10 @@ void rdma_counter_init(struct ib_device *dev)
port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
mutex_init(&port_counter->lock);
- if (!dev->ops.alloc_hw_stats)
+ if (!dev->ops.alloc_hw_port_stats)
continue;
- port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
+ port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
if (!port_counter->hstats)
goto fail;
}
@@ -631,7 +650,7 @@ void rdma_counter_init(struct ib_device *dev)
fail:
for (i = port; i >= rdma_start_port(dev); i--) {
port_counter = &dev->port_data[port].port_counter;
- kfree(port_counter->hstats);
+ rdma_free_hw_stats_struct(port_counter->hstats);
port_counter->hstats = NULL;
mutex_destroy(&port_counter->lock);
}
@@ -644,7 +663,7 @@ void rdma_counter_release(struct ib_device *dev)
rdma_for_each_port(dev, port) {
port_counter = &dev->port_data[port].port_counter;
- kfree(port_counter->hstats);
+ rdma_free_hw_stats_struct(port_counter->hstats);
mutex_destroy(&port_counter->lock);
}
}
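
With RDMA_COUNTER_MASK_PID now part of ALL_AUTO_MODE_MASKS, auto mode funnels a QP into an existing counter only when every configured criterion agrees. Restating the matching rule from the hunks above as a standalone predicate (example_auto_mode_match is illustrative; the field names follow struct rdma_counter and struct ib_qp):

#include <linux/sched.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

static bool example_auto_mode_match(const struct ib_qp *qp,
				    const struct rdma_counter *counter,
				    enum rdma_nl_counter_mask mask)
{
	bool match = true;

	if (mask & RDMA_COUNTER_MASK_QP_TYPE)
		match &= (counter->mode.param.qp_type == qp->qp_type);

	if (mask & RDMA_COUNTER_MASK_PID)
		match &= (task_pid_nr(counter->res.task) ==
			  task_pid_nr(qp->res.task));

	return match;
}
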
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 4f25b2400694..a70876a0a231 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -2,12 +2,15 @@
/*
* Copyright (c) 2015 HGST, a Western Digital Company.
*/
-#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
#include <trace/events/rdma_core.h>
+/* Max size for shared CQ, may require tuning */
+#define IB_MAX_SHARED_CQ_SZ 4096U
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
@@ -68,6 +71,15 @@ static void rdma_dim_init(struct ib_cq *cq)
INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
}
+static void rdma_dim_destroy(struct ib_cq *cq)
+{
+ if (!cq->dim)
+ return;
+
+ cancel_work_sync(&cq->dim->work);
+ kfree(cq->dim);
+}
+
static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
int rc;
@@ -110,7 +122,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
}
/**
- * ib_process_direct_cq - process a CQ in caller context
+ * ib_process_cq_direct - process a CQ in caller context
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
@@ -184,24 +196,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * __ib_alloc_cq_user - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
* @caller: module owner name.
- * @udata: Valid user data or NULL for kernel object
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -218,20 +228,19 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
+ cq->comp_vector = comp_vector;
cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
if (!cq->wc)
goto out_free_cq;
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
ret = dev->ops.create_cq(cq, &cq_attr, NULL);
if (ret)
goto out_free_wc;
- rdma_restrack_kadd(&cq->res);
-
rdma_dim_init(cq);
switch (cq->poll_ctx) {
@@ -257,20 +266,22 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
goto out_destroy_cq;
}
+ rdma_restrack_add(&cq->res);
trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
return cq;
out_destroy_cq:
- rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
+ rdma_dim_destroy(cq);
+ cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
+ rdma_restrack_put(&cq->res);
kfree(cq->wc);
out_free_cq:
kfree(cq);
trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_cq_user);
+EXPORT_SYMBOL(__ib_alloc_cq);
/**
* __ib_alloc_cq_any - allocate a completion queue
@@ -295,20 +306,23 @@ struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
atomic_inc_return(&counter) %
min_t(int, dev->num_comp_vectors, num_online_cpus());
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- caller, NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ caller);
}
EXPORT_SYMBOL(__ib_alloc_cq_any);
/**
- * ib_free_cq_user - free a completion queue
+ * ib_free_cq - free a completion queue
* @cq: completion queue to free.
- * @udata: User data or NULL for kernel object
*/
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
+void ib_free_cq(struct ib_cq *cq)
{
+ int ret;
+
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
+ if (WARN_ON_ONCE(cq->cqe_used))
+ return;
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -324,13 +338,170 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
WARN_ON_ONCE(1);
}
+ rdma_dim_destroy(cq);
trace_cq_free(cq);
+ ret = cq->device->ops.destroy_cq(cq, NULL);
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
- if (cq->dim)
- cancel_work_sync(&cq->dim->work);
- kfree(cq->dim);
kfree(cq->wc);
kfree(cq);
}
-EXPORT_SYMBOL(ib_free_cq_user);
+EXPORT_SYMBOL(ib_free_cq);
+
+void ib_cq_pool_cleanup(struct ib_device *dev)
+{
+ struct ib_cq *cq, *n;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) {
+ list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
+ pool_entry) {
+ WARN_ON(cq->cqe_used);
+ list_del(&cq->pool_entry);
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ }
+}
+
+static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes,
+ enum ib_poll_context poll_ctx)
+{
+ LIST_HEAD(tmp_list);
+ unsigned int nr_cqs, i;
+ struct ib_cq *cq, *n;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate at least as many CQEs as requested, and otherwise
+ * a reasonable batch size so that we can share CQs between
+ * multiple users instead of allocating a larger number of CQs.
+ */
+ nr_cqes = min_t(unsigned int, dev->attrs.max_cqe,
+ max(nr_cqes, IB_MAX_SHARED_CQ_SZ));
+ nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ for (i = 0; i < nr_cqs; i++) {
+ cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto out_free_cqs;
+ }
+ cq->shared = true;
+ list_add_tail(&cq->pool_entry, &tmp_list);
+ }
+
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_splice(&tmp_list, &dev->cq_pools[poll_ctx]);
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return 0;
+
+out_free_cqs:
+ list_for_each_entry_safe(cq, n, &tmp_list, pool_entry) {
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ return ret;
+}
+
+/**
+ * ib_cq_pool_get() - Find the least used completion queue that matches
+ * a given cpu hint (or least used for wild card affinity) and fits
+ * nr_cqe.
+ * @dev: rdma device
+ * @nr_cqe: number of needed cqe entries
+ * @comp_vector_hint: completion vector hint (-1) for the driver to assign
+ * a comp vector based on internal counter
+ * @poll_ctx: cq polling context
+ *
+ * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and
+ * claims entries in it for us. In case there is no available cq, allocate
+ * a new cq with the requirements and add it to the device pool.
+ * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value
+ * for @poll_ctx.
+ */
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+ int comp_vector_hint,
+ enum ib_poll_context poll_ctx)
+{
+ static unsigned int default_comp_vector;
+ unsigned int vector, num_comp_vectors;
+ struct ib_cq *cq, *found = NULL;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return ERR_PTR(-EINVAL);
+ }
+
+ num_comp_vectors =
+ min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ /* Project the affinity to the device completion vector range */
+ if (comp_vector_hint < 0) {
+ comp_vector_hint =
+ (READ_ONCE(default_comp_vector) + 1) % num_comp_vectors;
+ WRITE_ONCE(default_comp_vector, comp_vector_hint);
+ }
+ vector = comp_vector_hint % num_comp_vectors;
+
+ /*
+ * Find the least used CQ with correct affinity and
+ * enough free CQ entries
+ */
+ while (!found) {
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_for_each_entry(cq, &dev->cq_pools[poll_ctx],
+ pool_entry) {
+ /*
+ * Check to see if we have found a CQ with the
+ * correct completion vector
+ */
+ if (vector != cq->comp_vector)
+ continue;
+ if (cq->cqe_used + nr_cqe > cq->cqe)
+ continue;
+ found = cq;
+ break;
+ }
+
+ if (found) {
+ found->cqe_used += nr_cqe;
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return found;
+ }
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ /*
+ * Didn't find a match or ran out of CQs in the device
+ * pool, allocate a new array of CQs.
+ */
+ ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return found;
+}
+EXPORT_SYMBOL(ib_cq_pool_get);
+
+/**
+ * ib_cq_pool_put - Return a CQ taken from a shared pool.
+ * @cq: The CQ to return.
+ * @nr_cqe: The max number of cqes that the user had requested.
+ */
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe)
+{
+ if (WARN_ON_ONCE(nr_cqe > cq->cqe_used))
+ return;
+
+ spin_lock_irq(&cq->device->cq_pools_lock);
+ cq->cqe_used -= nr_cqe;
+ spin_unlock_irq(&cq->device->cq_pools_lock);
+}
+EXPORT_SYMBOL(ib_cq_pool_put);
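
The new shared CQ pool exposes just two calls to ULPs: claim room on a pooled CQ with ib_cq_pool_get() and return the entries with ib_cq_pool_put(). A hedged usage sketch built on the signatures added above (the example_* wrappers are illustrative):

#include <rdma/ib_verbs.h>

static struct ib_cq *example_get_shared_cq(struct ib_device *dev,
					   unsigned int nr_cqe)
{
	/*
	 * A hint of -1 lets the core round-robin the completion vector.
	 * IB_POLL_DIRECT is not allowed for pooled CQs.
	 */
	return ib_cq_pool_get(dev, nr_cqe, -1, IB_POLL_SOFTIRQ);
}

static void example_put_shared_cq(struct ib_cq *cq, unsigned int nr_cqe)
{
	/* Return exactly the number of entries claimed in ib_cq_pool_get(). */
	ib_cq_pool_put(cq, nr_cqe);
}
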
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f6c255202d7f..b69e2c4e4d2a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
+static struct workqueue_struct *ib_unreg_wq;
/*
* Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -272,7 +273,6 @@ static void ib_device_check_mandatory(struct ib_device *device)
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
- IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_qp),
@@ -285,6 +285,7 @@ static void ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
+ IB_MANDATORY_FUNC(reg_user_mr),
IB_MANDATORY_FUNC(dereg_mr),
IB_MANDATORY_FUNC(get_port_immutable)
};
@@ -421,7 +422,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
return ret;
}
- strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+ strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
ret = rename_compat_devs(ibdev);
downgrade_write(&devices_rwsem);
@@ -491,6 +492,8 @@ static void ib_device_release(struct device *device)
free_netdevs(dev);
WARN_ON(refcount_read(&dev->refcount));
+ if (dev->hw_stats_data)
+ ib_device_release_hw_stats(dev->hw_stats_data);
if (dev->port_data) {
ib_cache_release_one(dev);
ib_security_release_port_pkey_list(dev);
@@ -570,6 +573,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
struct ib_device *_ib_alloc_device(size_t size)
{
struct ib_device *device;
+ unsigned int i;
if (WARN_ON(size < sizeof(struct ib_device)))
return NULL;
@@ -583,7 +587,6 @@ struct ib_device *_ib_alloc_device(size_t size)
return NULL;
}
- device->groups[0] = &ib_dev_attr_group;
rdma_init_coredev(&device->coredev, device, &init_net);
INIT_LIST_HEAD(&device->event_handler_list);
@@ -601,6 +604,43 @@ struct ib_device *_ib_alloc_device(size_t size)
init_completion(&device->unreg_completion);
INIT_WORK(&device->unregistration_work, ib_unregister_work);
+ spin_lock_init(&device->cq_pools_lock);
+ for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
+ INIT_LIST_HEAD(&device->cq_pools[i]);
+
+ rwlock_init(&device->cache_lock);
+
+ device->uverbs_cmd_mask =
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
return device;
}
EXPORT_SYMBOL(_ib_alloc_device);
@@ -677,8 +717,20 @@ static int add_client_context(struct ib_device *device,
if (ret)
goto out;
downgrade_write(&device->client_data_rwsem);
- if (client->add)
- client->add(device);
+ if (client->add) {
+ if (client->add(device)) {
+ /*
+ * If a client fails to add then the error code is
+ * ignored, but we won't call any more ops on this
+ * client.
+ */
+ xa_erase(&device->client_data, client->client_id);
+ up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
+ return 0;
+ }
+ }
/* Readers shall not see a client until add has been completed */
xa_set_mark(&device->client_data, client->client_id,
@@ -731,7 +783,7 @@ static void remove_client_context(struct ib_device *device,
static int alloc_port_data(struct ib_device *device)
{
struct ib_port_data_rcu *pdata_rcu;
- unsigned int port;
+ u32 port;
if (device->port_data)
return 0;
@@ -740,6 +792,10 @@ static int alloc_port_data(struct ib_device *device)
if (WARN_ON(!device->phys_port_cnt))
return -EINVAL;
+ /* Reserve U32_MAX so the logic to go over all the ports is sane */
+ if (WARN_ON(device->phys_port_cnt == U32_MAX))
+ return -EINVAL;
+
/*
* device->port_data is indexed directly by the port number to make
* access to this data as efficient as possible.
@@ -771,7 +827,7 @@ static int alloc_port_data(struct ib_device *device)
return 0;
}
-static int verify_immutable(const struct ib_device *dev, u8 port)
+static int verify_immutable(const struct ib_device *dev, u32 port)
{
return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
rdma_max_mad_size(dev, port) != 0);
@@ -779,7 +835,7 @@ static int verify_immutable(const struct ib_device *dev, u8 port)
static int setup_port_data(struct ib_device *device)
{
- unsigned int port;
+ u32 port;
int ret;
ret = alloc_port_data(device);
@@ -800,6 +856,20 @@ static int setup_port_data(struct ib_device *device)
return 0;
}
+/**
+ * ib_port_immutable_read() - Read rdma port's immutable data
+ * @dev: IB device
+ * @port: port number whose immutable data to read. It starts with index 1 and
+ * valid upto including rdma_end_port().
+ */
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port)
+{
+ WARN_ON(!rdma_is_port_valid(dev, port));
+ return &dev->port_data[port].immutable;
+}
+EXPORT_SYMBOL(ib_port_immutable_read);
+
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
if (dev->ops.get_dev_fw_str)
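
A hedged usage sketch for the ib_port_immutable_read() helper added in the hunk above: callers read cached per-port limits without a query_port round trip. The example_port_has_pkeys() wrapper is illustrative; the fields come from struct ib_port_immutable (core_cap_flags, pkey_tbl_len, gid_tbl_len, max_mad_size):

#include <rdma/ib_verbs.h>

static bool example_port_has_pkeys(struct ib_device *dev, u32 port)
{
	const struct ib_port_immutable *immutable;

	if (!rdma_is_port_valid(dev, port))
		return false;

	immutable = ib_port_immutable_read(dev, port);
	return immutable->pkey_tbl_len != 0;
}
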
@@ -820,15 +890,8 @@ static void ib_policy_change_task(struct work_struct *work)
rdma_for_each_port (dev, i) {
u64 sp;
- int ret = ib_get_cached_subnet_prefix(dev,
- i,
- &sp);
-
- WARN_ONCE(ret,
- "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
- ret);
- if (!ret)
- ib_security_cache_change(dev, i, sp);
+ ib_get_cached_subnet_prefix(dev, i, &sp);
+ ib_security_cache_change(dev, i, sp);
}
}
up_read(&devices_rwsem);
@@ -896,7 +959,9 @@ static int add_one_compat_dev(struct ib_device *device,
cdev->dev.parent = device->dev.parent;
rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
cdev->dev.release = compatdev_release;
- dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ if (ret)
+ goto add_err;
ret = device_add(&cdev->dev);
if (ret)
@@ -1152,7 +1217,7 @@ static int assign_name(struct ib_device *device, const char *name)
ret = -ENFILE;
goto out;
}
- strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+ strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
&last_id, GFP_KERNEL);
@@ -1164,56 +1229,6 @@ out:
return ret;
}
-static void setup_dma_device(struct ib_device *device)
-{
- struct device *parent = device->dev.parent;
-
- WARN_ON_ONCE(device->dma_device);
- if (device->dev.dma_ops) {
- /*
- * The caller provided custom DMA operations. Copy the
- * DMA-related fields that are used by e.g. dma_alloc_coherent()
- * into device->dev.
- */
- device->dma_device = &device->dev;
- if (!device->dev.dma_mask) {
- if (parent)
- device->dev.dma_mask = parent->dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- if (!device->dev.coherent_dma_mask) {
- if (parent)
- device->dev.coherent_dma_mask =
- parent->coherent_dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- } else {
- /*
- * The caller did not provide custom DMA operations. Use the
- * DMA mapping operations of the parent device.
- */
- WARN_ON_ONCE(!parent);
- device->dma_device = parent;
- }
-
- if (!device->dev.dma_parms) {
- if (parent) {
- /*
- * The caller did not provide DMA parameters, so
- * 'parent' probably represents a PCI device. The PCI
- * core sets the maximum segment size to 64
- * KB. Increase this parameter to 2 GB.
- */
- device->dev.dma_parms = parent->dma_parms;
- dma_set_max_seg_size(device->dma_device, SZ_2G);
- } else {
- WARN_ON_ONCE(true);
- }
- }
-}
-
/*
* setup_device() allocates memory and sets up data that requires calling the
* device ops, this is the only reason these actions are not done during
@@ -1224,7 +1239,6 @@ static int setup_device(struct ib_device *device)
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
int ret;
- setup_dma_device(device);
ib_device_check_mandatory(device);
ret = setup_port_data(device);
@@ -1268,6 +1282,8 @@ static void disable_device(struct ib_device *device)
remove_client_context(device, cid);
}
+ ib_cq_pool_cleanup(device);
+
/* Pairs with refcount_set in enable_device */
ib_device_put(device);
wait_for_completion(&device->unreg_completion);
@@ -1325,11 +1341,18 @@ out:
return ret;
}
+static void prevent_dealloc_device(struct ib_device *ib_dev)
+{
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device: Device to register
* @name: unique string device name. This may include a '%' which will
- * cause a unique index to be added to the passed device name.
+ * cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ * device will be used. In this case the caller should fully
+ * set up the ibdev for DMA. This usually means using dma_virt_ops.
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
@@ -1340,7 +1363,8 @@ out:
* asynchronously then the device pointer may become freed as soon as this
* function returns.
*/
-int ib_register_device(struct ib_device *device, const char *name)
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device)
{
int ret;
@@ -1348,6 +1372,14 @@ int ib_register_device(struct ib_device *device, const char *name)
if (ret)
return ret;
+ /*
+ * If the caller does not provide a DMA capable device then the IB core
+ * will set up ib_sge and scatterlist structures that stash the kernel
+ * virtual address into the address field.
+ */
+ WARN_ON(dma_device && !dma_device->dma_parms);
+ device->dma_device = dma_device;
+
ret = setup_device(device);
if (ret)
return ret;
@@ -1359,6 +1391,12 @@ int ib_register_device(struct ib_device *device, const char *name)
return ret;
}
+ device->groups[0] = &ib_dev_attr_group;
+ device->groups[1] = device->ops.device_group;
+ ret = ib_setup_device_attrs(device);
+ if (ret)
+ goto cache_cleanup;
+
ib_device_register_rdmacg(device);
rdma_counter_init(device);
@@ -1372,7 +1410,7 @@ int ib_register_device(struct ib_device *device, const char *name)
if (ret)
goto cg_cleanup;
- ret = ib_device_register_sysfs(device);
+ ret = ib_setup_port_attrs(&device->coredev);
if (ret) {
dev_warn(&device->dev,
"Couldn't register device with driver model\n");
@@ -1380,9 +1418,6 @@ int ib_register_device(struct ib_device *device, const char *name)
}
ret = enable_device_and_get(device);
- dev_set_uevent_suppress(&device->dev, false);
- /* Mark for userspace that device is ready */
- kobject_uevent(&device->dev.kobj, KOBJ_ADD);
if (ret) {
void (*dealloc_fn)(struct ib_device *);
@@ -1394,16 +1429,20 @@ int ib_register_device(struct ib_device *device, const char *name)
* possibility for a parallel unregistration along with this
* error flow. Since we have a refcount here we know any
* parallel flow is stopped in disable_device and will see the
- * NULL pointers, causing the responsibility to
+ * special dealloc_driver pointer, causing the responsibility to
* ib_dealloc_device() to revert back to this thread.
*/
dealloc_fn = device->ops.dealloc_driver;
- device->ops.dealloc_driver = NULL;
+ device->ops.dealloc_driver = prevent_dealloc_device;
ib_device_put(device);
__ib_unregister_device(device);
device->ops.dealloc_driver = dealloc_fn;
+ dev_set_uevent_suppress(&device->dev, false);
return ret;
}
+ dev_set_uevent_suppress(&device->dev, false);
+ /* Mark for userspace that device is ready */
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
ib_device_put(device);
return 0;
@@ -1413,6 +1452,7 @@ dev_cleanup:
cg_cleanup:
dev_set_uevent_suppress(&device->dev, false);
ib_device_unregister_rdmacg(device);
+cache_cleanup:
ib_cache_cleanup_one(device);
return ret;
}
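
With the signature change above, the DMA-capable struct device is handed to ib_register_device() explicitly (NULL is reserved for software devices that set up their own DMA handling). A hedged sketch of the call site for a PCI-backed provider (the example_* names are hypothetical):

#include <linux/pci.h>
#include <rdma/ib_verbs.h>

static int example_register(struct ib_device *ibdev, struct pci_dev *pdev)
{
	/* "%d" asks the core to append a unique index to the device name. */
	return ib_register_device(ibdev, "example%d", &pdev->dev);
}
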
@@ -1437,7 +1477,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
/* Expedite removing unregistered pointers from the hash table */
free_netdevs(ib_dev);
- ib_device_unregister_sysfs(ib_dev);
+ ib_free_port_attrs(&ib_dev->coredev);
device_del(&ib_dev->dev);
ib_device_unregister_rdmacg(ib_dev);
ib_cache_cleanup_one(ib_dev);
@@ -1446,7 +1486,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
* Drivers using the new flow may not call ib_dealloc_device except
* in error unwind prior to registration success.
*/
- if (ib_dev->ops.dealloc_driver) {
+ if (ib_dev->ops.dealloc_driver &&
+ ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
ib_dealloc_device(ib_dev);
}
@@ -1562,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
WARN_ON(!refcount_read(&ib_dev->refcount));
WARN_ON(!ib_dev->ops.dealloc_driver);
get_device(&ib_dev->dev);
- if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+ if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -1654,13 +1695,11 @@ int ib_device_set_netns_put(struct sk_buff *skb,
}
/*
- * Currently supported only for those providers which support
- * disassociation and don't do port specific sysfs init. Once a
- * port_cleanup infrastructure is implemented, this limitation will be
- * removed.
+ * All the ib_clients, including uverbs, are reset when the namespace is
+ * changed and this cannot be blocked waiting for userspace to do
+ * something, so disassociation is mandatory.
*/
- if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
- ib_devices_shared_netns) {
+ if (!dev->ops.disassociate_ucontext || ib_devices_shared_netns) {
ret = -EOPNOTSUPP;
goto ns_err;
}
@@ -1868,9 +1907,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev,
/**
* ib_get_client_nl_info - Fetch the nl_info from a client
- * @device - IB device
- * @client_name - Name of the client
- * @res - Result of the query
+ * @ibdev: IB device
+ * @client_name: Name of the client
+ * @res: Result of the query
*/
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
struct ib_client_nl_info *res)
@@ -1972,7 +2011,7 @@ void ib_dispatch_event_clients(struct ib_event *event)
}
static int iw_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
struct in_device *inetdev;
@@ -2011,10 +2050,9 @@ static int iw_query_port(struct ib_device *device,
}
static int __ib_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
- union ib_gid gid = {};
int err;
memset(port_attr, 0, sizeof(*port_attr));
@@ -2027,11 +2065,8 @@ static int __ib_query_port(struct ib_device *device,
IB_LINK_LAYER_INFINIBAND)
return 0;
- err = device->ops.query_gid(device, port_num, 0, &gid);
- if (err)
- return err;
-
- port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+ ib_get_cached_subnet_prefix(device, port_num,
+ &port_attr->subnet_prefix);
return 0;
}
@@ -2045,7 +2080,7 @@ static int __ib_query_port(struct ib_device *device,
* @port_attr pointer.
*/
int ib_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
if (!rdma_is_port_valid(device, port_num))
@@ -2097,7 +2132,7 @@ static void add_ndev_hash(struct ib_port_data *pdata)
* NETDEV_UNREGISTER event.
*/
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
- unsigned int port)
+ u32 port)
{
struct net_device *old_ndev;
struct ib_port_data *pdata;
@@ -2140,7 +2175,7 @@ EXPORT_SYMBOL(ib_device_set_netdev);
static void free_netdevs(struct ib_device *ib_dev)
{
unsigned long flags;
- unsigned int port;
+ u32 port;
if (!ib_dev->port_data)
return;
@@ -2171,7 +2206,7 @@ static void free_netdevs(struct ib_device *ib_dev)
}
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
- unsigned int port)
+ u32 port)
{
struct ib_port_data *pdata;
struct net_device *res;
@@ -2258,7 +2293,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_callback cb,
void *cookie)
{
- unsigned int port;
+ u32 port;
rdma_for_each_port (ib_dev, port)
if (rdma_protocol_roce(ib_dev, port)) {
@@ -2298,7 +2333,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
up_read(&devices_rwsem);
}
-/**
+/*
* ib_enum_all_devs - enumerate all ib_devices
* @cb: Callback to call for each found ib_device
*
@@ -2336,11 +2371,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
* ib_query_pkey() fetches the specified P_Key table entry.
*/
int ib_query_pkey(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey)
+ u32 port_num, u16 index, u16 *pkey)
{
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ if (!device->ops.query_pkey)
+ return -EOPNOTSUPP;
+
return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
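
ib_query_pkey() now fails cleanly with -EOPNOTSUPP instead of dereferencing a NULL op when the driver (typically an iWARP device) implements no P_Key table. A caller that only needs a best-effort value can treat that case like the single default P_Key; a hedged sketch:

	u16 pkey;
	int ret;

	ret = ib_query_pkey(ibdev, port_num, 0, &pkey);
	if (ret == -EOPNOTSUPP)
		pkey = 0xffff;	/* assume the default full-membership P_Key */
	else if (ret)
		return ret;
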
@@ -2378,7 +2416,7 @@ EXPORT_SYMBOL(ib_modify_device);
* @port_modify_mask and @port_modify structure.
*/
int ib_modify_port(struct ib_device *device,
- u8 port_num, int port_modify_mask,
+ u32 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
int rc;
@@ -2410,10 +2448,10 @@ EXPORT_SYMBOL(ib_modify_port);
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index)
+ u32 *port_num, u16 *index)
{
union ib_gid tmp_gid;
- unsigned int port;
+ u32 port;
int ret, i;
rdma_for_each_port (device, port) {
@@ -2424,7 +2462,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
++i) {
ret = rdma_query_gid(device, port, i, &tmp_gid);
if (ret)
- return ret;
+ continue;
+
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
@@ -2447,7 +2486,7 @@ EXPORT_SYMBOL(ib_find_gid);
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
- u8 port_num, u16 pkey, u16 *index)
+ u32 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
@@ -2490,7 +2529,7 @@ EXPORT_SYMBOL(ib_find_pkey);
*
*/
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
- u8 port,
+ u32 port,
u16 pkey,
const union ib_gid *gid,
const struct sockaddr *addr)
@@ -2555,8 +2594,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, add_gid);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
- SET_DEVICE_OP(dev_ops, alloc_fmr);
- SET_DEVICE_OP(dev_ops, alloc_hw_stats);
+ SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
+ SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
SET_DEVICE_OP(dev_ops, alloc_mr);
SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
SET_DEVICE_OP(dev_ops, alloc_mw);
@@ -2575,14 +2614,13 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
SET_DEVICE_OP(dev_ops, create_flow);
- SET_DEVICE_OP(dev_ops, create_flow_action_esp);
SET_DEVICE_OP(dev_ops, create_qp);
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
SET_DEVICE_OP(dev_ops, create_srq);
+ SET_DEVICE_OP(dev_ops, create_user_ah);
SET_DEVICE_OP(dev_ops, create_wq);
SET_DEVICE_OP(dev_ops, dealloc_dm);
SET_DEVICE_OP(dev_ops, dealloc_driver);
- SET_DEVICE_OP(dev_ops, dealloc_fmr);
SET_DEVICE_OP(dev_ops, dealloc_mw);
SET_DEVICE_OP(dev_ops, dealloc_pd);
SET_DEVICE_OP(dev_ops, dealloc_ucontext);
@@ -2598,24 +2636,31 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
SET_DEVICE_OP(dev_ops, destroy_srq);
SET_DEVICE_OP(dev_ops, destroy_wq);
+ SET_DEVICE_OP(dev_ops, device_group);
SET_DEVICE_OP(dev_ops, detach_mcast);
SET_DEVICE_OP(dev_ops, disassociate_ucontext);
SET_DEVICE_OP(dev_ops, drain_rq);
SET_DEVICE_OP(dev_ops, drain_sq);
SET_DEVICE_OP(dev_ops, enable_driver);
- SET_DEVICE_OP(dev_ops, fill_res_entry);
- SET_DEVICE_OP(dev_ops, fill_stat_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
SET_DEVICE_OP(dev_ops, get_dev_fw_str);
SET_DEVICE_OP(dev_ops, get_dma_mr);
SET_DEVICE_OP(dev_ops, get_hw_stats);
SET_DEVICE_OP(dev_ops, get_link_layer);
SET_DEVICE_OP(dev_ops, get_netdev);
+ SET_DEVICE_OP(dev_ops, get_numa_node);
SET_DEVICE_OP(dev_ops, get_port_immutable);
SET_DEVICE_OP(dev_ops, get_vector_affinity);
SET_DEVICE_OP(dev_ops, get_vf_config);
SET_DEVICE_OP(dev_ops, get_vf_guid);
SET_DEVICE_OP(dev_ops, get_vf_stats);
- SET_DEVICE_OP(dev_ops, init_port);
SET_DEVICE_OP(dev_ops, iw_accept);
SET_DEVICE_OP(dev_ops, iw_add_ref);
SET_DEVICE_OP(dev_ops, iw_connect);
@@ -2626,19 +2671,19 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, iw_rem_ref);
SET_DEVICE_OP(dev_ops, map_mr_sg);
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
- SET_DEVICE_OP(dev_ops, map_phys_fmr);
SET_DEVICE_OP(dev_ops, mmap);
SET_DEVICE_OP(dev_ops, mmap_free);
SET_DEVICE_OP(dev_ops, modify_ah);
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
- SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+ SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
SET_DEVICE_OP(dev_ops, modify_srq);
SET_DEVICE_OP(dev_ops, modify_wq);
SET_DEVICE_OP(dev_ops, peek_cq);
SET_DEVICE_OP(dev_ops, poll_cq);
+ SET_DEVICE_OP(dev_ops, port_groups);
SET_DEVICE_OP(dev_ops, post_recv);
SET_DEVICE_OP(dev_ops, post_send);
SET_DEVICE_OP(dev_ops, post_srq_recv);
@@ -2650,26 +2695,46 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, query_port);
SET_DEVICE_OP(dev_ops, query_qp);
SET_DEVICE_OP(dev_ops, query_srq);
+ SET_DEVICE_OP(dev_ops, query_ucontext);
SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
SET_DEVICE_OP(dev_ops, read_counters);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
- SET_DEVICE_OP(dev_ops, req_ncomp_notif);
+ SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);
SET_DEVICE_OP(dev_ops, resize_cq);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
- SET_DEVICE_OP(dev_ops, unmap_fmr);
SET_OBJ_SIZE(dev_ops, ib_ah);
+ SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
+ SET_OBJ_SIZE(dev_ops, ib_qp);
+ SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
+ SET_OBJ_SIZE(dev_ops, ib_xrcd);
}
EXPORT_SYMBOL(ib_set_device_ops);
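
ib_set_device_ops() copies every non-NULL member of a driver's const ops table into the device, which is why each new optional operation only needs a SET_DEVICE_OP()/SET_OBJ_SIZE() line here. On the driver side the usage pattern is unchanged; a sketch with hypothetical "mydrv" callbacks and structures:

	static const struct ib_device_ops mydrv_dev_ops = {
		.owner = THIS_MODULE,
		.query_port = mydrv_query_port,
		.create_cq = mydrv_create_cq,
		.destroy_cq = mydrv_destroy_cq,

		INIT_RDMA_OBJ_SIZE(ib_cq, mydrv_cq, ibcq),
		INIT_RDMA_OBJ_SIZE(ib_pd, mydrv_pd, ibpd),
	};

	/* during device probe, before ib_register_device() */
	ib_set_device_ops(&mydrv->ibdev, &mydrv_dev_ops);
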
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ sg_dma_address(s) = (uintptr_t)sg_virt(s);
+ sg_dma_len(s) = s->length;
+ }
+ return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
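
ib_dma_virt_map_sg() exists for the software transports built with CONFIG_INFINIBAND_VIRT_DMA (rxe, siw), which have no DMA device: the "DMA address" of each scatterlist entry is simply its kernel virtual address. The generic ib_dma_map_sg() helper in include/rdma/ib_verbs.h dispatches to it; roughly like the following simplified sketch, which is not the exact in-tree helper:

	static inline int example_dma_map_sg(struct ib_device *dev,
					     struct scatterlist *sg, int nents,
					     enum dma_data_direction direction)
	{
		if (ib_uses_virt_dma(dev))
			return ib_dma_virt_map_sg(dev, sg, nents);
		return dma_map_sg(dev->dma_device, sg, nents, direction);
	}
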
+
static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.doit = ib_nl_handle_resolve_resp,
@@ -2687,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
static int __init ib_core_init(void)
{
- int ret;
+ int ret = -ENOMEM;
ib_wq = alloc_workqueue("infiniband", 0, 0);
if (!ib_wq)
return -ENOMEM;
+ ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_unreg_wq)
+ goto err;
+
ib_comp_wq = alloc_workqueue("ib-comp-wq",
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
- if (!ib_comp_wq) {
- ret = -ENOMEM;
- goto err;
- }
+ if (!ib_comp_wq)
+ goto err_unbound;
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
- if (!ib_comp_unbound_wq) {
- ret = -ENOMEM;
+ if (!ib_comp_unbound_wq)
goto err_comp;
- }
ret = class_register(&ib_class);
if (ret) {
@@ -2719,7 +2785,7 @@ static int __init ib_core_init(void)
ret = addr_init();
if (ret) {
- pr_warn("Could't init IB address resolution\n");
+ pr_warn("Couldn't init IB address resolution\n");
goto err_ibnl;
}
@@ -2749,10 +2815,18 @@ static int __init ib_core_init(void)
nldev_init();
rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
- roce_gid_mgmt_init();
+ ret = roce_gid_mgmt_init();
+ if (ret) {
+ pr_warn("Couldn't init RoCE GID management\n");
+ goto err_parent;
+ }
return 0;
+err_parent:
+ rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
+ unregister_pernet_device(&rdma_dev_net_ops);
err_compat:
unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
@@ -2767,6 +2841,8 @@ err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
+err_unbound:
+ destroy_workqueue(ib_unreg_wq);
err:
destroy_workqueue(ib_wq);
return ret;
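
ib_core_init() now owns a dedicated unbound workqueue for device unregistration and checks the roce_gid_mgmt_init() return value, so the unwind chain gains two more labels. The shape is the usual mirrored goto ladder: each step that can fail jumps to the label that undoes everything initialized before it, in reverse order. Stripped down to its skeleton (hypothetical names):

	static struct workqueue_struct *wq_a, *wq_b;

	static int __init example_init(void)
	{
		int ret = -ENOMEM;

		wq_a = alloc_workqueue("example-a", 0, 0);
		if (!wq_a)
			return -ENOMEM;

		wq_b = alloc_workqueue("example-b", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
		if (!wq_b)
			goto err_a;

		ret = example_register();	/* hypothetical later step */
		if (ret)
			goto err_b;

		return 0;

	err_b:
		destroy_workqueue(wq_b);
	err_a:
		destroy_workqueue(wq_a);
		return ret;
	}
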
@@ -2788,7 +2864,7 @@ static void __exit ib_core_cleanup(void)
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
- flush_workqueue(system_unbound_wq);
+ destroy_workqueue(ib_unreg_wq);
WARN_ON(!xa_empty(&clients));
WARN_ON(!xa_empty(&devices));
}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
deleted file mode 100644
index e08aec427027..000000000000
--- a/drivers/infiniband/core/fmr_pool.c
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/jhash.h>
-#include <linux/kthread.h>
-
-#include <rdma/ib_fmr_pool.h>
-
-#include "core_priv.h"
-
-#define PFX "fmr_pool: "
-
-enum {
- IB_FMR_MAX_REMAPS = 32,
-
- IB_FMR_HASH_BITS = 8,
- IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
- IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
-};
-
-/*
- * If an FMR is not in use, then the list member will point to either
- * its pool's free_list (if the FMR can be mapped again; that is,
- * remap_count < pool->max_remaps) or its pool's dirty_list (if the
- * FMR needs to be unmapped before being remapped). In either of
- * these cases it is a bug if the ref_count is not 0. In other words,
- * if ref_count is > 0, then the list member must not be linked into
- * either free_list or dirty_list.
- *
- * The cache_node member is used to link the FMR into a cache bucket
- * (if caching is enabled). This is independent of the reference
- * count of the FMR. When a valid FMR is released, its ref_count is
- * decremented, and if ref_count reaches 0, the FMR is placed in
- * either free_list or dirty_list as appropriate. However, it is not
- * removed from the cache and may be "revived" if a call to
- * ib_fmr_register_physical() occurs before the FMR is remapped. In
- * this case we just increment the ref_count and remove the FMR from
- * free_list/dirty_list.
- *
- * Before we remap an FMR from free_list, we remove it from the cache
- * (to prevent another user from obtaining a stale FMR). When an FMR
- * is released, we add it to the tail of the free list, so that our
- * cache eviction policy is "least recently used."
- *
- * All manipulation of ref_count, list and cache_node is protected by
- * pool_lock to maintain consistency.
- */
-
-struct ib_fmr_pool {
- spinlock_t pool_lock;
-
- int pool_size;
- int max_pages;
- int max_remaps;
- int dirty_watermark;
- int dirty_len;
- struct list_head free_list;
- struct list_head dirty_list;
- struct hlist_head *cache_bucket;
-
- void (*flush_function)(struct ib_fmr_pool *pool,
- void * arg);
- void *flush_arg;
-
- struct kthread_worker *worker;
- struct kthread_work work;
-
- atomic_t req_ser;
- atomic_t flush_ser;
-
- wait_queue_head_t force_wait;
-};
-
-static inline u32 ib_fmr_hash(u64 first_page)
-{
- return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
- (IB_FMR_HASH_SIZE - 1);
-}
-
-/* Caller must hold pool_lock */
-static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
- u64 *page_list,
- int page_list_len,
- u64 io_virtual_address)
-{
- struct hlist_head *bucket;
- struct ib_pool_fmr *fmr;
-
- if (!pool->cache_bucket)
- return NULL;
-
- bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
-
- hlist_for_each_entry(fmr, bucket, cache_node)
- if (io_virtual_address == fmr->io_virtual_address &&
- page_list_len == fmr->page_list_len &&
- !memcmp(page_list, fmr->page_list,
- page_list_len * sizeof *page_list))
- return fmr;
-
- return NULL;
-}
-
-static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
-{
- int ret;
- struct ib_pool_fmr *fmr;
- LIST_HEAD(unmap_list);
- LIST_HEAD(fmr_list);
-
- spin_lock_irq(&pool->pool_lock);
-
- list_for_each_entry(fmr, &pool->dirty_list, list) {
- hlist_del_init(&fmr->cache_node);
- fmr->remap_count = 0;
- list_add_tail(&fmr->fmr->list, &fmr_list);
- }
-
- list_splice_init(&pool->dirty_list, &unmap_list);
- pool->dirty_len = 0;
-
- spin_unlock_irq(&pool->pool_lock);
-
- if (list_empty(&unmap_list)) {
- return;
- }
-
- ret = ib_unmap_fmr(&fmr_list);
- if (ret)
- pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
-
- spin_lock_irq(&pool->pool_lock);
- list_splice(&unmap_list, &pool->free_list);
- spin_unlock_irq(&pool->pool_lock);
-}
-
-static void ib_fmr_cleanup_func(struct kthread_work *work)
-{
- struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
-
- ib_fmr_batch_release(pool);
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
-
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
- kthread_queue_work(pool->worker, &pool->work);
-}
-
-/**
- * ib_create_fmr_pool - Create an FMR pool
- * @pd:Protection domain for FMRs
- * @params:FMR pool parameters
- *
- * Create a pool of FMRs. Return value is pointer to new pool or
- * error code if creation failed.
- */
-struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
- struct ib_fmr_pool_param *params)
-{
- struct ib_device *device;
- struct ib_fmr_pool *pool;
- int i;
- int ret;
- int max_remaps;
-
- if (!params)
- return ERR_PTR(-EINVAL);
-
- device = pd->device;
- if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr ||
- !device->ops.map_phys_fmr || !device->ops.unmap_fmr) {
- dev_info(&device->dev, "Device does not support FMRs\n");
- return ERR_PTR(-ENOSYS);
- }
-
- if (!device->attrs.max_map_per_fmr)
- max_remaps = IB_FMR_MAX_REMAPS;
- else
- max_remaps = device->attrs.max_map_per_fmr;
-
- pool = kmalloc(sizeof *pool, GFP_KERNEL);
- if (!pool)
- return ERR_PTR(-ENOMEM);
-
- pool->cache_bucket = NULL;
- pool->flush_function = params->flush_function;
- pool->flush_arg = params->flush_arg;
-
- INIT_LIST_HEAD(&pool->free_list);
- INIT_LIST_HEAD(&pool->dirty_list);
-
- if (params->cache) {
- pool->cache_bucket =
- kmalloc_array(IB_FMR_HASH_SIZE,
- sizeof(*pool->cache_bucket),
- GFP_KERNEL);
- if (!pool->cache_bucket) {
- ret = -ENOMEM;
- goto out_free_pool;
- }
-
- for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
- INIT_HLIST_HEAD(pool->cache_bucket + i);
- }
-
- pool->pool_size = 0;
- pool->max_pages = params->max_pages_per_fmr;
- pool->max_remaps = max_remaps;
- pool->dirty_watermark = params->dirty_watermark;
- pool->dirty_len = 0;
- spin_lock_init(&pool->pool_lock);
- atomic_set(&pool->req_ser, 0);
- atomic_set(&pool->flush_ser, 0);
- init_waitqueue_head(&pool->force_wait);
-
- pool->worker =
- kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
- if (IS_ERR(pool->worker)) {
- pr_warn(PFX "couldn't start cleanup kthread worker\n");
- ret = PTR_ERR(pool->worker);
- goto out_free_pool;
- }
- kthread_init_work(&pool->work, ib_fmr_cleanup_func);
-
- {
- struct ib_pool_fmr *fmr;
- struct ib_fmr_attr fmr_attr = {
- .max_pages = params->max_pages_per_fmr,
- .max_maps = pool->max_remaps,
- .page_shift = params->page_shift
- };
- int bytes_per_fmr = sizeof *fmr;
-
- if (pool->cache_bucket)
- bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
-
- for (i = 0; i < params->pool_size; ++i) {
- fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
- if (!fmr)
- goto out_fail;
-
- fmr->pool = pool;
- fmr->remap_count = 0;
- fmr->ref_count = 0;
- INIT_HLIST_NODE(&fmr->cache_node);
-
- fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
- if (IS_ERR(fmr->fmr)) {
- pr_warn(PFX "fmr_create failed for FMR %d\n",
- i);
- kfree(fmr);
- goto out_fail;
- }
-
- list_add_tail(&fmr->list, &pool->free_list);
- ++pool->pool_size;
- }
- }
-
- return pool;
-
- out_free_pool:
- kfree(pool->cache_bucket);
- kfree(pool);
-
- return ERR_PTR(ret);
-
- out_fail:
- ib_destroy_fmr_pool(pool);
-
- return ERR_PTR(-ENOMEM);
-}
-EXPORT_SYMBOL(ib_create_fmr_pool);
-
-/**
- * ib_destroy_fmr_pool - Free FMR pool
- * @pool:FMR pool to free
- *
- * Destroy an FMR pool and free all associated resources.
- */
-void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
-{
- struct ib_pool_fmr *fmr;
- struct ib_pool_fmr *tmp;
- LIST_HEAD(fmr_list);
- int i;
-
- kthread_destroy_worker(pool->worker);
- ib_fmr_batch_release(pool);
-
- i = 0;
- list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
- if (fmr->remap_count) {
- INIT_LIST_HEAD(&fmr_list);
- list_add_tail(&fmr->fmr->list, &fmr_list);
- ib_unmap_fmr(&fmr_list);
- }
- ib_dealloc_fmr(fmr->fmr);
- list_del(&fmr->list);
- kfree(fmr);
- ++i;
- }
-
- if (i < pool->pool_size)
- pr_warn(PFX "pool still has %d regions registered\n",
- pool->pool_size - i);
-
- kfree(pool->cache_bucket);
- kfree(pool);
-}
-EXPORT_SYMBOL(ib_destroy_fmr_pool);
-
-/**
- * ib_flush_fmr_pool - Invalidate all unmapped FMRs
- * @pool:FMR pool to flush
- *
- * Ensure that all unmapped FMRs are fully invalidated.
- */
-int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
-{
- int serial;
- struct ib_pool_fmr *fmr, *next;
-
- /*
- * The free_list holds FMRs that may have been used
- * but have not been remapped enough times to be dirty.
- * Put them on the dirty list now so that the cleanup
- * thread will reap them too.
- */
- spin_lock_irq(&pool->pool_lock);
- list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
- if (fmr->remap_count > 0)
- list_move(&fmr->list, &pool->dirty_list);
- }
- spin_unlock_irq(&pool->pool_lock);
-
- serial = atomic_inc_return(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
-
- if (wait_event_interruptible(pool->force_wait,
- atomic_read(&pool->flush_ser) - serial >= 0))
- return -EINTR;
-
- return 0;
-}
-EXPORT_SYMBOL(ib_flush_fmr_pool);
-
-/**
- * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
- * @pool_handle: FMR pool to allocate FMR from
- * @page_list: List of pages to map
- * @list_len: Number of pages in @page_list
- * @io_virtual_address: I/O virtual address for new FMR
- */
-struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
- u64 *page_list,
- int list_len,
- u64 io_virtual_address)
-{
- struct ib_fmr_pool *pool = pool_handle;
- struct ib_pool_fmr *fmr;
- unsigned long flags;
- int result;
-
- if (list_len < 1 || list_len > pool->max_pages)
- return ERR_PTR(-EINVAL);
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- fmr = ib_fmr_cache_lookup(pool,
- page_list,
- list_len,
- io_virtual_address);
- if (fmr) {
- /* found in cache */
- ++fmr->ref_count;
- if (fmr->ref_count == 1) {
- list_del(&fmr->list);
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return fmr;
- }
-
- if (list_empty(&pool->free_list)) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- return ERR_PTR(-EAGAIN);
- }
-
- fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
- list_del(&fmr->list);
- hlist_del_init(&fmr->cache_node);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
- io_virtual_address);
-
- if (result) {
- spin_lock_irqsave(&pool->pool_lock, flags);
- list_add(&fmr->list, &pool->free_list);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- pr_warn(PFX "fmr_map returns %d\n", result);
-
- return ERR_PTR(result);
- }
-
- ++fmr->remap_count;
- fmr->ref_count = 1;
-
- if (pool->cache_bucket) {
- fmr->io_virtual_address = io_virtual_address;
- fmr->page_list_len = list_len;
- memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- hlist_add_head(&fmr->cache_node,
- pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- }
-
- return fmr;
-}
-EXPORT_SYMBOL(ib_fmr_pool_map_phys);
-
-/**
- * ib_fmr_pool_unmap - Unmap FMR
- * @fmr:FMR to unmap
- *
- * Unmap an FMR. The FMR mapping may remain valid until the FMR is
- * reused (or until ib_flush_fmr_pool() is called).
- */
-void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
-{
- struct ib_fmr_pool *pool;
- unsigned long flags;
-
- pool = fmr->pool;
-
- spin_lock_irqsave(&pool->pool_lock, flags);
-
- --fmr->ref_count;
- if (!fmr->ref_count) {
- if (fmr->remap_count < pool->max_remaps) {
- list_add_tail(&fmr->list, &pool->free_list);
- } else {
- list_add_tail(&fmr->list, &pool->dirty_list);
- if (++pool->dirty_len >= pool->dirty_watermark) {
- atomic_inc(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
- }
- }
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-}
-EXPORT_SYMBOL(ib_fmr_pool_unmap);
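
The FMR pool API is removed together with the verbs FMR ops above; the last in-tree users were converted to the general memory-registration flow. A rough sketch of the replacement, assuming an already DMA-mapped scatterlist and a connected QP to post on (completion handling and local invalidation omitted):

	struct ib_mr *mr;
	struct ib_reg_wr reg_wr = {};
	int n;

	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_pages);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	n = ib_map_mr_sg(mr, sgl, sg_nents, NULL, PAGE_SIZE);
	if (n <= 0) {
		ib_dereg_mr(mr);
		return n < 0 ? n : -EINVAL;
	}

	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
	/* post &reg_wr.wr on the QP; afterwards mr->rkey/mr->iova describe the
	 * mapped region, and IB_WR_LOCAL_INV tears the mapping down */
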
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index da8adadf4755..2b47073c61a6 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -211,8 +211,7 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
- BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+ if (refcount_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
return 1;
@@ -225,7 +224,7 @@ static void add_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
}
static void rem_ref(struct iw_cm_id *cm_id)
@@ -257,7 +256,7 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
cm_id_priv->id.add_ref = add_ref;
cm_id_priv->id.rem_ref = rem_ref;
spin_lock_init(&cm_id_priv->lock);
- atomic_set(&cm_id_priv->refcount, 1);
+ refcount_set(&cm_id_priv->refcount, 1);
init_waitqueue_head(&cm_id_priv->connect_wait);
init_completion(&cm_id_priv->destroy_comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
@@ -1094,7 +1093,7 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
}
}
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
if (list_empty(&cm_id_priv->work_list)) {
list_add_tail(&work->list, &cm_id_priv->work_list);
queue_work(iwcm_wq, &work->work);
@@ -1187,29 +1186,34 @@ static int __init iw_cm_init(void)
ret = iwpm_init(RDMA_NL_IWCM);
if (ret)
- pr_err("iw_cm: couldn't init iwpm\n");
- else
- rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
+ return ret;
+
iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
if (!iwcm_wq)
- return -ENOMEM;
+ goto err_alloc;
iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
iwcm_ctl_table);
if (!iwcm_ctl_table_hdr) {
pr_err("iw_cm: couldn't register sysctl paths\n");
- destroy_workqueue(iwcm_wq);
- return -ENOMEM;
+ goto err_sysctl;
}
+ rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
return 0;
+
+err_sysctl:
+ destroy_workqueue(iwcm_wq);
+err_alloc:
+ iwpm_exit(RDMA_NL_IWCM);
+ return -ENOMEM;
}
static void __exit iw_cm_cleanup(void)
{
+ rdma_nl_unregister(RDMA_NL_IWCM);
unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
- rdma_nl_unregister(RDMA_NL_IWCM);
iwpm_exit(RDMA_NL_IWCM);
}
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index 82c2cd1b0a80..bf74639be128 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -52,7 +52,7 @@ struct iwcm_id_private {
wait_queue_head_t connect_wait;
struct list_head work_list;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head work_free_list;
};
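
The iwcm_id_private reference count moves from atomic_t to refcount_t, which saturates instead of wrapping and already warns on increment-from-zero and underflow, making the old BUG_ON(refcount == 0) check redundant. The conversion pattern in isolation:

	struct obj {
		refcount_t refcount;
	};

	static void obj_init(struct obj *o)
	{
		refcount_set(&o->refcount, 1);		/* was atomic_set() */
	}

	static void obj_get(struct obj *o)
	{
		refcount_inc(&o->refcount);		/* was atomic_inc() */
	}

	static int obj_put(struct obj *o)
	{
		if (refcount_dec_and_test(&o->refcount)) {	/* was atomic_dec_and_test() */
			kfree(o);
			return 1;
		}
		return 0;
	}
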
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 46686990a827..3c9a9869212b 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -69,10 +69,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto pid_query_error;
- }
if (iwpm_check_registration(nl_client, IWPM_REG_VALID) ||
iwpm_user_pid == IWPM_PID_UNAVAILABLE)
return 0;
@@ -123,7 +119,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
pid_query_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
@@ -153,10 +149,6 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto add_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -211,7 +203,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
add_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
add_mapping_error_nowarn:
dev_kfree_skb(skb);
if (nlmsg_request)
@@ -240,10 +232,6 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto query_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -304,7 +292,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
query_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
query_mapping_error_nowarn:
dev_kfree_skb(skb);
if (nlmsg_request)
@@ -331,10 +319,6 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto remove_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) {
@@ -372,7 +356,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
"remove_mapping: Local sockaddr:");
return 0;
remove_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb_any(skb);
return ret;
@@ -392,7 +376,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
/**
* iwpm_register_pid_cb - Process the port mapper response to
* iwpm_register_pid query
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* If successful, the function receives the userspace port mapper pid
@@ -431,7 +415,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version < IWPM_UABI_VERSION_MIN) {
- pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+ pr_info("%s: Incorrect info (dev = %s name = %s version = %u)\n",
__func__, dev_name, iwpm_name, iwpm_version);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto register_pid_response_exit;
@@ -439,13 +423,12 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_user_pid = cb->nlh->nlmsg_pid;
iwpm_ulib_version = iwpm_version;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
- pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
__func__, iwpm_user_pid);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
__func__, iwpm_user_pid);
- if (iwpm_valid_client(nl_client))
- iwpm_set_registration(nl_client, IWPM_REG_VALID);
+ iwpm_set_registration(nl_client, IWPM_REG_VALID);
register_pid_response_exit:
nlmsg_request->request_done = 1;
/* always for found nlmsg_request */
@@ -468,7 +451,7 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
/**
* iwpm_add_mapping_cb - Process the port mapper response to
* iwpm_add_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -528,7 +511,8 @@ add_mapping_response_exit:
}
/* netlink attribute policy for the response to add and query mapping request
- * and response with remote address info */
+ * and response with remote address info
+ */
static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
[IWPM_NLA_RQUERY_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_RQUERY_LOCAL_ADDR] = {
@@ -545,7 +529,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] =
/**
* iwpm_add_and_query_mapping_cb - Process the port mapper response to
* iwpm_add_and_query_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
@@ -627,7 +611,7 @@ query_mapping_response_exit:
/**
* iwpm_remote_info_cb - Process remote connecting peer address info, which
* the port mapper has received from the connecting peer
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Stores the IPv4/IPv6 address info in a hash table
@@ -648,11 +632,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
local_sockaddr = (struct sockaddr_storage *)
@@ -706,7 +685,7 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
/**
* iwpm_mapping_info_cb - Process a notification that the userspace
* port mapper daemon is started
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send all the local mapping
@@ -730,22 +709,17 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
if (strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version < IWPM_UABI_VERSION_MIN) {
- pr_info("%s: Invalid port mapper name = %s version = %d\n",
+ pr_info("%s: Invalid port mapper name = %s version = %u\n",
__func__, iwpm_name, iwpm_version);
return ret;
}
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_user_pid = cb->nlh->nlmsg_pid;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
- pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
__func__, iwpm_user_pid);
if (!iwpm_mapinfo_available())
@@ -766,7 +740,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
/**
* iwpm_ack_mapping_info_cb - Process the port mapper ack for
* the provided local mapping info records
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -796,7 +770,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
/**
* iwpm_mapping_error_cb - Process port mapper notification for error
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -841,7 +815,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
/**
* iwpm_hello_cb - Process a hello message from iwpmd
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send the kernel's abi_version
@@ -862,11 +836,6 @@ int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
}
abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]);
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 13495b43dbc1..358a2db38d23 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -48,7 +48,6 @@ static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
static struct hlist_head *iwpm_reminfo_bucket;
static DEFINE_SPINLOCK(iwpm_reminfo_lock);
-static DEFINE_MUTEX(iwpm_admin_lock);
static struct iwpm_admin_data iwpm_admin;
/**
@@ -59,35 +58,21 @@ static struct iwpm_admin_data iwpm_admin;
*/
int iwpm_init(u8 nl_client)
{
- int ret = 0;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!iwpm_hash_bucket) {
- ret = -ENOMEM;
- goto init_exit;
- }
- iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!iwpm_reminfo_bucket) {
- kfree(iwpm_hash_bucket);
- ret = -ENOMEM;
- goto init_exit;
- }
- }
- atomic_inc(&iwpm_admin.refcount);
-init_exit:
- mutex_unlock(&iwpm_admin_lock);
- if (!ret) {
- iwpm_set_valid(nl_client, 1);
- iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
- pr_debug("%s: Mapinfo and reminfo tables are created\n",
- __func__);
+ iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_hash_bucket)
+ return -ENOMEM;
+
+ iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_reminfo_bucket) {
+ kfree(iwpm_hash_bucket);
+ return -ENOMEM;
}
- return ret;
+
+ iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
+ pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__);
+ return 0;
}
static void free_hash_bucket(void);
@@ -101,22 +86,9 @@ static void free_reminfo_bucket(void);
*/
int iwpm_exit(u8 nl_client)
{
-
- if (!iwpm_valid_client(nl_client))
- return -EINVAL;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- mutex_unlock(&iwpm_admin_lock);
- pr_err("%s Incorrect usage - negative refcount\n", __func__);
- return -EINVAL;
- }
- if (atomic_dec_and_test(&iwpm_admin.refcount)) {
- free_hash_bucket();
- free_reminfo_bucket();
- pr_debug("%s: Resources are destroyed\n", __func__);
- }
- mutex_unlock(&iwpm_admin_lock);
- iwpm_set_valid(nl_client, 0);
+ free_hash_bucket();
+ free_reminfo_bucket();
+ pr_debug("%s: Resources are destroyed\n", __func__);
iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
return 0;
}
@@ -127,8 +99,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
/**
* iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
* info in a hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
+ * @mapped_sockaddr: Mapped local ip/tcp address
* @nl_client: The index of the netlink client
* @map_flags: IWPM mapping flags
*/
@@ -141,8 +113,6 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client))
- return ret;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
if (!map_info)
return -ENOMEM;
@@ -174,7 +144,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
/**
* iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
* info from the hash table
- * @local_addr: Local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
* @mapped_local_addr: Mapped local ip/tcp address
*
* Returns err code if mapping info is not found in the hash table,
@@ -302,10 +272,6 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid client = %d\n", __func__, nl_client);
- return ret;
- }
spin_lock_irqsave(&iwpm_reminfo_lock, flags);
if (iwpm_reminfo_bucket) {
hash_bucket_head = get_reminfo_hash_bucket(
@@ -420,16 +386,6 @@ int iwpm_get_nlmsg_seq(void)
return atomic_inc_return(&iwpm_admin.nlmsg_seq);
}
-int iwpm_valid_client(u8 nl_client)
-{
- return iwpm_admin.client_list[nl_client];
-}
-
-void iwpm_set_valid(u8 nl_client, int valid)
-{
- iwpm_admin.client_list[nl_client] = valid;
-}
-
/* valid client */
u32 iwpm_get_registration(u8 nl_client)
{
@@ -651,7 +607,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
err_str = "Unable to send a nlmsg";
goto mapinfo_num_error;
}
- pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+ pr_debug("%s: Sent mapping number = %u\n", __func__, mapping_num);
return 0;
mapinfo_num_error:
pr_info("%s: %s\n", __func__, err_str);
@@ -806,7 +762,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
- const char *err_str = "";
+ const char *err_str;
int ret = -EINVAL;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client);
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 1bf87d9fd0bd..d6fc8402158a 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -33,7 +33,6 @@
#ifndef _IWPM_UTIL_H
#define _IWPM_UTIL_H
-#include <linux/module.h>
#include <linux/io.h>
#include <linux/in.h>
#include <linux/in6.h>
@@ -90,9 +89,7 @@ struct iwpm_remote_info {
};
struct iwpm_admin_data {
- atomic_t refcount;
atomic_t nlmsg_seq;
- int client_list[RDMA_NL_NUM_CLIENTS];
u32 reg_list[RDMA_NL_NUM_CLIENTS];
};
@@ -141,29 +138,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
int iwpm_get_nlmsg_seq(void);
/**
- * iwpm_add_reminfo - Add remote address info of the connecting peer
+ * iwpm_add_remote_info - Add remote address info of the connecting peer
* to the remote info hash table
* @reminfo: The remote info to be added
*/
void iwpm_add_remote_info(struct iwpm_remote_info *reminfo);
/**
- * iwpm_valid_client - Check if the port mapper client is valid
- * @nl_client: The index of the netlink client
- *
- * Valid clients need to call iwpm_init() before using
- * the port mapper
- */
-int iwpm_valid_client(u8 nl_client);
-
-/**
- * iwpm_set_valid - Set the port mapper client to valid or not
- * @nl_client: The index of the netlink client
- * @valid: 1 if valid or 0 if invalid
- */
-void iwpm_set_valid(u8 nl_client, int valid);
-
-/**
* iwpm_check_registration - Check if the client registration
* matches the given one
* @nl_client: The index of the netlink client
@@ -183,7 +164,7 @@ u32 iwpm_check_registration(u8 nl_client, u32 reg);
void iwpm_set_registration(u8 nl_client, u32 reg);
/**
- * iwpm_get_registration
+ * iwpm_get_registration - Get the client registration
* @nl_client: The index of the netlink client
*
* Returns the client registration type
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
new file mode 100644
index 000000000000..c77d7d2559a1
--- /dev/null
+++ b/drivers/infiniband/core/lag.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/lag.h>
+
+static struct sk_buff *rdma_build_skb(struct net_device *netdev,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct udphdr *uh;
+ u8 smac[ETH_ALEN];
+ bool is_ipv4;
+ int hdr_len;
+
+ is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
+ hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(hdr_len, flags);
+ if (!skb)
+ return NULL;
+
+ skb->dev = netdev;
+ skb_reserve(skb, hdr_len);
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source =
+ htons(rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ uh->dest = htons(ROCE_V2_UDP_DPORT);
+ uh->len = htons(sizeof(struct udphdr));
+
+ if (is_ipv4) {
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ iph->frag_off = 0;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct iphdr));
+ memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
+ sizeof(struct in_addr));
+ memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
+ sizeof(struct in_addr));
+ } else {
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->nexthdr = IPPROTO_UDP;
+ memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
+ sizeof(*ip6h->flow_lbl));
+ memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
+ sizeof(struct in6_addr));
+ memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
+ sizeof(struct in6_addr));
+ }
+
+ skb_push(skb, sizeof(struct ethhdr));
+ skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
+ skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6);
+ rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac);
+ memcpy(eth->h_source, smac, ETH_ALEN);
+ memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN);
+
+ return skb;
+}
+
+static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
+ struct net_device *master,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave;
+ struct sk_buff *skb;
+
+ skb = rdma_build_skb(master, ah_attr, flags);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ rcu_read_lock();
+ slave = netdev_get_xmit_slave(master, skb,
+ !!(device->lag_flags &
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES));
+ if (slave)
+ dev_hold(slave);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return slave;
+}
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave)
+{
+ if (xmit_slave)
+ dev_put(xmit_slave);
+}
+
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave = NULL;
+ struct net_device *master;
+
+ if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+ ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ ah_attr->grh.flow_label))
+ return NULL;
+
+ rcu_read_lock();
+ master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
+ if (IS_ERR(master)) {
+ rcu_read_unlock();
+ return master;
+ }
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (!netif_is_bond_master(master))
+ goto put;
+
+ slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags);
+put:
+ dev_put(master);
+ return slave;
+}
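
The new lag.c helper builds a throwaway skb that looks like the RoCEv2 UDP packet the AH would generate (the UDP source port is derived from the flow label), hands it to the bonding driver via netdev_get_xmit_slave(), and returns the chosen slave netdevice with a reference held. A driver would consult it while building a RoCE address handle, roughly as below; set_hw_tx_port() is a hypothetical driver helper, not an in-tree API:

	struct net_device *slave;

	slave = rdma_lag_get_ah_roce_slave(ibdev, ah_attr, GFP_KERNEL);
	if (IS_ERR(slave))
		return PTR_ERR(slave);

	if (slave)
		/* steer traffic for this AH out of the selected bond slave */
		set_hw_tx_port(ah, slave);

	rdma_lag_put_ah_roce_slave(slave);	/* NULL is tolerated */
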
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c54db13fa9b0..1893aa613ad7 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -61,7 +61,7 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
{
u16 pkey;
struct ib_device *dev = qp_info->port_priv->device;
- u8 pnum = qp_info->port_priv->port_num;
+ u32 pnum = qp_info->port_priv->port_num;
struct ib_ud_wr *wr = &mad_send_wr->send_wr;
struct rdma_ah_attr attr = {};
@@ -85,7 +85,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
-/* Client ID 0 is used for snoop-only clients */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
static struct list_head ib_mad_port_list;
@@ -119,7 +118,7 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
* Assumes ib_mad_port_list_lock is being held
*/
static inline struct ib_mad_port_private *
-__ib_get_mad_port(struct ib_device *device, int port_num)
+__ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
@@ -135,7 +134,7 @@ __ib_get_mad_port(struct ib_device *device, int port_num)
* for a device/port
*/
static inline struct ib_mad_port_private *
-ib_get_mad_port(struct ib_device *device, int port_num)
+ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
unsigned long flags;
@@ -156,8 +155,7 @@ static inline u8 convert_mgmt_class(u8 mgmt_class)
static int get_spl_qp_index(enum ib_qp_type qp_type)
{
- switch (qp_type)
- {
+ switch (qp_type) {
case IB_QPT_SMI:
return 0;
case IB_QPT_GSI:
@@ -223,7 +221,7 @@ EXPORT_SYMBOL(ib_response_mad);
* Context: Process context.
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum ib_qp_type qp_type,
struct ib_mad_reg_req *mad_reg_req,
u8 rmpp_version,
@@ -353,7 +351,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
+ dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
__func__, port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
@@ -403,7 +401,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
- atomic_set(&mad_agent_priv->refcount, 1);
+ refcount_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
@@ -483,141 +481,12 @@ error1:
}
EXPORT_SYMBOL(ib_register_mad_agent);
-static inline int is_snooping_sends(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (/*IB_MAD_SNOOP_POSTED_SENDS |
- IB_MAD_SNOOP_RMPP_SENDS |*/
- IB_MAD_SNOOP_SEND_COMPLETIONS /*|
- IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
-}
-
-static inline int is_snooping_recvs(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (IB_MAD_SNOOP_RECVS /*|
- IB_MAD_SNOOP_RMPP_RECVS*/));
-}
-
-static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
- struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_snoop_private **new_snoop_table;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- /* Check for empty slot in array. */
- for (i = 0; i < qp_info->snoop_table_size; i++)
- if (!qp_info->snoop_table[i])
- break;
-
- if (i == qp_info->snoop_table_size) {
- /* Grow table. */
- new_snoop_table = krealloc(qp_info->snoop_table,
- sizeof mad_snoop_priv *
- (qp_info->snoop_table_size + 1),
- GFP_ATOMIC);
- if (!new_snoop_table) {
- i = -ENOMEM;
- goto out;
- }
-
- qp_info->snoop_table = new_snoop_table;
- qp_info->snoop_table_size++;
- }
- qp_info->snoop_table[i] = mad_snoop_priv;
- atomic_inc(&qp_info->snoop_count);
-out:
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- return i;
-}
-
-struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
- u8 port_num,
- enum ib_qp_type qp_type,
- int mad_snoop_flags,
- ib_mad_snoop_handler snoop_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_mad_agent *ret;
- struct ib_mad_snoop_private *mad_snoop_priv;
- int qpn;
- int err;
-
- /* Validate parameters */
- if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
- (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- qpn = get_spl_qp_index(qp_type);
- if (qpn == -1) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- port_priv = ib_get_mad_port(device, port_num);
- if (!port_priv) {
- ret = ERR_PTR(-ENODEV);
- goto error1;
- }
- /* Allocate structures */
- mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
- if (!mad_snoop_priv) {
- ret = ERR_PTR(-ENOMEM);
- goto error1;
- }
-
- /* Now, fill in the various structures */
- mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
- mad_snoop_priv->agent.device = device;
- mad_snoop_priv->agent.recv_handler = recv_handler;
- mad_snoop_priv->agent.snoop_handler = snoop_handler;
- mad_snoop_priv->agent.context = context;
- mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
- mad_snoop_priv->agent.port_num = port_num;
- mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
- init_completion(&mad_snoop_priv->comp);
-
- err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type);
- if (err) {
- ret = ERR_PTR(err);
- goto error2;
- }
-
- mad_snoop_priv->snoop_index = register_snoop_agent(
- &port_priv->qp_info[qpn],
- mad_snoop_priv);
- if (mad_snoop_priv->snoop_index < 0) {
- ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error3;
- }
-
- atomic_set(&mad_snoop_priv->refcount, 1);
- return &mad_snoop_priv->agent;
-error3:
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-error2:
- kfree(mad_snoop_priv);
-error1:
- return ret;
-}
-EXPORT_SYMBOL(ib_register_mad_snoop);
-
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
- if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ if (refcount_dec_and_test(&mad_agent_priv->refcount))
complete(&mad_agent_priv->comp);
}
-static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- if (atomic_dec_and_test(&mad_snoop_priv->refcount))
- complete(&mad_snoop_priv->comp);
-}
-
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
@@ -639,10 +508,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
flush_workqueue(port_priv->wq);
- ib_cancel_rmpp_recvs(mad_agent_priv);
deref_mad_agent(mad_agent_priv);
wait_for_completion(&mad_agent_priv->comp);
+ ib_cancel_rmpp_recvs(mad_agent_priv);
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
@@ -650,25 +519,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
kfree_rcu(mad_agent_priv, rcu);
}
-static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_qp_info *qp_info;
- unsigned long flags;
-
- qp_info = mad_snoop_priv->qp_info;
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
- atomic_dec(&qp_info->snoop_count);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-
- deref_snoop_agent(mad_snoop_priv);
- wait_for_completion(&mad_snoop_priv->comp);
-
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-
- kfree(mad_snoop_priv);
-}
-
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*
@@ -677,20 +527,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_snoop_private *mad_snoop_priv;
-
- /* If the TID is zero, the agent can only snoop. */
- if (mad_agent->hi_tid) {
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
- unregister_mad_agent(mad_agent_priv);
- } else {
- mad_snoop_priv = container_of(mad_agent,
- struct ib_mad_snoop_private,
- agent);
- unregister_mad_snoop(mad_snoop_priv);
- }
+
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -706,59 +547,8 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
spin_unlock_irqrestore(&mad_queue->lock, flags);
}
-static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_mad_send_buf *send_buf,
- struct ib_mad_send_wc *mad_send_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_buf, mad_send_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
-static void snoop_recv(struct ib_mad_qp_info *qp_info,
- struct ib_mad_recv_wc *mad_recv_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
- mad_recv_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
- u16 pkey_index, u8 port_num, struct ib_wc *wc)
+ u16 pkey_index, u32 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
wc->wr_cqe = cqe;
@@ -817,7 +607,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_port_private *port_priv;
struct ib_mad_agent_private *recv_mad_agent = NULL;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num;
+ u32 port_num;
struct ib_wc mad_wc;
struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
@@ -916,8 +706,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
(const struct ib_mad *)smp,
(struct ib_mad *)mad_priv->mad, &mad_size,
&out_mad_pkey_index);
- switch (ret)
- {
+ switch (ret) {
case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
mad_agent_priv->agent.recv_handler) {
@@ -927,7 +716,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
* Reference MAD agent until receive
* side of local completion handled
*/
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
} else
kfree(mad_priv);
break;
@@ -967,7 +756,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local->return_wc_byte_len = mad_size;
}
/* Reference MAD agent until send side of local completion handled */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
/* Queue local completion to local list */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
@@ -1016,7 +805,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
/* Allocate data segments. */
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
- seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+ seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
if (!seg) {
free_send_rmpp_list(send_wr);
return -ENOMEM;
@@ -1046,12 +835,11 @@ int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
}
EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
-struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
- u32 remote_qpn, u16 pkey_index,
- int rmpp_active,
- int hdr_len, int data_len,
- gfp_t gfp_mask,
- u8 base_version)
+struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
+ u32 remote_qpn, u16 pkey_index,
+ int rmpp_active, int hdr_len,
+ int data_len, gfp_t gfp_mask,
+ u8 base_version)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
@@ -1125,7 +913,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
mad_send_wr->send_buf.mad_agent = mad_agent;
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
@@ -1340,7 +1128,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->status = IB_WC_SUCCESS;
/* Reference MAD agent until send completes */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&mad_send_wr->agent_list,
&mad_agent_priv->send_list);
@@ -1357,7 +1145,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_del(&mad_send_wr->agent_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
goto error;
}
}
@@ -1484,11 +1272,9 @@ static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
int i;
/* Remove any methods for this mad agent */
- for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
- if (method->agent[i] == agent) {
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
+ if (method->agent[i] == agent)
method->agent[i] = NULL;
- }
- }
}
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
@@ -1663,9 +1449,8 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
* Was MAD registration request supplied
* with original registration ?
*/
- if (!agent_priv->reg_req) {
+ if (!agent_priv->reg_req)
goto out;
- }
port_priv = agent_priv->qp_info->port_priv;
mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
@@ -1763,7 +1548,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
rcu_read_lock();
mad_agent = xa_load(&ib_mad_clients, hi_tid);
- if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
+ if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
mad_agent = NULL;
rcu_read_unlock();
} else {
@@ -1815,14 +1600,14 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
}
}
if (mad_agent)
- atomic_inc(&mad_agent->refcount);
+ refcount_inc(&mad_agent->refcount);
out:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
}
if (mad_agent && !mad_agent->agent.recv_handler) {
dev_notice(&port_priv->device->dev,
- "No receive handler for client %p on port %d\n",
+ "No receive handler for client %p on port %u\n",
&mad_agent->agent, port_priv->port_num);
deref_mad_agent(mad_agent);
mad_agent = NULL;
@@ -1841,7 +1626,7 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
/* Make sure MAD base version is understood */
if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
(!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
- pr_err("MAD received with unsupported base version %d %s\n",
+ pr_err("MAD received with unsupported base version %u %s\n",
mad_hdr->base_version, opa ? "(opa)" : "");
goto out;
}
@@ -1886,15 +1671,16 @@ static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
rwc->recv_buf.mad->mad_hdr.mgmt_class;
}
-static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
- const struct ib_mad_send_wr_private *wr,
- const struct ib_mad_recv_wc *rwc )
+static inline int
+rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
+ const struct ib_mad_send_wr_private *wr,
+ const struct ib_mad_recv_wc *rwc)
{
struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num = mad_agent_priv->agent.port_num;
+ u32 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
bool has_grh;
@@ -2040,10 +1826,11 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_agent_priv->agent.recv_handler(
&mad_agent_priv->agent, NULL,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
} else {
/* not user rmpp, revert to normal behavior and
- * drop the mad */
+ * drop the mad
+ */
ib_free_recv_mad(mad_recv_wc);
deref_mad_agent(mad_agent_priv);
return;
@@ -2057,7 +1844,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
&mad_agent_priv->agent,
&mad_send_wr->send_buf,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
@@ -2069,14 +1856,12 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
-
- return;
}
static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
const struct ib_mad_qp_info *qp_info,
const struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
@@ -2163,7 +1948,7 @@ static enum smi_action
handle_opa_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
@@ -2219,7 +2004,7 @@ static enum smi_action
handle_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response,
bool opa)
@@ -2243,7 +2028,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
struct ib_mad_agent_private *mad_agent;
- int port_num;
+ u32 port_num;
int ret = IB_MAD_RESULT_SUCCESS;
size_t mad_size;
u16 resp_mad_pkey_index = 0;
@@ -2289,9 +2074,6 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
recv->header.recv_wc.recv_buf.grh = &recv->grh;
- if (atomic_read(&qp_info->snoop_count))
- snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
-
/* Validate MAD */
if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
goto out;
@@ -2414,9 +2196,10 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
temp_mad_send_wr->timeout))
break;
}
- }
- else
+ } else {
list_item = &mad_agent_priv->wait_list;
+ }
+
list_add(&mad_send_wr->agent_list, list_item);
/* Reschedule a work item if we have a shorter timeout */
@@ -2470,7 +2253,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
adjust_timeout(mad_agent_priv);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- if (mad_send_wr->status != IB_WC_SUCCESS )
+ if (mad_send_wr->status != IB_WC_SUCCESS)
mad_send_wc->status = mad_send_wr->status;
if (ret == IB_RMPP_RESULT_INTERNAL)
ib_rmpp_send_handler(mad_send_wc);
@@ -2538,9 +2321,6 @@ retry:
mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_send_wc.status = wc->status;
mad_send_wc.vendor_err = wc->vendor_err;
- if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
@@ -2653,7 +2433,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
list_del(&mad_send_wr->agent_list);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
}
}
@@ -2679,16 +2459,18 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
return NULL;
}
-int ib_modify_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
unsigned long flags;
int active;
- mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
- agent);
+ if (!send_buf)
+ return -EINVAL;
+
+ mad_agent_priv = container_of(send_buf->mad_agent,
+ struct ib_mad_agent_private, agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
@@ -2713,13 +2495,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
}
EXPORT_SYMBOL(ib_modify_mad);
-void ib_cancel_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf)
-{
- ib_modify_mad(mad_agent, send_buf, 0);
-}
-EXPORT_SYMBOL(ib_cancel_mad);
-
static void local_completions(struct work_struct *work)
{
struct ib_mad_agent_private *mad_agent_priv;
@@ -2782,16 +2557,12 @@ static void local_completions(struct work_struct *work)
local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
local->mad_priv->header.recv_wc.recv_buf.mad =
(struct ib_mad *)local->mad_priv->mad;
- if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
- snoop_recv(recv_mad_agent->qp_info,
- &local->mad_priv->header.recv_wc,
- IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
&local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
- atomic_dec(&recv_mad_agent->refcount);
+ deref_mad_agent(recv_mad_agent);
spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
}
@@ -2800,15 +2571,11 @@ local_send_completion:
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
- if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info,
- &local->mad_send_wr->send_buf,
- &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
if (free_mad)
kfree(local->mad_priv);
kfree(local);
@@ -2894,7 +2661,7 @@ static void timeout_sends(struct work_struct *work)
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
}
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -2941,6 +2708,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
sg_list.addr))) {
+ kfree(mad_priv);
ret = -ENOMEM;
break;
}
@@ -3099,7 +2867,7 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
/* It's worse than that! He's dead, Jim! */
dev_err(&qp_info->port_priv->device->dev,
- "Fatal error (%d) on MAD QP (%d)\n",
+ "Fatal error (%d) on MAD QP (%u)\n",
event->event, qp_info->qp->qp_num);
}
@@ -3119,10 +2887,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv,
init_mad_queue(qp_info, &qp_info->send_queue);
init_mad_queue(qp_info, &qp_info->recv_queue);
INIT_LIST_HEAD(&qp_info->overflow_list);
- spin_lock_init(&qp_info->snoop_lock);
- qp_info->snoop_table = NULL;
- qp_info->snoop_table_size = 0;
- atomic_set(&qp_info->snoop_count, 0);
}
static int create_mad_qp(struct ib_mad_qp_info *qp_info,
@@ -3166,7 +2930,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
return;
ib_destroy_qp(qp_info->qp);
- kfree(qp_info->snoop_table);
}
/*
@@ -3174,7 +2937,7 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
* Create the QP, PD, MR, and CQ if needed
*/
static int ib_mad_port_open(struct ib_device *device,
- int port_num)
+ u32 port_num)
{
int ret, cq_size;
struct ib_mad_port_private *port_priv;
@@ -3229,7 +2992,7 @@ static int ib_mad_port_open(struct ib_device *device,
if (ret)
goto error7;
- snprintf(name, sizeof name, "ib_mad%d", port_num);
+ snprintf(name, sizeof(name), "ib_mad%u", port_num);
port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!port_priv->wq) {
ret = -ENOMEM;
@@ -3275,7 +3038,7 @@ error3:
* If there are no classes using the port, free the port
* resources (CQ, MR, PD, QP) and remove the port's info structure
*/
-static int ib_mad_port_close(struct ib_device *device, int port_num)
+static int ib_mad_port_close(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *port_priv;
unsigned long flags;
@@ -3284,7 +3047,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- dev_err(&device->dev, "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %u not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -3304,9 +3067,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
return 0;
}
-static void ib_mad_init_device(struct ib_device *device)
+static int ib_mad_init_device(struct ib_device *device)
{
int start, i;
+ unsigned int count = 0;
+ int ret;
start = rdma_start_port(device);
@@ -3314,17 +3079,23 @@ static void ib_mad_init_device(struct ib_device *device)
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_mad_port_open(device, i)) {
+ ret = ib_mad_port_open(device, i);
+ if (ret) {
dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
- if (ib_agent_port_open(device, i)) {
+ ret = ib_agent_port_open(device, i);
+ if (ret) {
dev_err(&device->dev,
"Couldn't open port %d for agents\n", i);
goto error_agent;
}
+ count++;
}
- return;
+ if (!count)
+ return -EOPNOTSUPP;
+
+ return 0;
error_agent:
if (ib_mad_port_close(device, i))
@@ -3341,6 +3112,7 @@ error:
if (ib_mad_port_close(device, i))
dev_err(&device->dev, "Couldn't close port %d\n", i);
}
+ return ret;
}
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
@@ -3353,9 +3125,9 @@ static void ib_mad_remove_device(struct ib_device *device, void *client_data)
if (ib_agent_port_close(device, i))
dev_err(&device->dev,
- "Couldn't close port %d for agents\n", i);
+ "Couldn't close port %u for agents\n", i);
if (ib_mad_port_close(device, i))
- dev_err(&device->dev, "Couldn't close port %d\n", i);
+ dev_err(&device->dev, "Couldn't close port %u\n", i);
}
}
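
The mad.c hunks above replace the agent's atomic_t reference count with refcount_t and route every drop through deref_mad_agent(), so the last put wakes the unregister path. A minimal, hypothetical sketch of that teardown idiom (the demo_agent type and helpers are invented for illustration, not taken from the patch):

#include <linux/completion.h>
#include <linux/refcount.h>

struct demo_agent {
	refcount_t refcount;
	struct completion comp;
};

static void demo_agent_init(struct demo_agent *a)
{
	refcount_set(&a->refcount, 1);	/* registration owns one reference */
	init_completion(&a->comp);
}

static void demo_agent_get(struct demo_agent *a)
{
	refcount_inc(&a->refcount);	/* saturates and WARNs on misuse */
}

static void demo_agent_put(struct demo_agent *a)
{
	/* The final put signals the waiter instead of freeing directly. */
	if (refcount_dec_and_test(&a->refcount))
		complete(&a->comp);
}

static void demo_agent_unregister(struct demo_agent *a)
{
	demo_agent_put(a);		/* drop the registration's reference */
	wait_for_completion(&a->comp);	/* wait out in-flight callbacks */
}

The refcount_inc_not_zero() call in find_mad_agent() fits the same model: a count already at zero means the agent is being torn down, so the RCU lookup simply skips it.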
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 956b3a7dfed7..1b7445a6f671 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -79,13 +79,13 @@ struct ib_mad_private {
struct ib_mad_private_header header;
size_t mad_size;
struct ib_grh grh;
- u8 mad[0];
+ u8 mad[];
} __packed;
struct ib_rmpp_segment {
struct list_head list;
u32 num;
- u8 data[0];
+ u8 data[];
};
struct ib_mad_agent_private {
@@ -103,7 +103,7 @@ struct ib_mad_agent_private {
struct work_struct local_work;
struct list_head rmpp_list;
- atomic_t refcount;
+ refcount_t refcount;
union {
struct completion comp;
struct rcu_head rcu;
@@ -115,7 +115,6 @@ struct ib_mad_snoop_private {
struct ib_mad_qp_info *qp_info;
int snoop_index;
int mad_snoop_flags;
- atomic_t refcount;
struct completion comp;
};
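
mad_priv.h also swaps the old zero-length arrays (mad[0], data[0]) for C99 flexible array members. A short sketch of how such a trailing buffer is commonly sized at allocation time; the demo_seg type is made up, and the patch itself keeps the open-coded sizeof(*seg) + seg_size, of which struct_size() is the overflow-checked equivalent:

#include <linux/list.h>
#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_seg {
	struct list_head list;
	u32 num;
	u8 data[];		/* flexible array member, not data[0] */
};

static struct demo_seg *demo_seg_alloc(size_t seg_size, gfp_t gfp)
{
	/* struct_size() checks the header + payload sum for overflow. */
	struct demo_seg *seg = kmalloc(struct_size(seg, data, seg_size), gfp);

	if (seg)
		seg->num = 0;
	return seg;
}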
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 5ec57abc0849..8af0619a39cd 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -40,8 +40,7 @@
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
- RMPP_STATE_COMPLETE,
- RMPP_STATE_CANCELING
+ RMPP_STATE_COMPLETE
};
struct mad_rmpp_recv {
@@ -52,7 +51,7 @@ struct mad_rmpp_recv {
struct completion comp;
enum rmpp_state state;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct ib_ah *ah;
struct ib_mad_recv_wc *rmpp_wc;
@@ -73,7 +72,7 @@ struct mad_rmpp_recv {
static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
- if (atomic_dec_and_test(&rmpp_recv->refcount))
+ if (refcount_dec_and_test(&rmpp_recv->refcount))
complete(&rmpp_recv->comp);
}
@@ -92,22 +91,18 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
- if (rmpp_recv->state != RMPP_STATE_COMPLETE)
- ib_free_recv_mad(rmpp_recv->rmpp_wc);
- rmpp_recv->state = RMPP_STATE_CANCELING;
- }
- spin_unlock_irqrestore(&agent->lock, flags);
-
- list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
+ spin_unlock_irqrestore(&agent->lock, flags);
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
+ if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+ ib_free_recv_mad(rmpp_recv->rmpp_wc);
destroy_rmpp_recv(rmpp_recv);
}
}
@@ -272,10 +267,6 @@ static void recv_cleanup_handler(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
- if (rmpp_recv->state == RMPP_STATE_CANCELING) {
- spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
- return;
- }
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
@@ -305,7 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
spin_lock_init(&rmpp_recv->lock);
rmpp_recv->state = RMPP_STATE_ACTIVE;
- atomic_set(&rmpp_recv->refcount, 1);
+ refcount_set(&rmpp_recv->refcount, 1);
rmpp_recv->rmpp_wc = mad_recv_wc;
rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
@@ -357,7 +348,7 @@ acquire_rmpp_recv(struct ib_mad_agent_private *agent,
spin_lock_irqsave(&agent->lock, flags);
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv)
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
spin_unlock_irqrestore(&agent->lock, flags);
return rmpp_recv;
}
@@ -391,8 +382,8 @@ static inline int get_seg_num(struct ib_mad_recv_buf *seg)
return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
}
-static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
- struct ib_mad_recv_buf *seg)
+static inline struct ib_mad_recv_buf *get_next_seg(struct list_head *rmpp_list,
+ struct ib_mad_recv_buf *seg)
{
if (seg->list.next == rmpp_list)
return NULL;
@@ -405,8 +396,8 @@ static inline int window_size(struct ib_mad_agent_private *agent)
return max(agent->qp_info->recv_queue.max_active >> 3, 1);
}
-static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
- int seg_num)
+static struct ib_mad_recv_buf *find_seg_location(struct list_head *rmpp_list,
+ int seg_num)
{
struct ib_mad_recv_buf *seg_buf;
int cur_seg_num;
@@ -458,7 +449,7 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
return hdr_size + rmpp_recv->seg_num * data_size - pad;
}
-static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
+static struct ib_mad_recv_wc *complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_mad_recv_wc *rmpp_wc;
@@ -553,7 +544,7 @@ start_rmpp(struct ib_mad_agent_private *agent,
destroy_rmpp_recv(rmpp_recv);
return continue_rmpp(agent, mad_recv_wc);
}
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
if (get_last_flag(&mad_recv_wc->recv_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
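
Besides the refcount_t conversion, the mad_rmpp.c hunks drop RMPP_STATE_CANCELING and reorder ib_cancel_rmpp_recvs(): work items are cancelled under the agent lock, the workqueue is flushed, and only then are the entries unlinked and freed. A hypothetical reduction of that ordering (types and names are illustrative only):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct demo_recv {
	struct list_head list;
	struct delayed_work cleanup_work;
};

static void demo_cancel_all(spinlock_t *lock, struct list_head *head,
			    struct workqueue_struct *wq)
{
	struct demo_recv *rcv, *tmp;

	/* Stop the timers while the list is stable ... */
	spin_lock_irq(lock);
	list_for_each_entry(rcv, head, list)
		cancel_delayed_work(&rcv->cleanup_work);
	spin_unlock_irq(lock);

	/* ... let any handler that already started run to completion ... */
	flush_workqueue(wq);

	/* ... then unlink and free without holding the lock. */
	list_for_each_entry_safe(rcv, tmp, head, list) {
		list_del(&rcv->list);
		kfree(rcv);
	}
}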
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index cd338ddc4a39..a236532a9026 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"
-static void mcast_add_one(struct ib_device *device);
+static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);
static struct ib_client mcast_client = {
@@ -61,9 +61,9 @@ struct mcast_port {
struct mcast_device *dev;
spinlock_t lock;
struct rb_root table;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
- u8 port_num;
+ u32 port_num;
};
struct mcast_device {
@@ -71,7 +71,7 @@ struct mcast_device {
struct ib_event_handler event_handler;
int start_port;
int end_port;
- struct mcast_port port[0];
+ struct mcast_port port[];
};
enum mcast_state {
@@ -117,7 +117,7 @@ struct mcast_member {
struct mcast_group *group;
struct list_head list;
enum mcast_state state;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
};
@@ -178,7 +178,7 @@ static struct mcast_group *mcast_insert(struct mcast_port *port,
static void deref_port(struct mcast_port *port)
{
- if (atomic_dec_and_test(&port->refcount))
+ if (refcount_dec_and_test(&port->refcount))
complete(&port->comp);
}
@@ -199,7 +199,7 @@ static void release_group(struct mcast_group *group)
static void deref_member(struct mcast_member *member)
{
- if (atomic_dec_and_test(&member->refcount))
+ if (refcount_dec_and_test(&member->refcount))
complete(&member->comp);
}
@@ -401,7 +401,7 @@ static void process_group_error(struct mcast_group *group)
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct mcast_member, list);
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
adjust_membership(group, member->multicast.rec.join_state, -1);
member->state = MCAST_ERROR;
@@ -445,7 +445,7 @@ retest:
struct mcast_member, list);
multicast = &member->multicast;
join_state = multicast->rec.join_state;
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
if (join_state == (group->rec.join_state & join_state)) {
status = cmp_rec(&group->rec, &multicast->rec,
@@ -497,7 +497,7 @@ static void process_join_error(struct mcast_group *group, int status)
member = list_entry(group->pending_list.next,
struct mcast_member, list);
if (group->last_join == member) {
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(status, &member->multicast);
@@ -589,7 +589,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
kfree(group);
group = cur_group;
} else
- atomic_inc(&port->refcount);
+ refcount_inc(&port->refcount);
found:
atomic_inc(&group->refcount);
spin_unlock_irqrestore(&port->lock, flags);
@@ -605,7 +605,7 @@ found:
*/
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
int (*callback)(int status,
@@ -632,7 +632,7 @@ ib_sa_join_multicast(struct ib_sa_client *client,
member->multicast.callback = callback;
member->multicast.context = context;
init_completion(&member->comp);
- atomic_set(&member->refcount, 1);
+ refcount_set(&member->refcount, 1);
member->state = MCAST_JOINING;
member->group = acquire_group(&dev->port[port_num - dev->start_port],
@@ -690,7 +690,7 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
}
EXPORT_SYMBOL(ib_sa_free_multicast);
-int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num,
union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
struct mcast_device *dev;
@@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* member record and gid of the device.
* @device: RDMA device
* @port_num: Port of the rdma device to consider
+ * @rec: Multicast member record to use
* @ndev: Optional netdevice, applicable only for RoCE
* @gid_type: GID type to consider
* @ah_attr: AH attribute to fillup on successful completion
@@ -731,7 +732,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* success or appropriate error code.
*
*/
-int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
@@ -815,7 +816,7 @@ static void mcast_event_handler(struct ib_event_handler *handler,
}
}
-static void mcast_add_one(struct ib_device *device)
+static int mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
@@ -825,7 +826,7 @@ static void mcast_add_one(struct ib_device *device)
dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!dev)
- return;
+ return -ENOMEM;
dev->start_port = rdma_start_port(device);
dev->end_port = rdma_end_port(device);
@@ -839,13 +840,13 @@ static void mcast_add_one(struct ib_device *device)
spin_lock_init(&port->lock);
port->table = RB_ROOT;
init_completion(&port->comp);
- atomic_set(&port->refcount, 1);
+ refcount_set(&port->refcount, 1);
++count;
}
if (!count) {
kfree(dev);
- return;
+ return -EOPNOTSUPP;
}
dev->device = device;
@@ -853,6 +854,7 @@ static void mcast_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
+ return 0;
}
static void mcast_remove_one(struct ib_device *device, void *client_data)
@@ -861,9 +863,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
struct mcast_port *port;
int i;
- if (!dev)
- return;
-
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
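
multicast.c follows the client-API change seen in mad.c: ->add() now returns an errno (-ENOMEM, or -EOPNOTSUPP when no usable port exists), and ->remove() is only invoked after a successful add, so the NULL check on client_data goes away. A stripped-down, hypothetical client showing that contract; the demo_* names and the rdma_cap_ib_sa() test are illustrative, not from the patch:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

static int demo_add_one(struct ib_device *device);
static void demo_remove_one(struct ib_device *device, void *client_data);

static struct ib_client demo_client = {
	.name	= "demo",
	.add	= demo_add_one,
	.remove	= demo_remove_one,
};

static int demo_add_one(struct ib_device *device)
{
	unsigned int i, count = 0;
	void *priv;

	priv = kzalloc(sizeof(unsigned long), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;		/* core skips this device */

	rdma_for_each_port(device, i)
		if (rdma_cap_ib_sa(device, i))
			count++;

	if (!count) {
		kfree(priv);
		return -EOPNOTSUPP;	/* no port we can serve */
	}

	ib_set_client_data(device, &demo_client, priv);
	return 0;
}

static void demo_remove_one(struct ib_device *device, void *client_data)
{
	/* Only called after a successful add, so client_data is never NULL. */
	kfree(client_data);
}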
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 8cd31ef25eff..1b2cc9e45ade 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -98,7 +98,7 @@ get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
*/
up_read(&rdma_nl_types[type].sem);
- request_module("rdma-netlink-subsys-%d", type);
+ request_module("rdma-netlink-subsys-%u", type);
down_read(&rdma_nl_types[type].sem);
cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e0b0a91da696..12dc97067ed2 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -92,7 +92,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
.len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
@@ -114,6 +116,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
@@ -129,6 +132,11 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
@@ -145,6 +153,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -241,7 +252,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
char fw[IB_FW_VERSION_NAME_MAX];
int ret = 0;
- u8 port;
+ u32 port;
if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
@@ -384,6 +395,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_CM_ID] = "cm_id",
[RDMA_RESTRACK_MR] = "mr",
[RDMA_RESTRACK_CTX] = "ctx",
+ [RDMA_RESTRACK_SRQ] = "srq",
};
struct nlattr *table_attr;
@@ -446,27 +458,11 @@ static int fill_res_name_pid(struct sk_buff *msg,
return err ? -EMSGSIZE : 0;
}
-static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (!dev->ops.fill_res_entry)
- return false;
- return dev->ops.fill_res_entry(msg, res);
-}
-
-static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (!dev->ops.fill_stat_entry)
- return false;
- return dev->ops.fill_stat_entry(msg, res);
-}
-
-static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
- struct rdma_restrack_entry *res, uint32_t port)
+static int fill_res_qp_entry_query(struct sk_buff *msg,
+ struct rdma_restrack_entry *res,
+ struct ib_device *dev,
+ struct ib_qp *qp)
{
- struct ib_qp *qp = container_of(res, struct ib_qp, res);
- struct ib_device *dev = qp->device;
struct ib_qp_init_attr qp_init_attr;
struct ib_qp_attr qp_attr;
int ret;
@@ -475,16 +471,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (ret)
return ret;
- if (port && port != qp_attr.port_num)
- return -EAGAIN;
-
- /* In create_qp() port is not set yet */
- if (qp_attr.port_num &&
- nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
- goto err;
-
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
- goto err;
if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
qp_attr.dest_qp_num))
@@ -508,19 +494,53 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
+ if (dev->ops.fill_res_qp_entry)
+ return dev->ops.fill_res_qp_entry(msg, qp);
+ return 0;
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
+ int ret;
+
+ if (port && port != qp->port)
+ return -EAGAIN;
+
+ /* In create_qp() port is not set yet */
+ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
+ return -EINVAL;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
+ if (ret)
+ return -EMSGSIZE;
+
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_name_pid(msg, res))
- goto err;
+ ret = fill_res_name_pid(msg, res);
+ if (ret)
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return fill_res_qp_entry_query(msg, res, dev, qp);
+}
- return 0;
+static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
-err: return -EMSGSIZE;
+ if (port && port != qp->port)
+ return -EAGAIN;
+ if (!dev->ops.fill_res_qp_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_qp_entry_raw(msg, qp);
}
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -568,9 +588,8 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (fill_res_name_pid(msg, res))
goto err;
- if (fill_res_entry(dev, msg, res))
- goto err;
-
+ if (dev->ops.fill_res_cm_id_entry)
+ return dev->ops.fill_res_cm_id_entry(msg, cm_id);
return 0;
err: return -EMSGSIZE;
@@ -583,35 +602,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct ib_device *dev = cq->device;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
/* Poll context is only valid for kernel CQs */
if (rdma_is_kernel_res(res) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
cq->uobject->uevent.uobject.context->res.id))
- goto err;
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_cq_entry) ?
+ dev->ops.fill_res_cq_entry(msg, cq) : 0;
+}
- return 0;
+static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct ib_device *dev = cq->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_cq_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_cq_entry_raw(msg, cq);
}
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -622,38 +648,45 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
- goto err;
+ return -EMSGSIZE;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
- goto err;
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_mr_entry) ?
+ dev->ops.fill_res_mr_entry(msg, mr) :
+ 0;
+}
- return 0;
+static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_mr_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_mr_entry_raw(msg, mr);
}
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
- struct ib_device *dev = pd->device;
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
@@ -676,15 +709,138 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
pd->uobject->context->res.id))
goto err;
- if (fill_res_name_pid(msg, res))
- goto err;
+ return fill_res_name_pid(msg, res);
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
+
+ if (rdma_is_kernel_res(res))
+ return 0;
- if (fill_res_entry(dev, msg, res))
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
+ return -EMSGSIZE;
+
+ return fill_res_name_pid(msg, res);
+}
+
+static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
+ uint32_t max_range)
+{
+ struct nlattr *entry_attr;
+
+ if (!min_range)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (min_range == max_range) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
+ goto err;
+ }
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
+{
+ uint32_t min_range = 0, prev = 0;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct nlattr *table_attr;
+ struct ib_qp *qp = NULL;
+ unsigned long id = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ rt = &srq->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_restrack_get(res))
+ continue;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
+ rdma_restrack_put(res);
+ continue;
+ }
+
+ if (qp->qp_num < prev)
+ /* qp_num should be ascending */
+ goto err_loop;
+
+ if (min_range == 0) {
+ min_range = qp->qp_num;
+ } else if (qp->qp_num > (prev + 1)) {
+ if (fill_res_range_qp_entry(msg, min_range, prev))
+ goto err_loop;
+
+ min_range = qp->qp_num;
+ }
+ prev = qp->qp_num;
+ rdma_restrack_put(res);
+ }
+
+ xa_unlock(&rt->xa);
+
+ if (fill_res_range_qp_entry(msg, min_range, prev))
goto err;
+ nla_nest_end(msg, table_attr);
return 0;
-err: return -EMSGSIZE;
+err_loop:
+ rdma_restrack_put(res);
+ xa_unlock(&rt->xa);
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_srq *srq = container_of(res, struct ib_srq, res);
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
+ goto err;
+
+ if (ib_srq_has_cq(srq->srq_type)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
+ srq->ext.cq->res.id))
+ goto err;
+ }
+
+ if (fill_res_srq_qps(msg, srq))
+ goto err;
+
+ return fill_res_name_pid(msg, res);
+
+err:
+ return -EMSGSIZE;
}
static int fill_stat_counter_mode(struct sk_buff *msg,
@@ -695,11 +851,16 @@ static int fill_stat_counter_mode(struct sk_buff *msg,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
return -EMSGSIZE;
- if (m->mode == RDMA_COUNTER_MODE_AUTO)
+ if (m->mode == RDMA_COUNTER_MODE_AUTO) {
if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
return -EMSGSIZE;
+ if ((m->mask & RDMA_COUNTER_MASK_PID) &&
+ fill_res_name_pid(msg, &counter->res))
+ return -EMSGSIZE;
+ }
+
return 0;
}
@@ -738,9 +899,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
xa_lock(&rt->xa);
xa_for_each(&rt->xa, id, res) {
qp = container_of(res, struct ib_qp, res);
- if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- continue;
-
if (!qp->counter || (qp->counter->id != counter->id))
continue;
@@ -793,9 +951,8 @@ static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
goto err;
- if (fill_stat_entry(dev, msg, res))
- goto err;
-
+ if (dev->ops.fill_stat_mr_entry)
+ return dev->ops.fill_stat_mr_entry(msg, mr);
return 0;
err:
@@ -813,14 +970,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg,
if (!table_attr)
return -EMSGSIZE;
- for (i = 0; i < st->num_counters; i++)
- if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
+ mutex_lock(&st->lock);
+ for (i = 0; i < st->num_counters; i++) {
+ if (test_bit(i, st->is_disabled))
+ continue;
+ if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
+ st->value[i]))
goto err;
+ }
+ mutex_unlock(&st->lock);
nla_nest_end(msg, table_attr);
return 0;
err:
+ mutex_unlock(&st->lock);
nla_nest_cancel(msg, table_attr);
return -EMSGSIZE;
}
@@ -840,7 +1004,6 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
- fill_res_name_pid(msg, &counter->res) ||
fill_stat_counter_mode(msg, counter) ||
fill_stat_counter_qps(msg, counter) ||
fill_stat_counter_hwcounters(msg, counter))
@@ -916,8 +1079,12 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
char name[IB_DEVICE_NAME_MAX] = {};
- nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
+ if (strlen(name) == 0) {
+ err = -EINVAL;
+ goto done;
+ }
err = ib_device_rename(device, name);
goto done;
}
@@ -1173,7 +1340,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
struct nldev_fill_res_entry {
enum rdma_nldev_attr nldev_attr;
- enum rdma_nldev_command nldev_cmd;
u8 flags;
u32 entry;
u32 id;
@@ -1185,44 +1351,51 @@ enum nldev_res_flags {
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_QP] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_LQPN,
},
[RDMA_RESTRACK_CM_ID] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
},
[RDMA_RESTRACK_CQ] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CQN,
},
[RDMA_RESTRACK_MR] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_MRN,
},
[RDMA_RESTRACK_PD] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
[RDMA_RESTRACK_COUNTER] = {
- .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
},
+ [RDMA_RESTRACK_CTX] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_CTXN,
+ },
+ [RDMA_RESTRACK_SRQ] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_SRQN,
+ },
+
};
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1277,7 +1450,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(nlh->nlmsg_type)),
0, 0);
if (fill_nldev_handle(msg, device)) {
@@ -1288,11 +1462,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
ret = fill_func(msg, has_cap_net_admin, res, port);
-
- rdma_restrack_put(res);
if (ret)
goto err_free;
+ rdma_restrack_put(res);
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
@@ -1356,7 +1529,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
0, NLM_F_MULTI);
if (fill_nldev_handle(skb, device)) {
@@ -1438,27 +1612,32 @@ err_index:
return ret;
}
-#define RES_GET_FUNCS(name, type) \
- static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
+#define RES_GET_FUNCS(name, type) \
+ static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
struct netlink_callback *cb) \
- { \
- return res_get_common_dumpit(skb, cb, type, \
- fill_res_##name##_entry); \
- } \
- static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
- struct nlmsghdr *nlh, \
+ { \
+ return res_get_common_dumpit(skb, cb, type, \
+ fill_res_##name##_entry); \
+ } \
+ static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
+ struct nlmsghdr *nlh, \
struct netlink_ext_ack *extack) \
- { \
- return res_get_common_doit(skb, nlh, extack, type, \
- fill_res_##name##_entry); \
+ { \
+ return res_get_common_doit(skb, nlh, extack, type, \
+ fill_res_##name##_entry); \
}
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
+RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
+RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
+RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
+RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
@@ -1512,13 +1691,13 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
!tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
return -EINVAL;
- nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
- if (strchr(ibdev_name, '%'))
+ if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
return -EINVAL;
- nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
- nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
+ nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+ nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
sizeof(ndev_name));
ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
@@ -1560,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!device)
return -EINVAL;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
+ if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
ib_device_put(device);
return -EINVAL;
}
@@ -1585,7 +1764,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
return -EINVAL;
- nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
+ nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
sizeof(client_name));
if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
@@ -1680,6 +1859,19 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_free(msg);
return err;
}
+
+ /*
+ * Copy-on-fork is supported.
+ * See commits:
+ * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
+ * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
+ * for more details. Don't backport this without them.
+ *
+ * Return value ignored on purpose, assume copy-on-fork is not
+ * supported in case of failure.
+ */
+ nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
+
nlmsg_end(msg, nlh);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
}
@@ -1705,24 +1897,113 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
+static int nldev_stat_set_mode_doit(struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ struct nlattr *tb[],
+ struct ib_device *device, u32 port)
+{
+ u32 mode, mask = 0, qpn, cntn = 0;
+ int ret;
+
+ /* Currently only counter for QP is supported */
+ if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ return -EINVAL;
+
+ mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
+ if (mode == RDMA_COUNTER_MODE_AUTO) {
+ if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
+ mask = nla_get_u32(
+ tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
+ return rdma_counter_set_auto_mode(device, port, mask, extack);
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
+ if (ret)
+ return ret;
+ } else {
+ ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
+ if (ret)
+ return ret;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ return 0;
+
+err_fill:
+ rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ return ret;
+}
+
+static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
+ struct ib_device *device,
+ u32 port)
+{
+ struct rdma_hw_stats *stats;
+ struct nlattr *entry_attr;
+ unsigned long *target;
+ int rem, i, ret = 0;
+ u32 index;
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats)
+ return -EINVAL;
+
+ target = kcalloc(BITS_TO_LONGS(stats->num_counters),
+ sizeof(*stats->is_disabled), GFP_KERNEL);
+ if (!target)
+ return -ENOMEM;
+
+ nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
+ rem) {
+ index = nla_get_u32(entry_attr);
+ if ((index >= stats->num_counters) ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_bit(index, target);
+ }
+
+ for (i = 0; i < stats->num_counters; i++) {
+ if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
+ continue;
+
+ ret = rdma_counter_modify(device, port, i, test_bit(i, target));
+ if (ret)
+ goto out;
+ }
+
+out:
+ kfree(target);
+ return ret;
+}
+
static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- u32 index, port, mode, mask = 0, qpn, cntn = 0;
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
struct sk_buff *msg;
+ u32 index, port;
int ret;
- ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
- /* Currently only counter for QP is supported */
- if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
- !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
- !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
- return -EINVAL;
-
- if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
@@ -1733,61 +2014,49 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
if (!rdma_is_port_valid(device, port)) {
ret = -EINVAL;
- goto err;
+ goto err_put_device;
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
+ !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = -EINVAL;
+ goto err_put_device;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto err;
+ goto err_put_device;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_SET),
0, 0);
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
+ ret = -EMSGSIZE;
+ goto err_free_msg;
+ }
- mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
- if (mode == RDMA_COUNTER_MODE_AUTO) {
- if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
- mask = nla_get_u32(
- tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
-
- ret = rdma_counter_set_auto_mode(device, port,
- mask ? true : false, mask);
- if (ret)
- goto err_msg;
- } else {
- if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
- goto err_msg;
- qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
- if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
- cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
- ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
- } else {
- ret = rdma_counter_bind_qpn_alloc(device, port,
- qpn, &cntn);
- }
+ if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
+ ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
if (ret)
- goto err_msg;
+ goto err_free_msg;
+ }
- if (fill_nldev_handle(msg, device) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
- ret = -EMSGSIZE;
- goto err_fill;
- }
+ if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
+ if (ret)
+ goto err_free_msg;
}
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
-err_fill:
- rdma_counter_unbind_qpn(device, port, qpn, cntn);
-err_msg:
+err_free_msg:
nlmsg_free(msg);
-err:
+err_put_device:
ib_device_put(device);
return ret;
}
@@ -1879,13 +2148,14 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
if (!device)
return -EINVAL;
- if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
+ if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
ret = -EINVAL;
goto err;
}
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
- if (!rdma_is_port_valid(device, port)) {
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
ret = -EINVAL;
goto err;
}
@@ -1907,11 +2177,6 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
goto err_msg;
}
- stats = device->port_data ? device->port_data[port].hw_stats : NULL;
- if (stats == NULL) {
- ret = -EINVAL;
- goto err_msg;
- }
mutex_lock(&stats->lock);
num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
@@ -1926,9 +2191,13 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
goto err_stats;
}
for (i = 0; i < num_cnts; i++) {
+ if (test_bit(i, stats->is_disabled))
+ continue;
+
v = stats->value[i] +
rdma_counter_get_hwstat_value(device, port, i);
- if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
+ if (rdma_nl_stat_hwcounter_entry(msg,
+ stats->descs[i].name, v)) {
ret = -EMSGSIZE;
goto err_table;
}
@@ -2076,6 +2345,99 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb,
return ret;
}
+static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
+ struct rdma_hw_stats *stats;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 devid, port;
+ int ret, i;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), devid);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(
+ msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
+ 0, 0);
+
+ ret = -EMSGSIZE;
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
+ goto err_msg;
+
+ table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table)
+ goto err_msg;
+
+ mutex_lock(&stats->lock);
+ for (i = 0; i < stats->num_counters; i++) {
+ entry = nla_nest_start(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+ if (!entry)
+ goto err_msg_table;
+
+ if (nla_put_string(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+ stats->descs[i].name) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
+ goto err_msg_entry;
+
+ if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
+ (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
+ !test_bit(i, stats->is_disabled))))
+ goto err_msg_entry;
+
+ nla_nest_end(msg, entry);
+ }
+ mutex_unlock(&stats->lock);
+
+ nla_nest_end(msg, table);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_msg_entry:
+ nla_nest_cancel(msg, entry);
+err_msg_table:
+ mutex_unlock(&stats->lock);
+ nla_nest_cancel(msg, table);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -2124,6 +2486,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_res_get_pd_doit,
.dump = nldev_res_get_pd_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_CTX_GET] = {
+ .doit = nldev_res_get_ctx_doit,
+ .dump = nldev_res_get_ctx_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_SRQ_GET] = {
+ .doit = nldev_res_get_srq_doit,
+ .dump = nldev_res_get_srq_dumpit,
+ },
[RDMA_NLDEV_CMD_SYS_GET] = {
.doit = nldev_sys_get_doit,
},
@@ -2142,6 +2512,24 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_stat_del_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
+ [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
+ .doit = nldev_res_get_qp_raw_doit,
+ .dump = nldev_res_get_qp_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
+ .doit = nldev_res_get_cq_raw_doit,
+ .dump = nldev_res_get_cq_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
+ .doit = nldev_res_get_mr_raw_doit,
+ .dump = nldev_res_get_mr_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
+ .doit = nldev_stat_get_counter_status_doit,
+ },
};
void __init nldev_init(void)
@@ -2149,7 +2537,7 @@ void __init nldev_init(void)
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
-void __exit nldev_exit(void)
+void nldev_exit(void)
{
rdma_nl_unregister(RDMA_NL_NLDEV);
}
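
For context, userspace reaches the new RDMA_NLDEV_CMD_STAT_GET_STATUS handler through the RDMA netlink family. The sketch below is a minimal libnl-3 request under that assumption; request_counter_status is a hypothetical helper, parsing of the nested RDMA_NLDEV_ATTR_STAT_HWCOUNTERS reply is omitted, and the device index and port are supplied by the caller (for example from a prior RDMA_NLDEV_CMD_GET dump).

/* Illustrative only: send RDMA_NLDEV_CMD_STAT_GET_STATUS for one port.
 * Assumes libnl-3 and the installed uapi header <rdma/rdma_netlink.h>.
 */
#include <stdint.h>
#include <linux/netlink.h>
#include <netlink/netlink.h>
#include <netlink/msg.h>
#include <netlink/attr.h>
#include <rdma/rdma_netlink.h>

int request_counter_status(uint32_t dev_index, uint32_t port)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	int ret = -1;

	if (!sk || !msg || nl_connect(sk, NETLINK_RDMA))
		goto out;

	/* Same message type the kernel matches in nldev_cb_table */
	if (!nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
		       RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					RDMA_NLDEV_CMD_STAT_GET_STATUS),
		       0, NLM_F_REQUEST))
		goto out;

	/* The doit handler requires both a device index and a port index */
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, dev_index) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		goto out;

	ret = nl_send_auto(sk, msg) < 0 ? -1 : 0;
out:
	nlmsg_free(msg);
	if (sk)
		nl_socket_free(sk);
	return ret;
}

The kernel replies with one RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY nest per counter, carrying the counter name, its index and, for optional counters, whether it is currently enabled.
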
diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h
index af4879bdf3d6..64e2822af70f 100644
--- a/drivers/infiniband/core/opa_smi.h
+++ b/drivers/infiniband/core/opa_smi.h
@@ -40,11 +40,11 @@
#include "smi.h"
enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt);
+ u32 port_num, int phys_port_cnt);
int opa_smi_get_fwd_port(struct opa_smp *smp);
extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp);
extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
- bool is_switch, int port_num);
+ bool is_switch, u32 port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 5128cb16bb48..29b1ab1d5f93 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
- * access locks, since only a single one of them is is allowed
+ * access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
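
For context, the shared/exclusive scheme described in the comment above can be sketched with plain C11 atomics outside the kernel; the helper names below (try_lock_shared, try_lock_exclusive and their unlock counterparts) are illustrative and not part of rdma_core.c, which applies the same idea to the uobject's usecnt with kernel atomics.

/* Counter convention from the comment above:
 *   0 -> unclaimed, N > 0 -> N shared holders, -1 -> one exclusive holder
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int usecnt;	/* starts at 0 == unclaimed */

static bool try_lock_shared(void)
{
	int cur = atomic_load(&usecnt);

	/* Take a shared reference only while nobody holds it exclusively */
	while (cur != -1)
		if (atomic_compare_exchange_weak(&usecnt, &cur, cur + 1))
			return true;
	return false;
}

static bool try_lock_exclusive(void)
{
	int expected = 0;

	/* Exclusive access: the counter must be zero, then set it to -1 */
	return atomic_compare_exchange_strong(&usecnt, &expected, -1);
}

static void unlock_shared(void)
{
	atomic_fetch_sub(&usecnt, 1);	/* drop one shared reference */
}

static void unlock_exclusive(void)
{
	atomic_store(&usecnt, 0);	/* single exclusive holder, reset to zero */
}
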
@@ -112,7 +112,7 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj,
* however the type's alloc_commit function cannot have been called and the
* uobject cannot be on the uobjects_lists
*
- * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via
+ * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
* rdma_lookup_get_uobject) and the object is left in a state where the caller
* needs to call rdma_lookup_put_uobject.
*
@@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
} else if (uobj->object) {
ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
attrs);
- if (ret) {
- if (ib_is_destroy_retryable(ret, reason, uobj))
- return ret;
-
- /* Nothing to be done, dangle the memory and move on */
- WARN(true,
- "ib_uverbs: failed to remove uobject id %d, driver err=%d",
- uobj->id, ret);
- }
+ if (ret)
+ /* Nothing to be done; the ucontext will clean it up later */
+ return ret;
uobj->object = NULL;
}
@@ -153,9 +147,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
uobj->context = NULL;
/*
- * For DESTROY the usecnt is held write